diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..cd5e365e7 --- /dev/null +++ b/.clang-format @@ -0,0 +1,74 @@ +--- +Language: Cpp + +# Indentation +UseTab: Never +IndentWidth: 4 +BreakBeforeBraces: Attach +IndentCaseLabels: false +NamespaceIndentation: None +ContinuationIndentWidth: 4 +IndentPPDirectives: None +IndentWrappedFunctionNames: false +AccessModifierOffset: -2 + +# Alignment +AlignAfterOpenBracket: BlockIndent +AlignConsecutiveAssignments: false +AlignConsecutiveDeclarations: false +AlignConsecutiveMacros: false +AlignEscapedNewlines: Left +AlignOperands: false +AlignTrailingComments: true +DerivePointerAlignment: false +PointerAlignment: Right + +# Function calls formatting +BinPackArguments: false +BinPackParameters: false +AllowAllArgumentsOnNextLine: false +ExperimentalAutoDetectBinPacking: false +PenaltyBreakBeforeFirstCallParameter: 1 +AlwaysBreakAfterDefinitionReturnType: None + +# Wrapping and Breaking +ColumnLimit: 0 +AllowShortBlocksOnASingleLine: Never +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: Always +AllowShortLoopsOnASingleLine: false +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: false +AlwaysBreakTemplateDeclarations: No +BreakBeforeBinaryOperators: None +BreakBeforeTernaryOperators: false +BreakConstructorInitializers: BeforeColon +BreakInheritanceList: BeforeColon +BreakStringLiterals: false +CompactNamespaces: false +ConstructorInitializerAllOnOneLineOrOnePerLine: false +Cpp11BracedListStyle: false +ReflowComments: false +SortIncludes: Never + +# Spaces +SpaceAfterCStyleCast: true +SpaceAfterLogicalNot: false +SpaceAfterTemplateKeyword: true +SpaceBeforeAssignmentOperators: true +SpaceBeforeCpp11BracedList: true +SpaceBeforeCtorInitializerColon: true +SpaceBeforeInheritanceColon: true +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false 
+SpaceInEmptyBlock: false +SpaceInEmptyParentheses: false +SpacesBeforeTrailingComments: 1 +SpacesInAngles: false +SpacesInCStyleCastParentheses: false +SpacesInConditionalStatement: false +SpacesInContainerLiterals: true +SpacesInParentheses: false +SpacesInSquareBrackets: false diff --git a/.github/workflows/c-check.yml b/.github/workflows/c-check.yml new file mode 100644 index 000000000..b1bdc66cc --- /dev/null +++ b/.github/workflows/c-check.yml @@ -0,0 +1,54 @@ +name: C Code Generation and Formatting Check + +on: + push: + pull_request: {} + merge_group: {} + +jobs: + format-check: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: ruby/setup-ruby@v1 + with: + ruby-version: "3.4" + bundler-cache: none + - name: Set working directory as safe + run: git config --global --add safe.directory $(pwd) + - name: Install dependencies + run: | + sudo apt-get update + sudo apt-get install -y libdb-dev curl autoconf automake m4 libtool + - name: Install clang-format from LLVM + run: | + wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add - + sudo apt-add-repository "deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-20 main" + sudo apt-get update + sudo apt-get install -y clang-format-20 + sudo ln -sf /usr/bin/clang-format-20 /usr/local/bin/clang-format + clang-format --version + - name: Count processors + run: nproc + - name: Install Re2c + run: | + cd /tmp + curl -L https://github.com/skvadrik/re2c/archive/refs/tags/4.3.tar.gz > re2c-4.3.tar.gz + tar xf re2c-4.3.tar.gz + cd re2c-4.3 + autoreconf -i -W all + ./configure + make -j"$(nproc)" -l"$(nproc)" + sudo make install + - name: Update rubygems & bundler + run: | + ruby -v + gem update --system + - name: bin/setup + run: | + bin/setup + - name: Check C code generation and formatting + run: | + clang-format --version + bundle exec rake lexer templates compile confirm_lexer confirm_templates + bundle exec rake format:c_check diff --git a/.github/workflows/ruby.yml 
b/.github/workflows/ruby.yml index ede0364e1..976c781c8 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -28,8 +28,6 @@ jobs: - ruby: "3.4" job: stdlib_test rubyopt: "--enable-frozen-string-literal" - - ruby: "3.4" - job: lexer templates compile confirm_lexer confirm_templates - ruby: "3.4" job: rubocop validate test_doc build test_generate_stdlib raap - ruby: "3.4" @@ -48,17 +46,6 @@ jobs: run: | sudo apt-get update sudo apt-get install -y libdb-dev curl autoconf automake m4 libtool python3 - - name: Install Re2c - if: ${{ contains(matrix.job, 'lexer') }} - run: | - cd /tmp - curl -L https://github.com/skvadrik/re2c/archive/refs/tags/3.1.tar.gz > re2c-3.1.tar.gz - tar xf re2c-3.1.tar.gz - cd re2c-3.1 - autoreconf -i -W all - ./configure - make - sudo make install - name: Update rubygems & bundler run: | ruby -v @@ -102,3 +89,31 @@ jobs: - run: bundle exec rake test:valgrind env: RUBY_FREE_AT_EXIT: 1 + C99_compile: + runs-on: macos-latest + strategy: + fail-fast: false + matrix: + ruby: ['3.4', head] + steps: + - uses: actions/checkout@v4 + - name: Install dependencies + run: | + brew install ruby-build + - uses: ruby/setup-ruby@v1 + with: + ruby-version: ${{ matrix.ruby }} + bundler: none + - name: Set working directory as safe + run: git config --global --add safe.directory $(pwd) + - name: Update rubygems & bundler + run: | + ruby -v + gem update --system + - name: clang version + run: clang --version + - name: bin/setup + run: | + bin/setup + - run: bundle exec rake clean compile_c99 + diff --git a/README.md b/README.md index 9890c554e..2b994253e 100644 --- a/README.md +++ b/README.md @@ -198,6 +198,43 @@ After checking out the repo, run `bin/setup` to install dependencies. Then, run To install this gem onto your local machine, run `bundle exec rake install`. 
To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org). +### C Code Formatting + +This project uses `clang-format` to enforce consistent formatting of C code with a `.clang-format` configuration in the root directory. + +#### Setup + +First, install clang-format: + +```bash +# macOS +brew install clang-format + +# Ubuntu/Debian +sudo apt-get install clang-format + +# Windows +choco install llvm +``` + +#### Usage + +Format all C source files: + +```bash +rake format:c +``` + +Check formatting without making changes: + +```bash +rake format:c_check +``` + +#### Editor Integration + +For VS Code users, install the "clangd" extension which will automatically use the project's `.clang-format` file. + ## Contributing Bug reports and pull requests are welcome on GitHub at https://github.com/ruby/rbs. diff --git a/Rakefile b/Rakefile index 59f4b4bb7..b9c9340ef 100644 --- a/Rakefile +++ b/Rakefile @@ -36,12 +36,13 @@ end multitask :default => [:test, :stdlib_test, :typecheck_test, :rubocop, :validate, :test_doc] task :lexer do - sh "re2c -W --no-generation-date -o ext/rbs_extension/lexer.c ext/rbs_extension/lexer.re" + sh "re2c -W --no-generation-date -o src/lexer.c src/lexer.re" + sh "clang-format -i -style=file src/lexer.c" end task :confirm_lexer => :lexer do puts "Testing if lexer.c is updated with respect to lexer.re" - sh "git diff --exit-code ext/rbs_extension/lexer.c" + sh "git diff --exit-code src/lexer.c" end task :confirm_templates => :templates do @@ -49,6 +50,84 @@ task :confirm_templates => :templates do sh "git diff --exit-code -- include src" end +# Task to format C code using clang-format +namespace :format do + dirs = ["src", "ext", "include"] + + # Find all C source and header files + files = `find #{dirs.join(" ")} -type f \\( -name "*.c" -o -name "*.h" 
\\)`.split("\n") + + desc "Format C source files using clang-format" + task :c do + puts "Formatting C files..." + + # Check if clang-format is installed + unless system("which clang-format > /dev/null 2>&1") + abort "Error: clang-format not found. Please install clang-format first." + end + + if files.empty? + puts "No C files found to format" + next + end + + puts "Found #{files.length} files to format (excluding generated files)" + + exit_status = 0 + files.each do |file| + puts "Formatting #{file}" + unless system("clang-format -i -style=file #{file}") + puts "❌ Error formatting #{file}" + exit_status = 1 + end + end + + exit exit_status unless exit_status == 0 + puts "✅ All files formatted successfully" + end + + desc "Check if C source files are properly formatted" + task :c_check do + puts "Checking C file formatting..." + + # Check if clang-format is installed + unless system("which clang-format > /dev/null 2>&1") + abort "Error: clang-format not found. Please install clang-format first." + end + + if files.empty? + puts "No C files found to check" + next + end + + puts "Found #{files.length} files to check (excluding generated files)" + + needs_format = false + files.each do |file| + formatted = `clang-format -style=file #{file}` + original = File.read(file) + + if formatted != original + puts "❌ #{file} needs formatting" + puts "Diff:" + # Save formatted version to temp file and run diff + temp_file = "#{file}.formatted" + File.write(temp_file, formatted) + system("diff -u #{file} #{temp_file}") + File.unlink(temp_file) + needs_format = true + end + end + + if needs_format + warn "Some files need formatting. Run 'rake format:c' to format them." + exit 1 + else + puts "✅ All files are properly formatted" + end + end +end + rule ".c" => ".re" do |t| puts "⚠️⚠️⚠️ #{t.name} is older than #{t.source}. 
You may need to run `rake lexer` ⚠️⚠️⚠️" end @@ -70,17 +149,22 @@ task :confirm_annotation do end task :templates do - sh "#{ruby} templates/template.rb include/rbs/constants.h" - sh "#{ruby} templates/template.rb include/rbs/ruby_objs.h" - sh "#{ruby} templates/template.rb src/constants.c" - sh "#{ruby} templates/template.rb src/ruby_objs.c" + sh "#{ruby} templates/template.rb ext/rbs_extension/ast_translation.h" + sh "#{ruby} templates/template.rb ext/rbs_extension/ast_translation.c" + + sh "#{ruby} templates/template.rb ext/rbs_extension/class_constants.h" + sh "#{ruby} templates/template.rb ext/rbs_extension/class_constants.c" + + sh "#{ruby} templates/template.rb include/rbs/ast.h" + sh "#{ruby} templates/template.rb src/ast.c" + + # Format the generated files + Rake::Task["format:c"].invoke end -task :compile => "ext/rbs_extension/lexer.c" -task :compile => "include/rbs/constants.h" -task :compile => "include/rbs/ruby_objs.h" -task :compile => "src/constants.c" -task :compile => "src/ruby_objs.c" +task :compile => "ext/rbs_extension/class_constants.h" +task :compile => "ext/rbs_extension/class_constants.c" +task :compile => "src/lexer.c" task :test_doc do files = Dir.chdir(File.expand_path('..', __FILE__)) do @@ -430,3 +514,25 @@ task :changelog do puts " (🤑 There is no *unreleased* pull request associated to the milestone.)" end end + +desc "Compile extension without C23 extensions" +task :compile_c99 do + ENV["TEST_NO_C23"] = "true" + Rake::Task[:"compile"].invoke +ensure + ENV.delete("TEST_NO_C23") +end + +task :prepare_bench do + ENV.delete("DEBUG") + Rake::Task[:"clobber"].invoke + Rake::Task[:"templates"].invoke + Rake::Task[:"compile"].invoke +end + +task :prepare_profiling do + ENV["DEBUG"] = "1" + Rake::Task[:"clobber"].invoke + Rake::Task[:"templates"].invoke + Rake::Task[:"compile"].invoke +end \ No newline at end of file diff --git a/bin/benchmark-parse.rb b/bin/benchmark-parse.rb new file mode 100644 index 000000000..98e4ce16d --- /dev/null +++ 
b/bin/benchmark-parse.rb @@ -0,0 +1,25 @@ +require "rbs" +require "benchmark/ips" +require "csv" +require "pathname" + +files = {} +ARGV.each do |file| + content = File.read(file) + files[file] = RBS::Buffer.new(content: content, name: Pathname(file)) +end + +puts "Benchmarking parsing #{files.size} files..." + +result = Benchmark.ips do |x| + x.report("parsing") do + files.each do |file, content| + RBS::Parser.parse_signature(content) + end + end + + x.quiet = true +end + +entry = result.entries[0] +puts "✅ #{"%0.3f" % entry.ips} i/s (±#{"%0.3f" % entry.error_percentage}%)" diff --git a/bin/profile-parse.rb b/bin/profile-parse.rb new file mode 100644 index 000000000..3f9f75dbb --- /dev/null +++ b/bin/profile-parse.rb @@ -0,0 +1,39 @@ +require 'rbs' +require "optparse" + +wait = false +duration = 3 + +args = ARGV.dup + +OptionParser.new do |opts| + opts.banner = "Usage: profile-parse.rb [options] FILE" + + opts.on("--wait", "Wait for enter before starting") do + wait = true + end + opts.on("--duration=NUMBER", "Repeat parsing for seconds") do |number| + duration = number.to_i + end +end.parse!(args) + +if wait + puts "⏯️ Waiting for enter to continue at #{Process.pid}..." 
+ STDIN.gets +end + +file = args.shift or raise "No file path is given" +sig = File.read(file) + +puts "Parsing #{file} -- #{sig.bytesize} bytes" + +started_at = Time.now +count = 0 + +loop do + count += 1 + RBS::Parser.parse_signature(sig) + break if (Time.now - started_at) > duration +end + +puts "✅ Done #{count} loop(s)" \ No newline at end of file diff --git a/config.yml b/config.yml index b17f864a7..682ae299e 100644 --- a/config.yml +++ b/config.yml @@ -2,316 +2,444 @@ nodes: - name: RBS::AST::Annotation fields: - name: string - - name: location + c_type: rbs_string + - name: RBS::AST::Bool + expose_to_ruby: false + expose_location: false + fields: + - name: value + c_type: bool - name: RBS::AST::Comment fields: - name: string - - name: location + c_type: rbs_string - name: RBS::AST::Declarations::Class fields: - name: name + c_type: rbs_type_name - name: type_params + c_type: rbs_node_list - name: super_class + c_type: rbs_ast_declarations_class_super - name: members + c_type: rbs_node_list - name: annotations - - name: location + c_type: rbs_node_list - name: comment + c_type: rbs_ast_comment - name: RBS::AST::Declarations::Class::Super fields: - name: name + c_type: rbs_type_name - name: args - - name: location + c_type: rbs_node_list - name: RBS::AST::Declarations::ClassAlias fields: - name: new_name + c_type: rbs_type_name - name: old_name - - name: location + c_type: rbs_type_name - name: comment + c_type: rbs_ast_comment - name: annotations + c_type: rbs_node_list - name: RBS::AST::Declarations::Constant fields: - name: name + c_type: rbs_type_name - name: type - - name: location + c_type: rbs_node - name: comment + c_type: rbs_ast_comment - name: annotations + c_type: rbs_node_list - name: RBS::AST::Declarations::Global fields: - name: name + c_type: rbs_ast_symbol - name: type - - name: location + c_type: rbs_node - name: comment + c_type: rbs_ast_comment - name: annotations + c_type: rbs_node_list - name: RBS::AST::Declarations::Interface fields: - 
name: name + c_type: rbs_type_name - name: type_params + c_type: rbs_node_list - name: members + c_type: rbs_node_list - name: annotations - - name: location + c_type: rbs_node_list - name: comment + c_type: rbs_ast_comment - name: RBS::AST::Declarations::Module fields: - name: name + c_type: rbs_type_name - name: type_params + c_type: rbs_node_list - name: self_types + c_type: rbs_node_list - name: members + c_type: rbs_node_list - name: annotations - - name: location + c_type: rbs_node_list - name: comment + c_type: rbs_ast_comment - name: RBS::AST::Declarations::Module::Self fields: - name: name + c_type: rbs_type_name - name: args - - name: location + c_type: rbs_node_list - name: RBS::AST::Declarations::ModuleAlias fields: - name: new_name + c_type: rbs_type_name - name: old_name - - name: location + c_type: rbs_type_name - name: comment + c_type: rbs_ast_comment - name: annotations + c_type: rbs_node_list - name: RBS::AST::Declarations::TypeAlias fields: - name: name + c_type: rbs_type_name - name: type_params + c_type: rbs_node_list - name: type + c_type: rbs_node - name: annotations - - name: location + c_type: rbs_node_list - name: comment + c_type: rbs_ast_comment - name: RBS::AST::Directives::Use fields: - name: clauses - - name: location + c_type: rbs_node_list - name: RBS::AST::Directives::Use::SingleClause fields: - name: type_name + c_type: rbs_type_name - name: new_name - - name: location + c_type: rbs_ast_symbol - name: RBS::AST::Directives::Use::WildcardClause fields: - name: namespace - - name: location + c_type: rbs_namespace + c_name: rbs_namespace - name: RBS::AST::Members::Alias fields: - name: new_name + c_type: rbs_ast_symbol - name: old_name + c_type: rbs_ast_symbol - name: kind + c_type: rbs_keyword - name: annotations - - name: location + c_type: rbs_node_list - name: comment + c_type: rbs_ast_comment - name: RBS::AST::Members::AttrAccessor fields: - name: name + c_type: rbs_ast_symbol - name: type + c_type: rbs_node - name: ivar_name + 
c_type: rbs_node # rbs_ast_symbol_t, NULL or rbs_ast_bool_new(false) - name: kind + c_type: rbs_keyword - name: annotations - - name: location + c_type: rbs_node_list - name: comment + c_type: rbs_ast_comment - name: visibility + c_type: rbs_keyword - name: RBS::AST::Members::AttrReader fields: - name: name + c_type: rbs_ast_symbol - name: type + c_type: rbs_node - name: ivar_name + c_type: rbs_node # rbs_ast_symbol_t, NULL or rbs_ast_bool_new(false) - name: kind + c_type: rbs_keyword - name: annotations - - name: location + c_type: rbs_node_list - name: comment + c_type: rbs_ast_comment - name: visibility + c_type: rbs_keyword - name: RBS::AST::Members::AttrWriter fields: - name: name + c_type: rbs_ast_symbol - name: type + c_type: rbs_node - name: ivar_name + c_type: rbs_node # rbs_ast_symbol_t, NULL or rbs_ast_bool_new(false) - name: kind + c_type: rbs_keyword - name: annotations - - name: location + c_type: rbs_node_list - name: comment + c_type: rbs_ast_comment - name: visibility + c_type: rbs_keyword - name: RBS::AST::Members::ClassInstanceVariable fields: - name: name + c_type: rbs_ast_symbol - name: type - - name: location + c_type: rbs_node - name: comment + c_type: rbs_ast_comment - name: RBS::AST::Members::ClassVariable fields: - name: name + c_type: rbs_ast_symbol - name: type - - name: location + c_type: rbs_node - name: comment + c_type: rbs_ast_comment - name: RBS::AST::Members::Extend fields: - name: name + c_type: rbs_type_name - name: args + c_type: rbs_node_list - name: annotations - - name: location + c_type: rbs_node_list - name: comment + c_type: rbs_ast_comment - name: RBS::AST::Members::Include fields: - name: name + c_type: rbs_type_name - name: args + c_type: rbs_node_list - name: annotations - - name: location + c_type: rbs_node_list - name: comment + c_type: rbs_ast_comment - name: RBS::AST::Members::InstanceVariable fields: - name: name + c_type: rbs_ast_symbol - name: type - - name: location + c_type: rbs_node - name: comment + c_type: 
rbs_ast_comment - name: RBS::AST::Members::MethodDefinition fields: - name: name + c_type: rbs_ast_symbol - name: kind + c_type: rbs_keyword - name: overloads + c_type: rbs_node_list - name: annotations - - name: location + c_type: rbs_node_list - name: comment + c_type: rbs_ast_comment - name: overloading + c_type: bool - name: visibility + c_type: rbs_keyword - name: RBS::AST::Members::MethodDefinition::Overload + expose_location: false fields: - name: annotations + c_type: rbs_node_list - name: method_type + c_type: rbs_node - name: RBS::AST::Members::Prepend fields: - name: name + c_type: rbs_type_name - name: args + c_type: rbs_node_list - name: annotations - - name: location + c_type: rbs_node_list - name: comment + c_type: rbs_ast_comment - name: RBS::AST::Members::Private - fields: - - name: location - name: RBS::AST::Members::Public - fields: - - name: location - name: RBS::AST::TypeParam fields: - name: name + c_type: rbs_ast_symbol - name: variance + c_type: rbs_keyword - name: upper_bound + c_type: rbs_node - name: default_type + c_type: rbs_node - name: unchecked - - name: location + c_type: bool + - name: RBS::AST::Integer + expose_to_ruby: false + expose_location: false + fields: + - name: string_representation + c_type: rbs_string + - name: RBS::AST::String + expose_to_ruby: false + expose_location: false + fields: + - name: string + c_type: rbs_string - name: RBS::MethodType fields: - name: type_params + c_type: rbs_node_list - name: type + c_type: rbs_node - name: block - - name: location + c_type: rbs_types_block - name: RBS::Namespace + expose_location: false fields: - name: path + c_type: rbs_node_list - name: absolute + c_type: bool + - name: RBS::Signature + expose_to_ruby: false + expose_location: false + fields: + - name: directives + c_type: rbs_node_list + - name: declarations + c_type: rbs_node_list - name: RBS::TypeName + expose_location: false fields: - name: namespace + c_type: rbs_namespace + c_name: rbs_namespace - name: name + 
c_type: rbs_ast_symbol - name: RBS::Types::Alias fields: - name: name + c_type: rbs_type_name - name: args - - name: location + c_type: rbs_node_list - name: RBS::Types::Bases::Any fields: - name: todo - - name: location + c_type: bool - name: RBS::Types::Bases::Bool - fields: - - name: location - name: RBS::Types::Bases::Bottom - fields: - - name: location - name: RBS::Types::Bases::Class - fields: - - name: location - name: RBS::Types::Bases::Instance - fields: - - name: location - name: RBS::Types::Bases::Nil - fields: - - name: location - name: RBS::Types::Bases::Self - fields: - - name: location - name: RBS::Types::Bases::Top - fields: - - name: location - name: RBS::Types::Bases::Void - fields: - - name: location - name: RBS::Types::Block + expose_location: false fields: - name: type + c_type: rbs_node - name: required + c_type: bool - name: self_type + c_type: rbs_node - name: RBS::Types::ClassInstance fields: - name: name + c_type: rbs_type_name - name: args - - name: location + c_type: rbs_node_list - name: RBS::Types::ClassSingleton fields: - name: name - - name: location + c_type: rbs_type_name - name: RBS::Types::Function + expose_location: false fields: - name: required_positionals + c_type: rbs_node_list - name: optional_positionals + c_type: rbs_node_list - name: rest_positionals + c_type: rbs_node - name: trailing_positionals + c_type: rbs_node_list - name: required_keywords + c_type: rbs_hash - name: optional_keywords + c_type: rbs_hash - name: rest_keywords + c_type: rbs_node - name: return_type + c_type: rbs_node - name: RBS::Types::Function::Param fields: - name: type + c_type: rbs_node - name: name - - name: location + c_type: rbs_ast_symbol - name: RBS::Types::Interface fields: - name: name + c_type: rbs_type_name - name: args - - name: location + c_type: rbs_node_list - name: RBS::Types::Intersection fields: - name: types - - name: location + c_type: rbs_node_list - name: RBS::Types::Literal fields: - name: literal - - name: location + 
c_type: rbs_node - name: RBS::Types::Optional fields: - name: type - - name: location + c_type: rbs_node - name: RBS::Types::Proc fields: - name: type + c_type: rbs_node - name: block - - name: location + c_type: rbs_types_block - name: self_type + c_type: rbs_node - name: RBS::Types::Record fields: - name: all_fields - - name: location + c_type: rbs_hash + - name: RBS::Types::Record::FieldType + expose_to_ruby: false + expose_location: false + fields: + - name: type + c_type: rbs_node + - name: required + c_type: bool - name: RBS::Types::Tuple fields: - name: types - - name: location + c_type: rbs_node_list - name: RBS::Types::Union fields: - name: types - - name: location + c_type: rbs_node_list - name: RBS::Types::UntypedFunction + expose_location: false fields: - name: return_type + c_type: rbs_node - name: RBS::Types::Variable fields: - name: name - - name: location + c_type: rbs_ast_symbol diff --git a/ext/rbs_extension/ast_translation.c b/ext/rbs_extension/ast_translation.c new file mode 100644 index 000000000..bad8f96d3 --- /dev/null +++ b/ext/rbs_extension/ast_translation.c @@ -0,0 +1,1015 @@ +/*----------------------------------------------------------------------------*/ +/* This file is generated by the templates/template.rb script and should not */ +/* be modified manually. 
*/ +/* To change the template see */ +/* templates/ext/rbs_extension/ast_translation.c.erb */ +/*----------------------------------------------------------------------------*/ + +#include "ast_translation.h" + +#include "class_constants.h" +#include "rbs_string_bridging.h" +#include "legacy_location.h" + +VALUE EMPTY_ARRAY; +VALUE EMPTY_HASH; + +#define RBS_LOC_CHILDREN_SIZE(cap) (sizeof(rbs_loc_children) + sizeof(rbs_loc_entry) * ((cap) - 1)) + +rbs_translation_context_t rbs_translation_context_create(rbs_constant_pool_t *constant_pool, VALUE buffer, rb_encoding *ruby_encoding) { + return (rbs_translation_context_t) { + .constant_pool = constant_pool, + .buffer = buffer, + .encoding = ruby_encoding, + }; +} + +VALUE rbs_node_list_to_ruby_array(rbs_translation_context_t ctx, rbs_node_list_t *list) { + VALUE ruby_array = rb_ary_new(); + + for (rbs_node_list_node_t *n = list->head; n != NULL; n = n->next) { + rb_ary_push(ruby_array, rbs_struct_to_ruby_value(ctx, n->node)); + } + + return ruby_array; +} + +VALUE rbs_hash_to_ruby_hash(rbs_translation_context_t ctx, rbs_hash_t *rbs_hash) { + if (!rbs_hash->head) { + return EMPTY_HASH; + } + + VALUE ruby_hash = rb_hash_new(); + + for (rbs_hash_node_t *n = rbs_hash->head; n != NULL; n = n->next) { + VALUE key = rbs_struct_to_ruby_value(ctx, n->key); + VALUE value = rbs_struct_to_ruby_value(ctx, n->value); + rb_hash_aset(ruby_hash, key, value); + } + + return ruby_hash; +} + +VALUE rbs_loc_to_ruby_location(rbs_translation_context_t ctx, rbs_location_t *source_loc) { + if (source_loc == NULL) { + return Qnil; + } + + VALUE new_loc = rbs_new_location(ctx.buffer, source_loc->rg); + rbs_loc *new_loc_struct = rbs_check_location(new_loc); + + if (source_loc->children != NULL) { + rbs_loc_legacy_alloc_children(new_loc_struct, source_loc->children->cap); + memcpy(new_loc_struct->children, source_loc->children, RBS_LOC_CHILDREN_SIZE(source_loc->children->cap)); + } + + return new_loc; +} + +VALUE 
rbs_location_list_to_ruby_array(rbs_translation_context_t ctx, rbs_location_list_t *list) { + if (list == NULL) { + return EMPTY_ARRAY; + } + + VALUE ruby_array = rb_ary_new(); + + for (rbs_location_list_node_t *n = list->head; n != NULL; n = n->next) { + rb_ary_push(ruby_array, rbs_loc_to_ruby_location(ctx, n->loc)); + } + + return ruby_array; +} + +#ifdef RB_PASS_KEYWORDS +// Ruby 2.7 or later +#define CLASS_NEW_INSTANCE(klass, argc, argv) \ + rb_class_new_instance_kw(argc, argv, klass, RB_PASS_KEYWORDS) +#else +// Ruby 2.6 +#define CLASS_NEW_INSTANCE(receiver, argc, argv) \ + rb_class_new_instance(argc, argv, receiver) +#endif + +VALUE rbs_struct_to_ruby_value(rbs_translation_context_t ctx, rbs_node_t *instance) { + if (instance == NULL) return Qnil; + + switch (instance->type) { + case RBS_AST_ANNOTATION: { + rbs_ast_annotation_t *node = (rbs_ast_annotation_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("string")), rbs_string_to_ruby_string(&node->string, ctx.encoding)); + + return CLASS_NEW_INSTANCE( + RBS_AST_Annotation, + 1, + &h + ); + } + case RBS_AST_BOOL: { + return ((rbs_ast_bool_t *) instance)->value ? 
Qtrue : Qfalse; + } + case RBS_AST_COMMENT: { + rbs_ast_comment_t *node = (rbs_ast_comment_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("string")), rbs_string_to_ruby_string(&node->string, ctx.encoding)); + + return CLASS_NEW_INSTANCE( + RBS_AST_Comment, + 1, + &h + ); + } + case RBS_AST_DECLARATIONS_CLASS: { + rbs_ast_declarations_class_t *node = (rbs_ast_declarations_class_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("type_params")), rbs_node_list_to_ruby_array(ctx, node->type_params)); + rb_hash_aset(h, ID2SYM(rb_intern("super_class")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->super_class)); // rbs_ast_declarations_class_super + rb_hash_aset(h, ID2SYM(rb_intern("members")), rbs_node_list_to_ruby_array(ctx, node->members)); + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + + rb_funcall( + RBS_AST_TypeParam, + rb_intern("resolve_variables"), + 1, + rb_hash_lookup(h, ID2SYM(rb_intern("type_params"))) + ); + return CLASS_NEW_INSTANCE( + RBS_AST_Declarations_Class, + 1, + &h + ); + } + case RBS_AST_DECLARATIONS_CLASS_SUPER: { + rbs_ast_declarations_class_super_t *node = (rbs_ast_declarations_class_super_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_type_name + rb_hash_aset(h, 
ID2SYM(rb_intern("args")), rbs_node_list_to_ruby_array(ctx, node->args)); + + return CLASS_NEW_INSTANCE( + RBS_AST_Declarations_Class_Super, + 1, + &h + ); + } + case RBS_AST_DECLARATIONS_CLASS_ALIAS: { + rbs_ast_declarations_class_alias_t *node = (rbs_ast_declarations_class_alias_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("new_name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->new_name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("old_name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->old_name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + + return CLASS_NEW_INSTANCE( + RBS_AST_Declarations_ClassAlias, + 1, + &h + ); + } + case RBS_AST_DECLARATIONS_CONSTANT: { + rbs_ast_declarations_constant_t *node = (rbs_ast_declarations_constant_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + + return CLASS_NEW_INSTANCE( + RBS_AST_Declarations_Constant, + 1, + &h + ); + } + case RBS_AST_DECLARATIONS_GLOBAL: { + rbs_ast_declarations_global_t *node = (rbs_ast_declarations_global_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, 
ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_ast_symbol + rb_hash_aset(h, ID2SYM(rb_intern("type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + + return CLASS_NEW_INSTANCE( + RBS_AST_Declarations_Global, + 1, + &h + ); + } + case RBS_AST_DECLARATIONS_INTERFACE: { + rbs_ast_declarations_interface_t *node = (rbs_ast_declarations_interface_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("type_params")), rbs_node_list_to_ruby_array(ctx, node->type_params)); + rb_hash_aset(h, ID2SYM(rb_intern("members")), rbs_node_list_to_ruby_array(ctx, node->members)); + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + + rb_funcall( + RBS_AST_TypeParam, + rb_intern("resolve_variables"), + 1, + rb_hash_lookup(h, ID2SYM(rb_intern("type_params"))) + ); + return CLASS_NEW_INSTANCE( + RBS_AST_Declarations_Interface, + 1, + &h + ); + } + case RBS_AST_DECLARATIONS_MODULE: { + rbs_ast_declarations_module_t *node = (rbs_ast_declarations_module_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), 
rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("type_params")), rbs_node_list_to_ruby_array(ctx, node->type_params)); + rb_hash_aset(h, ID2SYM(rb_intern("self_types")), rbs_node_list_to_ruby_array(ctx, node->self_types)); + rb_hash_aset(h, ID2SYM(rb_intern("members")), rbs_node_list_to_ruby_array(ctx, node->members)); + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + + rb_funcall( + RBS_AST_TypeParam, + rb_intern("resolve_variables"), + 1, + rb_hash_lookup(h, ID2SYM(rb_intern("type_params"))) + ); + return CLASS_NEW_INSTANCE( + RBS_AST_Declarations_Module, + 1, + &h + ); + } + case RBS_AST_DECLARATIONS_MODULE_SELF: { + rbs_ast_declarations_module_self_t *node = (rbs_ast_declarations_module_self_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("args")), rbs_node_list_to_ruby_array(ctx, node->args)); + + return CLASS_NEW_INSTANCE( + RBS_AST_Declarations_Module_Self, + 1, + &h + ); + } + case RBS_AST_DECLARATIONS_MODULE_ALIAS: { + rbs_ast_declarations_module_alias_t *node = (rbs_ast_declarations_module_alias_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("new_name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->new_name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("old_name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->old_name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("comment")), 
rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + + return CLASS_NEW_INSTANCE( + RBS_AST_Declarations_ModuleAlias, + 1, + &h + ); + } + case RBS_AST_DECLARATIONS_TYPE_ALIAS: { + rbs_ast_declarations_type_alias_t *node = (rbs_ast_declarations_type_alias_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("type_params")), rbs_node_list_to_ruby_array(ctx, node->type_params)); + rb_hash_aset(h, ID2SYM(rb_intern("type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + + rb_funcall( + RBS_AST_TypeParam, + rb_intern("resolve_variables"), + 1, + rb_hash_lookup(h, ID2SYM(rb_intern("type_params"))) + ); + return CLASS_NEW_INSTANCE( + RBS_AST_Declarations_TypeAlias, + 1, + &h + ); + } + case RBS_AST_DIRECTIVES_USE: { + rbs_ast_directives_use_t *node = (rbs_ast_directives_use_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("clauses")), rbs_node_list_to_ruby_array(ctx, node->clauses)); + + return CLASS_NEW_INSTANCE( + RBS_AST_Directives_Use, + 1, + &h + ); + } + case RBS_AST_DIRECTIVES_USE_SINGLE_CLAUSE: { + rbs_ast_directives_use_single_clause_t *node = (rbs_ast_directives_use_single_clause_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), 
rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("type_name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type_name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("new_name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->new_name)); // rbs_ast_symbol + + return CLASS_NEW_INSTANCE( + RBS_AST_Directives_Use_SingleClause, + 1, + &h + ); + } + case RBS_AST_DIRECTIVES_USE_WILDCARD_CLAUSE: { + rbs_ast_directives_use_wildcard_clause_t *node = (rbs_ast_directives_use_wildcard_clause_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("namespace")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->rbs_namespace)); // rbs_namespace + + return CLASS_NEW_INSTANCE( + RBS_AST_Directives_Use_WildcardClause, + 1, + &h + ); + } + case RBS_AST_INTEGER: { + rbs_ast_integer_t *integer_node = (rbs_ast_integer_t *) instance; + rbs_string_t string_repr = integer_node->string_representation; + + VALUE str = rb_enc_str_new(string_repr.start, rbs_string_len(string_repr), rb_utf8_encoding()); + + return rb_funcall(str, rb_intern("to_i"), 0); + } + case RBS_AST_MEMBERS_ALIAS: { + rbs_ast_members_alias_t *node = (rbs_ast_members_alias_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("new_name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->new_name)); // rbs_ast_symbol + rb_hash_aset(h, ID2SYM(rb_intern("old_name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->old_name)); // rbs_ast_symbol + rb_hash_aset(h, ID2SYM(rb_intern("kind")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->kind)); // rbs_keyword + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + rb_hash_aset(h, ID2SYM(rb_intern("comment")), 
rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + + return CLASS_NEW_INSTANCE( + RBS_AST_Members_Alias, + 1, + &h + ); + } + case RBS_AST_MEMBERS_ATTR_ACCESSOR: { + rbs_ast_members_attr_accessor_t *node = (rbs_ast_members_attr_accessor_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_ast_symbol + rb_hash_aset(h, ID2SYM(rb_intern("type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("ivar_name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->ivar_name)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("kind")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->kind)); // rbs_keyword + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + rb_hash_aset(h, ID2SYM(rb_intern("visibility")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->visibility)); // rbs_keyword + + return CLASS_NEW_INSTANCE( + RBS_AST_Members_AttrAccessor, + 1, + &h + ); + } + case RBS_AST_MEMBERS_ATTR_READER: { + rbs_ast_members_attr_reader_t *node = (rbs_ast_members_attr_reader_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_ast_symbol + rb_hash_aset(h, ID2SYM(rb_intern("type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("ivar_name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->ivar_name)); // rbs_node + rb_hash_aset(h, 
ID2SYM(rb_intern("kind")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->kind)); // rbs_keyword + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + rb_hash_aset(h, ID2SYM(rb_intern("visibility")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->visibility)); // rbs_keyword + + return CLASS_NEW_INSTANCE( + RBS_AST_Members_AttrReader, + 1, + &h + ); + } + case RBS_AST_MEMBERS_ATTR_WRITER: { + rbs_ast_members_attr_writer_t *node = (rbs_ast_members_attr_writer_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_ast_symbol + rb_hash_aset(h, ID2SYM(rb_intern("type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("ivar_name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->ivar_name)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("kind")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->kind)); // rbs_keyword + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + rb_hash_aset(h, ID2SYM(rb_intern("visibility")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->visibility)); // rbs_keyword + + return CLASS_NEW_INSTANCE( + RBS_AST_Members_AttrWriter, + 1, + &h + ); + } + case RBS_AST_MEMBERS_CLASS_INSTANCE_VARIABLE: { + rbs_ast_members_class_instance_variable_t *node = (rbs_ast_members_class_instance_variable_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, 
node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_ast_symbol + rb_hash_aset(h, ID2SYM(rb_intern("type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + + return CLASS_NEW_INSTANCE( + RBS_AST_Members_ClassInstanceVariable, + 1, + &h + ); + } + case RBS_AST_MEMBERS_CLASS_VARIABLE: { + rbs_ast_members_class_variable_t *node = (rbs_ast_members_class_variable_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_ast_symbol + rb_hash_aset(h, ID2SYM(rb_intern("type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + + return CLASS_NEW_INSTANCE( + RBS_AST_Members_ClassVariable, + 1, + &h + ); + } + case RBS_AST_MEMBERS_EXTEND: { + rbs_ast_members_extend_t *node = (rbs_ast_members_extend_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("args")), rbs_node_list_to_ruby_array(ctx, node->args)); + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + + return CLASS_NEW_INSTANCE( + RBS_AST_Members_Extend, + 1, + &h + ); + } + case RBS_AST_MEMBERS_INCLUDE: { + 
rbs_ast_members_include_t *node = (rbs_ast_members_include_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("args")), rbs_node_list_to_ruby_array(ctx, node->args)); + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + + return CLASS_NEW_INSTANCE( + RBS_AST_Members_Include, + 1, + &h + ); + } + case RBS_AST_MEMBERS_INSTANCE_VARIABLE: { + rbs_ast_members_instance_variable_t *node = (rbs_ast_members_instance_variable_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_ast_symbol + rb_hash_aset(h, ID2SYM(rb_intern("type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + + return CLASS_NEW_INSTANCE( + RBS_AST_Members_InstanceVariable, + 1, + &h + ); + } + case RBS_AST_MEMBERS_METHOD_DEFINITION: { + rbs_ast_members_method_definition_t *node = (rbs_ast_members_method_definition_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_ast_symbol + rb_hash_aset(h, ID2SYM(rb_intern("kind")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->kind)); // rbs_keyword + rb_hash_aset(h, 
ID2SYM(rb_intern("overloads")), rbs_node_list_to_ruby_array(ctx, node->overloads)); + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + rb_hash_aset(h, ID2SYM(rb_intern("overloading")), node->overloading ? Qtrue : Qfalse); + rb_hash_aset(h, ID2SYM(rb_intern("visibility")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->visibility)); // rbs_keyword + + return CLASS_NEW_INSTANCE( + RBS_AST_Members_MethodDefinition, + 1, + &h + ); + } + case RBS_AST_MEMBERS_METHOD_DEFINITION_OVERLOAD: { + rbs_ast_members_method_definition_overload_t *node = (rbs_ast_members_method_definition_overload_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + rb_hash_aset(h, ID2SYM(rb_intern("method_type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->method_type)); // rbs_node + + return CLASS_NEW_INSTANCE( + RBS_AST_Members_MethodDefinition_Overload, + 1, + &h + ); + } + case RBS_AST_MEMBERS_PREPEND: { + rbs_ast_members_prepend_t *node = (rbs_ast_members_prepend_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("args")), rbs_node_list_to_ruby_array(ctx, node->args)); + rb_hash_aset(h, ID2SYM(rb_intern("annotations")), rbs_node_list_to_ruby_array(ctx, node->annotations)); + rb_hash_aset(h, ID2SYM(rb_intern("comment")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->comment)); // rbs_ast_comment + + return CLASS_NEW_INSTANCE( + RBS_AST_Members_Prepend, + 1, + &h + ); + } + case RBS_AST_MEMBERS_PRIVATE: { + rbs_ast_members_private_t *node = 
(rbs_ast_members_private_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + + return CLASS_NEW_INSTANCE( + RBS_AST_Members_Private, + 1, + &h + ); + } + case RBS_AST_MEMBERS_PUBLIC: { + rbs_ast_members_public_t *node = (rbs_ast_members_public_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + + return CLASS_NEW_INSTANCE( + RBS_AST_Members_Public, + 1, + &h + ); + } + case RBS_AST_STRING: { + rbs_ast_string_t *string_node = (rbs_ast_string_t *) instance; + rbs_string_t s = string_node->string; + + return rb_enc_str_new(s.start, rbs_string_len(s), rb_utf8_encoding()); + } + case RBS_AST_TYPE_PARAM: { + rbs_ast_type_param_t *node = (rbs_ast_type_param_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_ast_symbol + rb_hash_aset(h, ID2SYM(rb_intern("variance")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->variance)); // rbs_keyword + rb_hash_aset(h, ID2SYM(rb_intern("upper_bound")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->upper_bound)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("default_type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->default_type)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("unchecked")), node->unchecked ? 
Qtrue : Qfalse); + + return CLASS_NEW_INSTANCE( + RBS_AST_TypeParam, + 1, + &h + ); + } + case RBS_METHOD_TYPE: { + rbs_method_type_t *node = (rbs_method_type_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("type_params")), rbs_node_list_to_ruby_array(ctx, node->type_params)); + rb_hash_aset(h, ID2SYM(rb_intern("type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("block")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->block)); // rbs_types_block + + rb_funcall( + RBS_AST_TypeParam, + rb_intern("resolve_variables"), + 1, + rb_hash_lookup(h, ID2SYM(rb_intern("type_params"))) + ); + return CLASS_NEW_INSTANCE( + RBS_MethodType, + 1, + &h + ); + } + case RBS_NAMESPACE: { + rbs_namespace_t *node = (rbs_namespace_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("path")), rbs_node_list_to_ruby_array(ctx, node->path)); + rb_hash_aset(h, ID2SYM(rb_intern("absolute")), node->absolute ? 
Qtrue : Qfalse); + + return CLASS_NEW_INSTANCE( + RBS_Namespace, + 1, + &h + ); + } + case RBS_SIGNATURE: { + rbs_signature_t *signature = (rbs_signature_t *) instance; + + VALUE array = rb_ary_new(); + rb_ary_push(array, rbs_node_list_to_ruby_array(ctx, signature->directives)); + rb_ary_push(array, rbs_node_list_to_ruby_array(ctx, signature->declarations)); + return array; + } + case RBS_TYPE_NAME: { + rbs_type_name_t *node = (rbs_type_name_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("namespace")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->rbs_namespace)); // rbs_namespace + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_ast_symbol + + return CLASS_NEW_INSTANCE( + RBS_TypeName, + 1, + &h + ); + } + case RBS_TYPES_ALIAS: { + rbs_types_alias_t *node = (rbs_types_alias_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("args")), rbs_node_list_to_ruby_array(ctx, node->args)); + + return CLASS_NEW_INSTANCE( + RBS_Types_Alias, + 1, + &h + ); + } + case RBS_TYPES_BASES_ANY: { + rbs_types_bases_any_t *node = (rbs_types_bases_any_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("todo")), node->todo ? 
Qtrue : Qfalse); + + return CLASS_NEW_INSTANCE( + RBS_Types_Bases_Any, + 1, + &h + ); + } + case RBS_TYPES_BASES_BOOL: { + rbs_types_bases_bool_t *node = (rbs_types_bases_bool_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + + return CLASS_NEW_INSTANCE( + RBS_Types_Bases_Bool, + 1, + &h + ); + } + case RBS_TYPES_BASES_BOTTOM: { + rbs_types_bases_bottom_t *node = (rbs_types_bases_bottom_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + + return CLASS_NEW_INSTANCE( + RBS_Types_Bases_Bottom, + 1, + &h + ); + } + case RBS_TYPES_BASES_CLASS: { + rbs_types_bases_class_t *node = (rbs_types_bases_class_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + + return CLASS_NEW_INSTANCE( + RBS_Types_Bases_Class, + 1, + &h + ); + } + case RBS_TYPES_BASES_INSTANCE: { + rbs_types_bases_instance_t *node = (rbs_types_bases_instance_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + + return CLASS_NEW_INSTANCE( + RBS_Types_Bases_Instance, + 1, + &h + ); + } + case RBS_TYPES_BASES_NIL: { + rbs_types_bases_nil_t *node = (rbs_types_bases_nil_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + + return CLASS_NEW_INSTANCE( + RBS_Types_Bases_Nil, + 1, + &h + ); + } + case RBS_TYPES_BASES_SELF: { + rbs_types_bases_self_t *node = (rbs_types_bases_self_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + + return CLASS_NEW_INSTANCE( + RBS_Types_Bases_Self, + 1, + &h + ); + } + case RBS_TYPES_BASES_TOP: { + 
rbs_types_bases_top_t *node = (rbs_types_bases_top_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + + return CLASS_NEW_INSTANCE( + RBS_Types_Bases_Top, + 1, + &h + ); + } + case RBS_TYPES_BASES_VOID: { + rbs_types_bases_void_t *node = (rbs_types_bases_void_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + + return CLASS_NEW_INSTANCE( + RBS_Types_Bases_Void, + 1, + &h + ); + } + case RBS_TYPES_BLOCK: { + rbs_types_block_t *node = (rbs_types_block_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("required")), node->required ? Qtrue : Qfalse); + rb_hash_aset(h, ID2SYM(rb_intern("self_type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->self_type)); // rbs_node + + return CLASS_NEW_INSTANCE( + RBS_Types_Block, + 1, + &h + ); + } + case RBS_TYPES_CLASS_INSTANCE: { + rbs_types_class_instance_t *node = (rbs_types_class_instance_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("args")), rbs_node_list_to_ruby_array(ctx, node->args)); + + return CLASS_NEW_INSTANCE( + RBS_Types_ClassInstance, + 1, + &h + ); + } + case RBS_TYPES_CLASS_SINGLETON: { + rbs_types_class_singleton_t *node = (rbs_types_class_singleton_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_type_name + + return 
CLASS_NEW_INSTANCE( + RBS_Types_ClassSingleton, + 1, + &h + ); + } + case RBS_TYPES_FUNCTION: { + rbs_types_function_t *node = (rbs_types_function_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("required_positionals")), rbs_node_list_to_ruby_array(ctx, node->required_positionals)); + rb_hash_aset(h, ID2SYM(rb_intern("optional_positionals")), rbs_node_list_to_ruby_array(ctx, node->optional_positionals)); + rb_hash_aset(h, ID2SYM(rb_intern("rest_positionals")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->rest_positionals)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("trailing_positionals")), rbs_node_list_to_ruby_array(ctx, node->trailing_positionals)); + rb_hash_aset(h, ID2SYM(rb_intern("required_keywords")), rbs_hash_to_ruby_hash(ctx, node->required_keywords)); + rb_hash_aset(h, ID2SYM(rb_intern("optional_keywords")), rbs_hash_to_ruby_hash(ctx, node->optional_keywords)); + rb_hash_aset(h, ID2SYM(rb_intern("rest_keywords")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->rest_keywords)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("return_type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->return_type)); // rbs_node + + return CLASS_NEW_INSTANCE( + RBS_Types_Function, + 1, + &h + ); + } + case RBS_TYPES_FUNCTION_PARAM: { + rbs_types_function_param_t *node = (rbs_types_function_param_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_ast_symbol + + return CLASS_NEW_INSTANCE( + RBS_Types_Function_Param, + 1, + &h + ); + } + case RBS_TYPES_INTERFACE: { + rbs_types_interface_t *node = (rbs_types_interface_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), 
rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_type_name + rb_hash_aset(h, ID2SYM(rb_intern("args")), rbs_node_list_to_ruby_array(ctx, node->args)); + + return CLASS_NEW_INSTANCE( + RBS_Types_Interface, + 1, + &h + ); + } + case RBS_TYPES_INTERSECTION: { + rbs_types_intersection_t *node = (rbs_types_intersection_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("types")), rbs_node_list_to_ruby_array(ctx, node->types)); + + return CLASS_NEW_INSTANCE( + RBS_Types_Intersection, + 1, + &h + ); + } + case RBS_TYPES_LITERAL: { + rbs_types_literal_t *node = (rbs_types_literal_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("literal")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->literal)); // rbs_node + + return CLASS_NEW_INSTANCE( + RBS_Types_Literal, + 1, + &h + ); + } + case RBS_TYPES_OPTIONAL: { + rbs_types_optional_t *node = (rbs_types_optional_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type)); // rbs_node + + return CLASS_NEW_INSTANCE( + RBS_Types_Optional, + 1, + &h + ); + } + case RBS_TYPES_PROC: { + rbs_types_proc_t *node = (rbs_types_proc_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->type)); // rbs_node + rb_hash_aset(h, ID2SYM(rb_intern("block")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) 
node->block)); // rbs_types_block + rb_hash_aset(h, ID2SYM(rb_intern("self_type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->self_type)); // rbs_node + + return CLASS_NEW_INSTANCE( + RBS_Types_Proc, + 1, + &h + ); + } + case RBS_TYPES_RECORD: { + rbs_types_record_t *node = (rbs_types_record_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("all_fields")), rbs_hash_to_ruby_hash(ctx, node->all_fields)); + + return CLASS_NEW_INSTANCE( + RBS_Types_Record, + 1, + &h + ); + } + case RBS_TYPES_RECORD_FIELD_TYPE: { + rbs_types_record_field_type_t *record_fieldtype = (rbs_types_record_field_type_t *) instance; + + VALUE array = rb_ary_new(); + rb_ary_push(array, rbs_struct_to_ruby_value(ctx, record_fieldtype->type)); + rb_ary_push(array, record_fieldtype->required ? Qtrue : Qfalse); + return array; + } + case RBS_TYPES_TUPLE: { + rbs_types_tuple_t *node = (rbs_types_tuple_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("types")), rbs_node_list_to_ruby_array(ctx, node->types)); + + return CLASS_NEW_INSTANCE( + RBS_Types_Tuple, + 1, + &h + ); + } + case RBS_TYPES_UNION: { + rbs_types_union_t *node = (rbs_types_union_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("types")), rbs_node_list_to_ruby_array(ctx, node->types)); + + return CLASS_NEW_INSTANCE( + RBS_Types_Union, + 1, + &h + ); + } + case RBS_TYPES_UNTYPED_FUNCTION: { + rbs_types_untyped_function_t *node = (rbs_types_untyped_function_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("return_type")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->return_type)); // rbs_node + + return 
CLASS_NEW_INSTANCE( + RBS_Types_UntypedFunction, + 1, + &h + ); + } + case RBS_TYPES_VARIABLE: { + rbs_types_variable_t *node = (rbs_types_variable_t *) instance; + + VALUE h = rb_hash_new(); + rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location)); + rb_hash_aset(h, ID2SYM(rb_intern("name")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node->name)); // rbs_ast_symbol + + return CLASS_NEW_INSTANCE( + RBS_Types_Variable, + 1, + &h + ); + } + case RBS_KEYWORD: { + rbs_constant_t *constant = rbs_constant_pool_id_to_constant(RBS_GLOBAL_CONSTANT_POOL, ((rbs_keyword_t *) instance)->constant_id); + assert(constant != NULL && "constant is NULL"); + assert(constant->start != NULL && "constant->start is NULL"); + + return ID2SYM(rb_intern2((const char *) constant->start, constant->length)); + } + case RBS_AST_SYMBOL: { + rbs_constant_t *constant = rbs_constant_pool_id_to_constant(ctx.constant_pool, ((rbs_keyword_t *) instance)->constant_id); + assert(constant != NULL && "constant is NULL"); + assert(constant->start != NULL && "constant->start is NULL"); + + return ID2SYM(rb_intern3((const char *) constant->start, constant->length, ctx.encoding)); + } + } + + rb_raise(rb_eRuntimeError, "Unknown node type: %d", instance->type); +} diff --git a/ext/rbs_extension/ast_translation.h b/ext/rbs_extension/ast_translation.h new file mode 100644 index 000000000..eac4f6238 --- /dev/null +++ b/ext/rbs_extension/ast_translation.h @@ -0,0 +1,37 @@ +/*----------------------------------------------------------------------------*/ +/* This file is generated by the templates/template.rb script and should not */ +/* be modified manually. 
*/ +/* To change the template see */ +/* templates/ext/rbs_extension/ast_translation.h.erb */ +/*----------------------------------------------------------------------------*/ + +#ifndef RBS_EXTENSION_AST_TRANSLATION_H +#define RBS_EXTENSION_AST_TRANSLATION_H + +#include "compat.h" + +SUPPRESS_RUBY_HEADER_DIAGNOSTICS_BEGIN +#include "ruby.h" +#include "ruby/encoding.h" +SUPPRESS_RUBY_HEADER_DIAGNOSTICS_END + +#include "rbs/ast.h" +#include "rbs/location.h" + +/// A bag of values needed when copying RBS C structs into Ruby objects. +typedef struct rbs_translation_context { + rbs_constant_pool_t *constant_pool; + VALUE buffer; + rb_encoding *encoding; +} rbs_translation_context_t; + +rbs_translation_context_t rbs_translation_context_create(rbs_constant_pool_t *, VALUE buffer_string, rb_encoding *ruby_encoding); + +VALUE rbs_node_list_to_ruby_array(rbs_translation_context_t, rbs_node_list_t *list); +VALUE rbs_hash_to_ruby_hash(rbs_translation_context_t, rbs_hash_t *hash); +VALUE rbs_struct_to_ruby_value(rbs_translation_context_t, rbs_node_t *instance); + +extern VALUE EMPTY_ARRAY; +extern VALUE EMPTY_HASH; + +#endif diff --git a/ext/rbs_extension/class_constants.c b/ext/rbs_extension/class_constants.c new file mode 100644 index 000000000..5b61043fd --- /dev/null +++ b/ext/rbs_extension/class_constants.c @@ -0,0 +1,157 @@ +/*----------------------------------------------------------------------------*/ +/* This file is generated by the templates/template.rb script and should not */ +/* be modified manually. 
*/ +/* To change the template see */ +/* templates/ext/rbs_extension/class_constants.c.erb */ +/*----------------------------------------------------------------------------*/ + +#include "rbs_extension.h" + +VALUE RBS_Parser; + +VALUE RBS; +VALUE RBS_AST; +VALUE RBS_AST_Declarations; +VALUE RBS_AST_Directives; +VALUE RBS_AST_Members; +VALUE RBS_Parser; +VALUE RBS_Types; +VALUE RBS_Types_Bases; + +VALUE RBS_AST_Annotation; +VALUE RBS_AST_Comment; +VALUE RBS_AST_Declarations_Class; +VALUE RBS_AST_Declarations_Class_Super; +VALUE RBS_AST_Declarations_ClassAlias; +VALUE RBS_AST_Declarations_Constant; +VALUE RBS_AST_Declarations_Global; +VALUE RBS_AST_Declarations_Interface; +VALUE RBS_AST_Declarations_Module; +VALUE RBS_AST_Declarations_Module_Self; +VALUE RBS_AST_Declarations_ModuleAlias; +VALUE RBS_AST_Declarations_TypeAlias; +VALUE RBS_AST_Directives_Use; +VALUE RBS_AST_Directives_Use_SingleClause; +VALUE RBS_AST_Directives_Use_WildcardClause; +VALUE RBS_AST_Members_Alias; +VALUE RBS_AST_Members_AttrAccessor; +VALUE RBS_AST_Members_AttrReader; +VALUE RBS_AST_Members_AttrWriter; +VALUE RBS_AST_Members_ClassInstanceVariable; +VALUE RBS_AST_Members_ClassVariable; +VALUE RBS_AST_Members_Extend; +VALUE RBS_AST_Members_Include; +VALUE RBS_AST_Members_InstanceVariable; +VALUE RBS_AST_Members_MethodDefinition; +VALUE RBS_AST_Members_MethodDefinition_Overload; +VALUE RBS_AST_Members_Prepend; +VALUE RBS_AST_Members_Private; +VALUE RBS_AST_Members_Public; +VALUE RBS_AST_TypeParam; +VALUE RBS_MethodType; +VALUE RBS_Namespace; +VALUE RBS_TypeName; +VALUE RBS_Types_Alias; +VALUE RBS_Types_Bases_Any; +VALUE RBS_Types_Bases_Bool; +VALUE RBS_Types_Bases_Bottom; +VALUE RBS_Types_Bases_Class; +VALUE RBS_Types_Bases_Instance; +VALUE RBS_Types_Bases_Nil; +VALUE RBS_Types_Bases_Self; +VALUE RBS_Types_Bases_Top; +VALUE RBS_Types_Bases_Void; +VALUE RBS_Types_Block; +VALUE RBS_Types_ClassInstance; +VALUE RBS_Types_ClassSingleton; +VALUE RBS_Types_Function; +VALUE RBS_Types_Function_Param; 
+VALUE RBS_Types_Interface; +VALUE RBS_Types_Intersection; +VALUE RBS_Types_Literal; +VALUE RBS_Types_Optional; +VALUE RBS_Types_Proc; +VALUE RBS_Types_Record; +VALUE RBS_Types_Tuple; +VALUE RBS_Types_Union; +VALUE RBS_Types_UntypedFunction; +VALUE RBS_Types_Variable; + +VALUE RBS_ParsingError; + +#define IMPORT_CONSTANT(var, parent, name) \ + { \ + var = rb_const_get(parent, rb_intern(name)); \ + rb_gc_register_mark_object(var); \ + } + +void rbs__init_constants(void) { + IMPORT_CONSTANT(RBS, rb_cObject, "RBS"); + IMPORT_CONSTANT(RBS_ParsingError, RBS, "ParsingError"); + + IMPORT_CONSTANT(RBS_AST, RBS, "AST"); + IMPORT_CONSTANT(RBS_AST_Declarations, RBS_AST, "Declarations"); + IMPORT_CONSTANT(RBS_AST_Directives, RBS_AST, "Directives"); + IMPORT_CONSTANT(RBS_AST_Members, RBS_AST, "Members"); + IMPORT_CONSTANT(RBS_Types, RBS, "Types"); + IMPORT_CONSTANT(RBS_Types_Bases, RBS_Types, "Bases"); + + IMPORT_CONSTANT(RBS_AST_Annotation, RBS_AST, "Annotation"); + IMPORT_CONSTANT(RBS_AST_Comment, RBS_AST, "Comment"); + IMPORT_CONSTANT(RBS_AST_Declarations_Class, RBS_AST_Declarations, "Class"); + IMPORT_CONSTANT(RBS_AST_Declarations_Class_Super, RBS_AST_Declarations_Class, "Super"); + IMPORT_CONSTANT(RBS_AST_Declarations_ClassAlias, RBS_AST_Declarations, "ClassAlias"); + IMPORT_CONSTANT(RBS_AST_Declarations_Constant, RBS_AST_Declarations, "Constant"); + IMPORT_CONSTANT(RBS_AST_Declarations_Global, RBS_AST_Declarations, "Global"); + IMPORT_CONSTANT(RBS_AST_Declarations_Interface, RBS_AST_Declarations, "Interface"); + IMPORT_CONSTANT(RBS_AST_Declarations_Module, RBS_AST_Declarations, "Module"); + IMPORT_CONSTANT(RBS_AST_Declarations_Module_Self, RBS_AST_Declarations_Module, "Self"); + IMPORT_CONSTANT(RBS_AST_Declarations_ModuleAlias, RBS_AST_Declarations, "ModuleAlias"); + IMPORT_CONSTANT(RBS_AST_Declarations_TypeAlias, RBS_AST_Declarations, "TypeAlias"); + IMPORT_CONSTANT(RBS_AST_Directives_Use, RBS_AST_Directives, "Use"); + IMPORT_CONSTANT(RBS_AST_Directives_Use_SingleClause, 
RBS_AST_Directives_Use, "SingleClause"); + IMPORT_CONSTANT(RBS_AST_Directives_Use_WildcardClause, RBS_AST_Directives_Use, "WildcardClause"); + IMPORT_CONSTANT(RBS_AST_Members_Alias, RBS_AST_Members, "Alias"); + IMPORT_CONSTANT(RBS_AST_Members_AttrAccessor, RBS_AST_Members, "AttrAccessor"); + IMPORT_CONSTANT(RBS_AST_Members_AttrReader, RBS_AST_Members, "AttrReader"); + IMPORT_CONSTANT(RBS_AST_Members_AttrWriter, RBS_AST_Members, "AttrWriter"); + IMPORT_CONSTANT(RBS_AST_Members_ClassInstanceVariable, RBS_AST_Members, "ClassInstanceVariable"); + IMPORT_CONSTANT(RBS_AST_Members_ClassVariable, RBS_AST_Members, "ClassVariable"); + IMPORT_CONSTANT(RBS_AST_Members_Extend, RBS_AST_Members, "Extend"); + IMPORT_CONSTANT(RBS_AST_Members_Include, RBS_AST_Members, "Include"); + IMPORT_CONSTANT(RBS_AST_Members_InstanceVariable, RBS_AST_Members, "InstanceVariable"); + IMPORT_CONSTANT(RBS_AST_Members_MethodDefinition, RBS_AST_Members, "MethodDefinition"); + IMPORT_CONSTANT(RBS_AST_Members_MethodDefinition_Overload, RBS_AST_Members_MethodDefinition, "Overload"); + IMPORT_CONSTANT(RBS_AST_Members_Prepend, RBS_AST_Members, "Prepend"); + IMPORT_CONSTANT(RBS_AST_Members_Private, RBS_AST_Members, "Private"); + IMPORT_CONSTANT(RBS_AST_Members_Public, RBS_AST_Members, "Public"); + IMPORT_CONSTANT(RBS_AST_TypeParam, RBS_AST, "TypeParam"); + IMPORT_CONSTANT(RBS_MethodType, RBS, "MethodType"); + IMPORT_CONSTANT(RBS_Namespace, RBS, "Namespace"); + IMPORT_CONSTANT(RBS_TypeName, RBS, "TypeName"); + IMPORT_CONSTANT(RBS_Types_Alias, RBS_Types, "Alias"); + IMPORT_CONSTANT(RBS_Types_Bases_Any, RBS_Types_Bases, "Any"); + IMPORT_CONSTANT(RBS_Types_Bases_Bool, RBS_Types_Bases, "Bool"); + IMPORT_CONSTANT(RBS_Types_Bases_Bottom, RBS_Types_Bases, "Bottom"); + IMPORT_CONSTANT(RBS_Types_Bases_Class, RBS_Types_Bases, "Class"); + IMPORT_CONSTANT(RBS_Types_Bases_Instance, RBS_Types_Bases, "Instance"); + IMPORT_CONSTANT(RBS_Types_Bases_Nil, RBS_Types_Bases, "Nil"); + IMPORT_CONSTANT(RBS_Types_Bases_Self, 
RBS_Types_Bases, "Self"); + IMPORT_CONSTANT(RBS_Types_Bases_Top, RBS_Types_Bases, "Top"); + IMPORT_CONSTANT(RBS_Types_Bases_Void, RBS_Types_Bases, "Void"); + IMPORT_CONSTANT(RBS_Types_Block, RBS_Types, "Block"); + IMPORT_CONSTANT(RBS_Types_ClassInstance, RBS_Types, "ClassInstance"); + IMPORT_CONSTANT(RBS_Types_ClassSingleton, RBS_Types, "ClassSingleton"); + IMPORT_CONSTANT(RBS_Types_Function, RBS_Types, "Function"); + IMPORT_CONSTANT(RBS_Types_Function_Param, RBS_Types_Function, "Param"); + IMPORT_CONSTANT(RBS_Types_Interface, RBS_Types, "Interface"); + IMPORT_CONSTANT(RBS_Types_Intersection, RBS_Types, "Intersection"); + IMPORT_CONSTANT(RBS_Types_Literal, RBS_Types, "Literal"); + IMPORT_CONSTANT(RBS_Types_Optional, RBS_Types, "Optional"); + IMPORT_CONSTANT(RBS_Types_Proc, RBS_Types, "Proc"); + IMPORT_CONSTANT(RBS_Types_Record, RBS_Types, "Record"); + IMPORT_CONSTANT(RBS_Types_Tuple, RBS_Types, "Tuple"); + IMPORT_CONSTANT(RBS_Types_Union, RBS_Types, "Union"); + IMPORT_CONSTANT(RBS_Types_UntypedFunction, RBS_Types, "UntypedFunction"); + IMPORT_CONSTANT(RBS_Types_Variable, RBS_Types, "Variable"); +} diff --git a/include/rbs/constants.h b/ext/rbs_extension/class_constants.h similarity index 94% rename from include/rbs/constants.h rename to ext/rbs_extension/class_constants.h index 995f5f923..c67553916 100644 --- a/include/rbs/constants.h +++ b/ext/rbs_extension/class_constants.h @@ -2,12 +2,18 @@ /* This file is generated by the templates/template.rb script and should not */ /* be modified manually. 
*/ /* To change the template see */ -/* templates/include/rbs/constants.h.erb */ +/* templates/ext/rbs_extension/class_constants.h.erb */ /*----------------------------------------------------------------------------*/ #ifndef RBS__CONSTANTS_H #define RBS__CONSTANTS_H +#include "compat.h" + +SUPPRESS_RUBY_HEADER_DIAGNOSTICS_BEGIN +#include "ruby.h" +SUPPRESS_RUBY_HEADER_DIAGNOSTICS_END + extern VALUE RBS; extern VALUE RBS_AST; diff --git a/ext/rbs_extension/compat.h b/ext/rbs_extension/compat.h new file mode 100644 index 000000000..3af9320a5 --- /dev/null +++ b/ext/rbs_extension/compat.h @@ -0,0 +1,10 @@ +#ifdef __clang__ +#define SUPPRESS_RUBY_HEADER_DIAGNOSTICS_BEGIN \ + _Pragma("clang diagnostic push") \ + _Pragma("clang diagnostic ignored \"-Wc2x-extensions\"") +#define SUPPRESS_RUBY_HEADER_DIAGNOSTICS_END \ + _Pragma("clang diagnostic pop") +#else +#define SUPPRESS_RUBY_HEADER_DIAGNOSTICS_BEGIN +#define SUPPRESS_RUBY_HEADER_DIAGNOSTICS_END +#endif diff --git a/ext/rbs_extension/extconf.rb b/ext/rbs_extension/extconf.rb index 9ea9b0006..5a45307f2 100644 --- a/ext/rbs_extension/extconf.rb +++ b/ext/rbs_extension/extconf.rb @@ -11,5 +11,17 @@ $srcs = Dir.glob("#{root_dir}/src/**/*.c") + Dir.glob("#{root_dir}/ext/rbs_extension/*.c") -append_cflags ['-std=gnu99'] +append_cflags [ + '-std=gnu99', + '-Wimplicit-fallthrough', + '-Wunused-result', + '-Wc++-compat', +] + +append_cflags ['-O0', '-pg'] if ENV['DEBUG'] +if ENV["TEST_NO_C23"] + puts "Adding -Wc2x-extensions to CFLAGS" + $CFLAGS << " -Werror -Wc2x-extensions" +end + create_makefile 'rbs_extension' diff --git a/ext/rbs_extension/legacy_location.c b/ext/rbs_extension/legacy_location.c new file mode 100644 index 000000000..ba7f30684 --- /dev/null +++ b/ext/rbs_extension/legacy_location.c @@ -0,0 +1,317 @@ +#include "legacy_location.h" +#include "rbs_extension.h" + +#define RBS_LOC_REQUIRED_P(loc, i) ((loc)->children->required_p & (1 << (i))) +#define RBS_LOC_OPTIONAL_P(loc, i) (!RBS_LOC_REQUIRED_P((loc), (i))) 
+#define RBS_LOC_CHILDREN_SIZE(cap) (sizeof(rbs_loc_children) + sizeof(rbs_loc_entry) * ((cap) - 1)) +#define NULL_LOC_RANGE_P(rg) ((rg).start == -1) + +rbs_loc_range RBS_LOC_NULL_RANGE = { -1, -1 }; +VALUE RBS_Location; + +rbs_position_t rbs_loc_position(int char_pos) { + return (rbs_position_t) { 0, char_pos, -1, -1 }; +} + +rbs_position_t rbs_loc_position3(int char_pos, int line, int column) { + return (rbs_position_t) { 0, char_pos, line, column }; +} + +static rbs_loc_range rbs_new_loc_range(rbs_range_t rg) { + rbs_loc_range r = { rg.start.char_pos, rg.end.char_pos }; + return r; +} + +static void check_children_max(unsigned short n) { + size_t max = sizeof(rbs_loc_entry_bitmap) * 8; + if (n > max) { + rb_raise(rb_eRuntimeError, "Too many children added to location: %d", n); + } +} + +void rbs_loc_legacy_alloc_children(rbs_loc *loc, unsigned short cap) { + check_children_max(cap); + + size_t s = RBS_LOC_CHILDREN_SIZE(cap); + loc->children = malloc(s); + + *loc->children = (rbs_loc_children) { + .len = 0, + .required_p = 0, + .cap = cap, + .entries = { { 0 } }, + }; +} + +static void check_children_cap(rbs_loc *loc) { + if (loc->children == NULL) { + rbs_loc_legacy_alloc_children(loc, 1); + } else { + if (loc->children->len == loc->children->cap) { + check_children_max(loc->children->cap + 1); + size_t s = RBS_LOC_CHILDREN_SIZE(++loc->children->cap); + loc->children = realloc(loc->children, s); + } + } +} + +void rbs_loc_legacy_add_optional_child(rbs_loc *loc, rbs_constant_id_t name, rbs_range_t r) { + check_children_cap(loc); + + unsigned short i = loc->children->len++; + loc->children->entries[i] = (rbs_loc_entry) { + .name = name, + .rg = rbs_new_loc_range(r), + }; +} + +void rbs_loc_legacy_add_required_child(rbs_loc *loc, rbs_constant_id_t name, rbs_range_t r) { + rbs_loc_legacy_add_optional_child(loc, name, r); + + unsigned short last_index = loc->children->len - 1; + loc->children->required_p |= 1 << last_index; +} + +void rbs_loc_init(rbs_loc *loc, VALUE 
buffer, rbs_loc_range rg) { + *loc = (rbs_loc) { + .buffer = buffer, + .rg = rg, + .children = NULL, + }; +} + +void rbs_loc_free(rbs_loc *loc) { + free(loc->children); + ruby_xfree(loc); +} + +static void rbs_loc_mark(void *ptr) { + rbs_loc *loc = ptr; + rb_gc_mark(loc->buffer); +} + +static size_t rbs_loc_memsize(const void *ptr) { + const rbs_loc *loc = ptr; + if (loc->children == NULL) { + return sizeof(rbs_loc); + } else { + return sizeof(rbs_loc) + RBS_LOC_CHILDREN_SIZE(loc->children->cap); + } +} + +static rb_data_type_t location_type = { + "RBS::Location", + { rbs_loc_mark, (RUBY_DATA_FUNC) rbs_loc_free, rbs_loc_memsize }, + 0, + 0, + RUBY_TYPED_FREE_IMMEDIATELY +}; + +static VALUE location_s_allocate(VALUE klass) { + rbs_loc *loc; + VALUE obj = TypedData_Make_Struct(klass, rbs_loc, &location_type, loc); + + rbs_loc_init(loc, Qnil, RBS_LOC_NULL_RANGE); + + return obj; +} + +rbs_loc *rbs_check_location(VALUE obj) { + return rb_check_typeddata(obj, &location_type); +} + +static VALUE location_initialize(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos) { + rbs_loc *loc = rbs_check_location(self); + + int start = FIX2INT(start_pos); + int end = FIX2INT(end_pos); + + *loc = (rbs_loc) { + .buffer = buffer, + .rg = (rbs_loc_range) { start, end }, + .children = NULL, + }; + + return Qnil; +} + +static VALUE location_initialize_copy(VALUE self, VALUE other) { + rbs_loc *self_loc = rbs_check_location(self); + rbs_loc *other_loc = rbs_check_location(other); + + *self_loc = (rbs_loc) { + .buffer = other_loc->buffer, + .rg = other_loc->rg, + .children = NULL, + }; + + if (other_loc->children != NULL) { + rbs_loc_legacy_alloc_children(self_loc, other_loc->children->cap); + memcpy(self_loc->children, other_loc->children, RBS_LOC_CHILDREN_SIZE(other_loc->children->cap)); + } + + return Qnil; +} + +static VALUE location_buffer(VALUE self) { + rbs_loc *loc = rbs_check_location(self); + return loc->buffer; +} + +static VALUE location_start_pos(VALUE self) { + rbs_loc 
*loc = rbs_check_location(self); + return INT2FIX(loc->rg.start); +} + +static VALUE location_end_pos(VALUE self) { + rbs_loc *loc = rbs_check_location(self); + return INT2FIX(loc->rg.end); +} + +static rbs_constant_id_t rbs_constant_pool_insert_ruby_symbol(VALUE symbol) { + VALUE name = rb_sym2str(symbol); + + // Constants inserted here will never be freed, but that's acceptable because: + // 1. Most symbols passed into here will be the ones already inserted into the constant pool by `parser.c`. + // 2. Methods like `add_required_child` and `add_optional_child` will usually only get called with a few different symbols. + return rbs_constant_pool_insert_constant(RBS_GLOBAL_CONSTANT_POOL, (const uint8_t *) RSTRING_PTR(name), RSTRING_LEN(name)); +} + +static VALUE location_add_required_child(VALUE self, VALUE name, VALUE start, VALUE end) { + rbs_loc *loc = rbs_check_location(self); + + rbs_range_t rg; + rg.start = rbs_loc_position(FIX2INT(start)); + rg.end = rbs_loc_position(FIX2INT(end)); + + rbs_loc_legacy_add_required_child(loc, rbs_constant_pool_insert_ruby_symbol(name), rg); + + return Qnil; +} + +static VALUE location_add_optional_child(VALUE self, VALUE name, VALUE start, VALUE end) { + rbs_loc *loc = rbs_check_location(self); + + rbs_range_t rg; + rg.start = rbs_loc_position(FIX2INT(start)); + rg.end = rbs_loc_position(FIX2INT(end)); + + rbs_loc_legacy_add_optional_child(loc, rbs_constant_pool_insert_ruby_symbol(name), rg); + + return Qnil; +} + +static VALUE location_add_optional_no_child(VALUE self, VALUE name) { + rbs_loc *loc = rbs_check_location(self); + + rbs_loc_legacy_add_optional_child(loc, rbs_constant_pool_insert_ruby_symbol(name), NULL_RANGE); + + return Qnil; +} + +VALUE rbs_new_location(VALUE buffer, rbs_range_t rg) { + rbs_loc *loc; + VALUE obj = TypedData_Make_Struct(RBS_Location, rbs_loc, &location_type, loc); + + rbs_loc_init(loc, buffer, rbs_new_loc_range(rg)); + + return obj; +} + +static VALUE rbs_new_location_from_loc_range(VALUE 
buffer, rbs_loc_range rg) { + rbs_loc *loc; + VALUE obj = TypedData_Make_Struct(RBS_Location, rbs_loc, &location_type, loc); + + rbs_loc_init(loc, buffer, rg); + + return obj; +} + +static rbs_constant_id_t rbs_constant_pool_find_ruby_symbol(VALUE symbol) { + VALUE name = rb_sym2str(symbol); + + return rbs_constant_pool_find(RBS_GLOBAL_CONSTANT_POOL, (const uint8_t *) RSTRING_PTR(name), RSTRING_LEN(name)); +} + +static VALUE location_aref(VALUE self, VALUE name) { + rbs_loc *loc = rbs_check_location(self); + + rbs_constant_id_t id = rbs_constant_pool_find_ruby_symbol(name); + + if (loc->children != NULL && id != RBS_CONSTANT_ID_UNSET) { + for (unsigned short i = 0; i < loc->children->len; i++) { + if (loc->children->entries[i].name == id) { + rbs_loc_range result = loc->children->entries[i].rg; + + if (RBS_LOC_OPTIONAL_P(loc, i) && NULL_LOC_RANGE_P(result)) { + return Qnil; + } else { + return rbs_new_location_from_loc_range(loc->buffer, result); + } + } + } + } + + VALUE string = rb_funcall(name, rb_intern("to_s"), 0); + rb_raise(rb_eRuntimeError, "Unknown child name given: %s", RSTRING_PTR(string)); +} + +static VALUE rbs_constant_to_ruby_symbol(rbs_constant_t *constant) { + return ID2SYM(rb_intern2((const char *) constant->start, constant->length)); +} + +static VALUE location_optional_keys(VALUE self) { + VALUE keys = rb_ary_new(); + + rbs_loc *loc = rbs_check_location(self); + rbs_loc_children *children = loc->children; + if (children == NULL) { + return keys; + } + + for (unsigned short i = 0; i < children->len; i++) { + if (RBS_LOC_OPTIONAL_P(loc, i)) { + rbs_constant_t *key_id = rbs_constant_pool_id_to_constant(RBS_GLOBAL_CONSTANT_POOL, children->entries[i].name); + VALUE key_sym = rbs_constant_to_ruby_symbol(key_id); + rb_ary_push(keys, key_sym); + } + } + + return keys; +} + +static VALUE location_required_keys(VALUE self) { + VALUE keys = rb_ary_new(); + + rbs_loc *loc = rbs_check_location(self); + rbs_loc_children *children = loc->children; + if 
(children == NULL) { + return keys; + } + + for (unsigned short i = 0; i < children->len; i++) { + if (RBS_LOC_REQUIRED_P(loc, i)) { + rbs_constant_t *key_id = rbs_constant_pool_id_to_constant(RBS_GLOBAL_CONSTANT_POOL, children->entries[i].name); + VALUE key_sym = rbs_constant_to_ruby_symbol(key_id); + rb_ary_push(keys, key_sym); + } + } + + return keys; +} + +void rbs__init_location(void) { + RBS_Location = rb_define_class_under(RBS, "Location", rb_cObject); + rb_define_alloc_func(RBS_Location, location_s_allocate); + rb_define_private_method(RBS_Location, "initialize", location_initialize, 3); + rb_define_private_method(RBS_Location, "initialize_copy", location_initialize_copy, 1); + rb_define_method(RBS_Location, "buffer", location_buffer, 0); + rb_define_method(RBS_Location, "start_pos", location_start_pos, 0); + rb_define_method(RBS_Location, "end_pos", location_end_pos, 0); + rb_define_method(RBS_Location, "_add_required_child", location_add_required_child, 3); + rb_define_method(RBS_Location, "_add_optional_child", location_add_optional_child, 3); + rb_define_method(RBS_Location, "_add_optional_no_child", location_add_optional_no_child, 1); + rb_define_method(RBS_Location, "_optional_keys", location_optional_keys, 0); + rb_define_method(RBS_Location, "_required_keys", location_required_keys, 0); + rb_define_method(RBS_Location, "[]", location_aref, 1); +} diff --git a/ext/rbs_extension/legacy_location.h b/ext/rbs_extension/legacy_location.h new file mode 100644 index 000000000..f8dbab21a --- /dev/null +++ b/ext/rbs_extension/legacy_location.h @@ -0,0 +1,45 @@ +#ifndef RBS_LOCATION_H +#define RBS_LOCATION_H + +#include "compat.h" + +SUPPRESS_RUBY_HEADER_DIAGNOSTICS_BEGIN +#include "ruby.h" +SUPPRESS_RUBY_HEADER_DIAGNOSTICS_END + +#include "rbs.h" + +/** + * RBS::Location class + * */ +extern VALUE RBS_Location; + +typedef struct { + VALUE buffer; + rbs_loc_range rg; + rbs_loc_children *children; // NULL when no children is allocated +} rbs_loc; + +/** + * 
Returns new RBS::Location object, with given buffer and range. + * */ +VALUE rbs_new_location(VALUE buffer, rbs_range_t rg); + +/** + * Return rbs_loc associated with the RBS::Location object. + * */ +rbs_loc *rbs_check_location(VALUE location); + +/** + * Allocate memory for child locations. + * + * Do not call twice for the same location. + * */ +void rbs_loc_legacy_alloc_children(rbs_loc *loc, unsigned short cap); + +/** + * Define RBS::Location class. + * */ +void rbs__init_location(); + +#endif diff --git a/ext/rbs_extension/lexer.c b/ext/rbs_extension/lexer.c deleted file mode 100644 index 73984cf84..000000000 --- a/ext/rbs_extension/lexer.c +++ /dev/null @@ -1,2728 +0,0 @@ -/* Generated by re2c 3.1 */ -#line 1 "ext/rbs_extension/lexer.re" -#include "rbs_extension.h" - -token rbsparser_next_token(lexstate *state) { - lexstate backup; - - backup = *state; - - -#line 12 "ext/rbs_extension/lexer.c" -{ - unsigned int yych; - unsigned int yyaccept = 0; - yych = peek(state); - switch (yych) { - case 0x00000000: goto yy1; - case '\t': - case ' ': goto yy4; - case '\n': - case '\r': goto yy6; - case '!': goto yy7; - case '"': goto yy9; - case '#': goto yy10; - case '$': goto yy12; - case '%': goto yy13; - case '&': goto yy14; - case '\'': goto yy15; - case '(': goto yy16; - case ')': goto yy17; - case '*': goto yy18; - case '+': goto yy19; - case ',': goto yy20; - case '-': goto yy21; - case '.': goto yy22; - case '/': - case '~': goto yy24; - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': goto yy25; - case ':': goto yy27; - case '<': goto yy29; - case '=': goto yy31; - case '>': goto yy33; - case '?': goto yy34; - case '@': goto yy35; - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - case 'G': - case 'H': - case 'I': - case 'J': - case 'K': - case 'L': - case 'M': - case 'N': - case 'O': - case 'P': - case 'Q': - case 'R': - case 'S': - case 'T': - case 'U': - case 'V': - 
case 'W': - case 'X': - case 'Y': - case 'Z': goto yy36; - case '[': goto yy38; - case ']': goto yy39; - case '^': goto yy40; - case '_': goto yy41; - case '`': goto yy43; - case 'a': goto yy45; - case 'b': goto yy47; - case 'c': goto yy48; - case 'd': goto yy49; - case 'e': goto yy50; - case 'f': goto yy51; - case 'g': - case 'h': - case 'j': - case 'k': - case 'l': - case 'q': - case 'r': - case 'w': - case 'x': - case 'y': - case 'z': goto yy52; - case 'i': goto yy54; - case 'm': goto yy55; - case 'n': goto yy56; - case 'o': goto yy57; - case 'p': goto yy58; - case 's': goto yy59; - case 't': goto yy60; - case 'u': goto yy61; - case 'v': goto yy62; - case '{': goto yy63; - case '|': goto yy64; - case '}': goto yy65; - default: goto yy2; - } -yy1: - rbs_skip(state); -#line 144 "ext/rbs_extension/lexer.re" - { return next_eof_token(state); } -#line 121 "ext/rbs_extension/lexer.c" -yy2: - rbs_skip(state); -yy3: -#line 145 "ext/rbs_extension/lexer.re" - { return next_token(state, ErrorToken); } -#line 127 "ext/rbs_extension/lexer.c" -yy4: - rbs_skip(state); - yych = peek(state); - if (yych == '\t') goto yy4; - if (yych == ' ') goto yy4; -yy5: -#line 143 "ext/rbs_extension/lexer.re" - { return next_token(state, tTRIVIA); } -#line 136 "ext/rbs_extension/lexer.c" -yy6: - rbs_skip(state); - goto yy5; -yy7: - rbs_skip(state); - yych = peek(state); - if (yych == '=') goto yy24; - if (yych == '~') goto yy24; -yy8: -#line 48 "ext/rbs_extension/lexer.re" - { return next_token(state, tOPERATOR); } -#line 148 "ext/rbs_extension/lexer.c" -yy9: - yyaccept = 0; - rbs_skip(state); - backup = *state; - yych = peek(state); - if (yych <= 0x00000000) goto yy3; - goto yy67; -yy10: - rbs_skip(state); - yych = peek(state); - if (yych <= 0x00000000) goto yy11; - if (yych != '\n') goto yy10; -yy11: -#line 59 "ext/rbs_extension/lexer.re" - { - return next_token( - state, - state->first_token_of_line ? 
tLINECOMMENT : tCOMMENT - ); - } -#line 169 "ext/rbs_extension/lexer.c" -yy12: - rbs_skip(state); - yych = peek(state); - if (yych <= ')') { - if (yych <= 0x0000001F) { - if (yych <= '\n') { - if (yych <= 0x00000000) goto yy3; - if (yych <= 0x00000008) goto yy71; - goto yy3; - } else { - if (yych == '\r') goto yy3; - goto yy71; - } - } else { - if (yych <= '#') { - if (yych <= ' ') goto yy3; - if (yych <= '"') goto yy73; - goto yy71; - } else { - if (yych == '%') goto yy3; - if (yych <= '\'') goto yy73; - goto yy3; - } - } - } else { - if (yych <= 'Z') { - if (yych <= '/') { - if (yych == '-') goto yy71; - goto yy73; - } else { - if (yych <= '9') goto yy71; - if (yych <= '>') goto yy73; - goto yy71; - } - } else { - if (yych <= '^') { - if (yych == '\\') goto yy73; - goto yy3; - } else { - if (yych <= 'z') goto yy71; - if (yych <= '}') goto yy3; - if (yych <= '~') goto yy73; - goto yy71; - } - } - } -yy13: - yyaccept = 1; - rbs_skip(state); - backup = *state; - yych = peek(state); - if (yych == 'a') goto yy74; - goto yy8; -yy14: - rbs_skip(state); -#line 33 "ext/rbs_extension/lexer.re" - { return next_token(state, pAMP); } -#line 227 "ext/rbs_extension/lexer.c" -yy15: - yyaccept = 0; - rbs_skip(state); - backup = *state; - yych = peek(state); - if (yych <= 0x00000000) goto yy3; - goto yy76; -yy16: - rbs_skip(state); -#line 24 "ext/rbs_extension/lexer.re" - { return next_token(state, pLPAREN); } -#line 239 "ext/rbs_extension/lexer.c" -yy17: - rbs_skip(state); -#line 25 "ext/rbs_extension/lexer.re" - { return next_token(state, pRPAREN); } -#line 244 "ext/rbs_extension/lexer.c" -yy18: - rbs_skip(state); - yych = peek(state); - if (yych == '*') goto yy80; -#line 35 "ext/rbs_extension/lexer.re" - { return next_token(state, pSTAR); } -#line 251 "ext/rbs_extension/lexer.c" -yy19: - rbs_skip(state); - yych = peek(state); - if (yych <= '/') goto yy8; - if (yych <= '9') goto yy25; - if (yych == '@') goto yy24; - goto yy8; -yy20: - rbs_skip(state); -#line 30 
"ext/rbs_extension/lexer.re" - { return next_token(state, pCOMMA); } -#line 263 "ext/rbs_extension/lexer.c" -yy21: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') goto yy8; - if (yych <= '9') goto yy25; - goto yy8; - } else { - if (yych <= '>') goto yy81; - if (yych == '@') goto yy24; - goto yy8; - } -yy22: - yyaccept = 2; - rbs_skip(state); - backup = *state; - yych = peek(state); - if (yych == '.') goto yy82; -yy23: -#line 37 "ext/rbs_extension/lexer.re" - { return next_token(state, pDOT); } -#line 285 "ext/rbs_extension/lexer.c" -yy24: - rbs_skip(state); - goto yy8; -yy25: - rbs_skip(state); - yych = peek(state); - if (yych <= '/') goto yy26; - if (yych <= '9') goto yy25; - if (yych == '_') goto yy25; -yy26: -#line 51 "ext/rbs_extension/lexer.re" - { return next_token(state, tINTEGER); } -#line 298 "ext/rbs_extension/lexer.c" -yy27: - yyaccept = 3; - rbs_skip(state); - backup = *state; - yych = peek(state); - switch (yych) { - case '!': goto yy83; - case '"': goto yy85; - case '$': goto yy86; - case '%': - case '&': - case '/': - case '^': - case '`': - case '|': - case '~': goto yy87; - case '\'': goto yy88; - case '*': goto yy89; - case '+': - case '-': goto yy90; - case ':': goto yy91; - case '<': goto yy92; - case '=': goto yy93; - case '>': goto yy94; - case '@': goto yy95; - case 'A': - case 'B': - case 'C': - case 'D': - case 'E': - case 'F': - case 'G': - case 'H': - case 'I': - case 'J': - case 'K': - case 'L': - case 'M': - case 'N': - case 'O': - case 'P': - case 'Q': - case 'R': - case 'S': - case 'T': - case 'U': - case 'V': - case 'W': - case 'X': - case 'Y': - case 'Z': - case '_': - case 'a': - case 'b': - case 'c': - case 'd': - case 'e': - case 'f': - case 'g': - case 'h': - case 'i': - case 'j': - case 'k': - case 'l': - case 'm': - case 'n': - case 'o': - case 'p': - case 'q': - case 'r': - case 's': - case 't': - case 'u': - case 'v': - case 'w': - case 'x': - case 'y': - case 'z': goto yy96; - case '[': goto 
yy98; - default: goto yy28; - } -yy28: -#line 44 "ext/rbs_extension/lexer.re" - { return next_token(state, pCOLON); } -#line 383 "ext/rbs_extension/lexer.c" -yy29: - rbs_skip(state); - yych = peek(state); - if (yych <= ';') goto yy30; - if (yych <= '<') goto yy24; - if (yych <= '=') goto yy99; -yy30: -#line 46 "ext/rbs_extension/lexer.re" - { return next_token(state, pLT); } -#line 393 "ext/rbs_extension/lexer.c" -yy31: - rbs_skip(state); - yych = peek(state); - if (yych <= '>') { - if (yych <= '<') goto yy32; - if (yych <= '=') goto yy100; - goto yy101; - } else { - if (yych == '~') goto yy24; - } -yy32: -#line 43 "ext/rbs_extension/lexer.re" - { return next_token(state, pEQ); } -#line 407 "ext/rbs_extension/lexer.c" -yy33: - rbs_skip(state); - yych = peek(state); - if (yych <= '<') goto yy8; - if (yych <= '>') goto yy24; - goto yy8; -yy34: - rbs_skip(state); -#line 34 "ext/rbs_extension/lexer.re" - { return next_token(state, pQUESTION); } -#line 418 "ext/rbs_extension/lexer.c" -yy35: - yyaccept = 0; - rbs_skip(state); - backup = *state; - yych = peek(state); - if (yych <= '^') { - if (yych <= '?') goto yy3; - if (yych <= '@') goto yy102; - if (yych <= 'Z') goto yy103; - goto yy3; - } else { - if (yych == '`') goto yy3; - if (yych <= 'z') goto yy103; - goto yy3; - } -yy36: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy36; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy37; - if (yych <= 'Z') goto yy36; - } else { - if (yych == '`') goto yy37; - if (yych <= 'z') goto yy36; - } - } -yy37: -#line 129 "ext/rbs_extension/lexer.re" - { return next_token(state, tUIDENT); } -#line 456 "ext/rbs_extension/lexer.c" -yy38: - rbs_skip(state); - yych = peek(state); - if (yych == ']') goto yy107; -#line 26 "ext/rbs_extension/lexer.re" - { return next_token(state, pLBRACKET); } -#line 463 "ext/rbs_extension/lexer.c" -yy39: - 
rbs_skip(state); -#line 27 "ext/rbs_extension/lexer.re" - { return next_token(state, pRBRACKET); } -#line 468 "ext/rbs_extension/lexer.c" -yy40: - rbs_skip(state); -#line 32 "ext/rbs_extension/lexer.re" - { return next_token(state, pHAT); } -#line 473 "ext/rbs_extension/lexer.c" -yy41: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy108; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy42; - if (yych <= 'Z') goto yy111; - } else { - if (yych <= '_') goto yy113; - if (yych <= '`') goto yy42; - if (yych <= 'z') goto yy108; - } - } -yy42: -#line 132 "ext/rbs_extension/lexer.re" - { return next_token(state, tULLIDENT); } -#line 497 "ext/rbs_extension/lexer.c" -yy43: - yyaccept = 4; - rbs_skip(state); - backup = *state; - yych = peek(state); - if (yych <= ' ') { - if (yych <= 0x00000000) goto yy44; - if (yych <= 0x0000001F) goto yy114; - } else { - if (yych != ':') goto yy114; - } -yy44: -#line 39 "ext/rbs_extension/lexer.re" - { return next_token(state, tOPERATOR); } -#line 512 "ext/rbs_extension/lexer.c" -yy45: - rbs_skip(state); - yych = peek(state); - if (yych <= 'r') { - if (yych == 'l') goto yy115; - goto yy53; - } else { - if (yych <= 's') goto yy116; - if (yych <= 't') goto yy118; - goto yy53; - } -yy46: -#line 128 "ext/rbs_extension/lexer.re" - { return next_token(state, tLIDENT); } -#line 527 "ext/rbs_extension/lexer.c" -yy47: - rbs_skip(state); - yych = peek(state); - if (yych == 'o') goto yy119; - goto yy53; -yy48: - rbs_skip(state); - yych = peek(state); - if (yych == 'l') goto yy120; - goto yy53; -yy49: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy121; - goto yy53; -yy50: - rbs_skip(state); - yych = peek(state); - if (yych == 'n') goto yy122; - if (yych == 'x') goto yy123; - goto yy53; -yy51: - rbs_skip(state); - yych = peek(state); - if (yych == 'a') goto yy124; - goto yy53; -yy52: - 
rbs_skip(state); - yych = peek(state); -yy53: - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - goto yy46; - } else { - if (yych <= '9') goto yy52; - if (yych <= '<') goto yy46; - goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy46; - if (yych <= 'Z') goto yy52; - goto yy46; - } else { - if (yych == '`') goto yy46; - if (yych <= 'z') goto yy52; - goto yy46; - } - } -yy54: - rbs_skip(state); - yych = peek(state); - if (yych == 'n') goto yy125; - goto yy53; -yy55: - rbs_skip(state); - yych = peek(state); - if (yych == 'o') goto yy127; - goto yy53; -yy56: - rbs_skip(state); - yych = peek(state); - if (yych == 'i') goto yy128; - goto yy53; -yy57: - rbs_skip(state); - yych = peek(state); - if (yych == 'u') goto yy129; - goto yy53; -yy58: - rbs_skip(state); - yych = peek(state); - if (yych == 'r') goto yy130; - if (yych == 'u') goto yy131; - goto yy53; -yy59: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy132; - if (yych == 'i') goto yy133; - goto yy53; -yy60: - rbs_skip(state); - yych = peek(state); - if (yych <= 'q') { - if (yych == 'o') goto yy134; - goto yy53; - } else { - if (yych <= 'r') goto yy135; - if (yych == 'y') goto yy136; - goto yy53; - } -yy61: - rbs_skip(state); - yych = peek(state); - if (yych == 'n') goto yy137; - if (yych == 's') goto yy138; - goto yy53; -yy62: - rbs_skip(state); - yych = peek(state); - if (yych == 'o') goto yy139; - goto yy53; -yy63: - rbs_skip(state); -#line 28 "ext/rbs_extension/lexer.re" - { return next_token(state, pLBRACE); } -#line 636 "ext/rbs_extension/lexer.c" -yy64: - rbs_skip(state); -#line 31 "ext/rbs_extension/lexer.re" - { return next_token(state, pBAR); } -#line 641 "ext/rbs_extension/lexer.c" -yy65: - rbs_skip(state); -#line 29 "ext/rbs_extension/lexer.re" - { return next_token(state, pRBRACE); } -#line 646 "ext/rbs_extension/lexer.c" -yy66: - rbs_skip(state); - yych = peek(state); -yy67: - if (yych <= '"') { - if (yych <= 0x00000000) goto yy68; - if 
(yych <= '!') goto yy66; - goto yy69; - } else { - if (yych == '\\') goto yy70; - goto yy66; - } -yy68: - *state = backup; - if (yyaccept <= 3) { - if (yyaccept <= 1) { - if (yyaccept == 0) { - goto yy3; - } else { - goto yy8; - } - } else { - if (yyaccept == 2) { - goto yy23; - } else { - goto yy28; - } - } - } else { - if (yyaccept <= 5) { - if (yyaccept == 4) { - goto yy44; - } else { - goto yy78; - } - } else { - goto yy155; - } - } -yy69: - rbs_skip(state); -#line 106 "ext/rbs_extension/lexer.re" - { return next_token(state, tDQSTRING); } -#line 690 "ext/rbs_extension/lexer.c" -yy70: - rbs_skip(state); - yych = peek(state); - if (yych == 'u') goto yy140; - if (yych == 'x') goto yy141; - goto yy66; -yy71: - rbs_skip(state); - yych = peek(state); - if (yych <= ',') { - if (yych <= '\f') { - if (yych <= 0x00000000) goto yy72; - if (yych <= 0x00000008) goto yy71; - if (yych >= '\v') goto yy71; - } else { - if (yych <= 0x0000001F) { - if (yych >= 0x0000000E) goto yy71; - } else { - if (yych == '#') goto yy71; - } - } - } else { - if (yych <= '>') { - if (yych <= '-') goto yy71; - if (yych <= '/') goto yy72; - if (yych <= '9') goto yy71; - } else { - if (yych <= '^') { - if (yych <= 'Z') goto yy71; - } else { - if (yych <= 'z') goto yy71; - if (yych >= 0x0000007F) goto yy71; - } - } - } -yy72: -#line 139 "ext/rbs_extension/lexer.re" - { return next_token(state, tGIDENT); } -#line 729 "ext/rbs_extension/lexer.c" -yy73: - rbs_skip(state); - goto yy72; -yy74: - rbs_skip(state); - yych = peek(state); - if (yych <= 'Z') { - if (yych <= '(') { - if (yych <= '\'') goto yy68; - goto yy142; - } else { - if (yych == '<') goto yy143; - goto yy68; - } - } else { - if (yych <= 'z') { - if (yych <= '[') goto yy144; - goto yy68; - } else { - if (yych <= '{') goto yy145; - if (yych <= '|') goto yy146; - goto yy68; - } - } -yy75: - rbs_skip(state); - yych = peek(state); -yy76: - if (yych <= '\'') { - if (yych <= 0x00000000) goto yy68; - if (yych <= '&') goto yy75; - } else { - if 
(yych == '\\') goto yy79; - goto yy75; - } -yy77: - rbs_skip(state); -yy78: -#line 107 "ext/rbs_extension/lexer.re" - { return next_token(state, tSQSTRING); } -#line 770 "ext/rbs_extension/lexer.c" -yy79: - rbs_skip(state); - yych = peek(state); - if (yych <= '\'') { - if (yych <= 0x00000000) goto yy68; - if (yych <= '&') goto yy75; - goto yy147; - } else { - if (yych == '\\') goto yy79; - goto yy75; - } -yy80: - rbs_skip(state); -#line 36 "ext/rbs_extension/lexer.re" - { return next_token(state, pSTAR2); } -#line 786 "ext/rbs_extension/lexer.c" -yy81: - rbs_skip(state); -#line 41 "ext/rbs_extension/lexer.re" - { return next_token(state, pARROW); } -#line 791 "ext/rbs_extension/lexer.c" -yy82: - rbs_skip(state); - yych = peek(state); - if (yych == '.') goto yy148; - goto yy68; -yy83: - rbs_skip(state); - yych = peek(state); - if (yych == '=') goto yy87; - if (yych == '~') goto yy87; -yy84: -#line 126 "ext/rbs_extension/lexer.re" - { return next_token(state, tSYMBOL); } -#line 805 "ext/rbs_extension/lexer.c" -yy85: - rbs_skip(state); - yych = peek(state); - if (yych <= '"') { - if (yych <= 0x00000000) goto yy68; - if (yych <= '!') goto yy85; - goto yy149; - } else { - if (yych == '\\') goto yy150; - goto yy85; - } -yy86: - rbs_skip(state); - yych = peek(state); - if (yych <= ')') { - if (yych <= 0x0000001F) { - if (yych <= '\n') { - if (yych <= 0x00000000) goto yy68; - if (yych <= 0x00000008) goto yy151; - goto yy68; - } else { - if (yych == '\r') goto yy68; - goto yy151; - } - } else { - if (yych <= '#') { - if (yych <= ' ') goto yy68; - if (yych <= '"') goto yy153; - goto yy151; - } else { - if (yych == '%') goto yy68; - if (yych <= '\'') goto yy153; - goto yy68; - } - } - } else { - if (yych <= 'Z') { - if (yych <= '/') { - if (yych == '-') goto yy151; - goto yy153; - } else { - if (yych <= '9') goto yy151; - if (yych <= '>') goto yy153; - goto yy151; - } - } else { - if (yych <= '^') { - if (yych == '\\') goto yy153; - goto yy68; - } else { - if (yych <= 'z') 
goto yy151; - if (yych <= '}') goto yy68; - if (yych <= '~') goto yy153; - goto yy151; - } - } - } -yy87: - rbs_skip(state); - goto yy84; -yy88: - rbs_skip(state); - yych = peek(state); - if (yych <= '\'') { - if (yych <= 0x00000000) goto yy68; - if (yych <= '&') goto yy88; - goto yy154; - } else { - if (yych == '\\') goto yy156; - goto yy88; - } -yy89: - rbs_skip(state); - yych = peek(state); - if (yych == '*') goto yy87; - goto yy84; -yy90: - rbs_skip(state); - yych = peek(state); - if (yych == '@') goto yy87; - goto yy84; -yy91: - rbs_skip(state); -#line 45 "ext/rbs_extension/lexer.re" - { return next_token(state, pCOLON2); } -#line 891 "ext/rbs_extension/lexer.c" -yy92: - rbs_skip(state); - yych = peek(state); - if (yych <= ';') goto yy84; - if (yych <= '<') goto yy87; - if (yych <= '=') goto yy157; - goto yy84; -yy93: - rbs_skip(state); - yych = peek(state); - if (yych == '=') goto yy158; - if (yych == '~') goto yy87; - goto yy68; -yy94: - rbs_skip(state); - yych = peek(state); - if (yych <= '<') goto yy84; - if (yych <= '>') goto yy87; - goto yy84; -yy95: - rbs_skip(state); - yych = peek(state); - if (yych <= '^') { - if (yych <= '?') goto yy68; - if (yych <= '@') goto yy159; - if (yych <= 'Z') goto yy160; - goto yy68; - } else { - if (yych == '`') goto yy68; - if (yych <= 'z') goto yy160; - goto yy68; - } -yy96: - rbs_skip(state); - yych = peek(state); - if (yych <= '>') { - if (yych <= '/') { - if (yych == '!') goto yy162; - } else { - if (yych <= '9') goto yy96; - if (yych == '=') goto yy162; - } - } else { - if (yych <= '^') { - if (yych <= '?') goto yy162; - if (yych <= '@') goto yy97; - if (yych <= 'Z') goto yy96; - } else { - if (yych == '`') goto yy97; - if (yych <= 'z') goto yy96; - } - } -yy97: -#line 122 "ext/rbs_extension/lexer.re" - { return next_token(state, tSYMBOL); } -#line 947 "ext/rbs_extension/lexer.c" -yy98: - rbs_skip(state); - yych = peek(state); - if (yych == ']') goto yy158; - goto yy68; -yy99: - rbs_skip(state); - yych = peek(state); 
- if (yych == '>') goto yy24; - goto yy8; -yy100: - rbs_skip(state); - yych = peek(state); - if (yych == '=') goto yy24; - goto yy8; -yy101: - rbs_skip(state); -#line 42 "ext/rbs_extension/lexer.re" - { return next_token(state, pFATARROW); } -#line 967 "ext/rbs_extension/lexer.c" -yy102: - rbs_skip(state); - yych = peek(state); - if (yych <= '^') { - if (yych <= '@') goto yy68; - if (yych <= 'Z') goto yy163; - goto yy68; - } else { - if (yych == '`') goto yy68; - if (yych <= 'z') goto yy163; - goto yy68; - } -yy103: - rbs_skip(state); - yych = peek(state); - if (yych <= 'Z') { - if (yych <= '/') goto yy104; - if (yych <= '9') goto yy103; - if (yych >= 'A') goto yy103; - } else { - if (yych <= '_') { - if (yych >= '_') goto yy103; - } else { - if (yych <= '`') goto yy104; - if (yych <= 'z') goto yy103; - } - } -yy104: -#line 136 "ext/rbs_extension/lexer.re" - { return next_token(state, tAIDENT); } -#line 998 "ext/rbs_extension/lexer.c" -yy105: - rbs_skip(state); -#line 133 "ext/rbs_extension/lexer.re" - { return next_token(state, tBANGIDENT); } -#line 1003 "ext/rbs_extension/lexer.c" -yy106: - rbs_skip(state); -#line 134 "ext/rbs_extension/lexer.re" - { return next_token(state, tEQIDENT); } -#line 1008 "ext/rbs_extension/lexer.c" -yy107: - rbs_skip(state); - yych = peek(state); - if (yych == '=') goto yy24; -#line 47 "ext/rbs_extension/lexer.re" - { return next_token(state, pAREF_OPR); } -#line 1015 "ext/rbs_extension/lexer.c" -yy108: - rbs_skip(state); - yych = peek(state); -yy109: - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy108; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy110; - if (yych <= 'Z') goto yy108; - } else { - if (yych == '`') goto yy110; - if (yych <= 'z') goto yy108; - } - } -yy110: -#line 130 "ext/rbs_extension/lexer.re" - { return next_token(state, tULLIDENT); } -#line 1039 "ext/rbs_extension/lexer.c" -yy111: - rbs_skip(state); - yych = 
peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy111; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy112; - if (yych <= 'Z') goto yy111; - } else { - if (yych == '`') goto yy112; - if (yych <= 'z') goto yy111; - } - } -yy112: -#line 131 "ext/rbs_extension/lexer.re" - { return next_token(state, tULIDENT); } -#line 1062 "ext/rbs_extension/lexer.c" -yy113: - rbs_skip(state); - yych = peek(state); - if (yych == 't') goto yy165; - goto yy109; -yy114: - rbs_skip(state); - yych = peek(state); - if (yych <= 0x00000000) goto yy68; - if (yych == '`') goto yy166; - goto yy114; -yy115: - rbs_skip(state); - yych = peek(state); - if (yych == 'i') goto yy167; - goto yy53; -yy116: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy117; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy117; - if (yych <= 'z') goto yy52; - } - } -yy117: -#line 96 "ext/rbs_extension/lexer.re" - { return next_token(state, kAS); } -#line 1101 "ext/rbs_extension/lexer.c" -yy118: - rbs_skip(state); - yych = peek(state); - if (yych == 't') goto yy168; - goto yy53; -yy119: - rbs_skip(state); - yych = peek(state); - if (yych == 'o') goto yy169; - if (yych == 't') goto yy170; - goto yy53; -yy120: - rbs_skip(state); - yych = peek(state); - if (yych == 'a') goto yy172; - goto yy53; -yy121: - rbs_skip(state); - yych = peek(state); - if (yych == 'f') goto yy173; - goto yy53; -yy122: - rbs_skip(state); - yych = peek(state); - if (yych == 'd') goto yy175; - goto yy53; -yy123: - rbs_skip(state); - yych = peek(state); - if (yych == 't') goto yy177; - goto yy53; -yy124: - rbs_skip(state); - yych = peek(state); - if (yych == 'l') goto yy178; - goto yy53; -yy125: - rbs_skip(state); - yych = 
peek(state); - if (yych <= '^') { - if (yych <= '9') { - if (yych == '!') goto yy105; - if (yych >= '0') goto yy52; - } else { - if (yych <= '=') { - if (yych >= '=') goto yy106; - } else { - if (yych <= '@') goto yy126; - if (yych <= 'Z') goto yy52; - } - } - } else { - if (yych <= 'c') { - if (yych == '`') goto yy126; - if (yych <= 'b') goto yy52; - goto yy179; - } else { - if (yych <= 's') { - if (yych <= 'r') goto yy52; - goto yy180; - } else { - if (yych <= 't') goto yy181; - if (yych <= 'z') goto yy52; - } - } - } -yy126: -#line 77 "ext/rbs_extension/lexer.re" - { return next_token(state, kIN); } -#line 1171 "ext/rbs_extension/lexer.c" -yy127: - rbs_skip(state); - yych = peek(state); - if (yych == 'd') goto yy182; - goto yy53; -yy128: - rbs_skip(state); - yych = peek(state); - if (yych == 'l') goto yy183; - goto yy53; -yy129: - rbs_skip(state); - yych = peek(state); - if (yych == 't') goto yy185; - goto yy53; -yy130: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy187; - if (yych == 'i') goto yy188; - goto yy53; -yy131: - rbs_skip(state); - yych = peek(state); - if (yych == 'b') goto yy189; - goto yy53; -yy132: - rbs_skip(state); - yych = peek(state); - if (yych == 'l') goto yy190; - goto yy53; -yy133: - rbs_skip(state); - yych = peek(state); - if (yych == 'n') goto yy191; - goto yy53; -yy134: - rbs_skip(state); - yych = peek(state); - if (yych == 'p') goto yy192; - goto yy53; -yy135: - rbs_skip(state); - yych = peek(state); - if (yych == 'u') goto yy194; - goto yy53; -yy136: - rbs_skip(state); - yych = peek(state); - if (yych == 'p') goto yy195; - goto yy53; -yy137: - rbs_skip(state); - yych = peek(state); - if (yych == 'c') goto yy196; - if (yych == 't') goto yy197; - goto yy53; -yy138: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy198; - goto yy53; -yy139: - rbs_skip(state); - yych = peek(state); - if (yych == 'i') goto yy200; - goto yy53; -yy140: - rbs_skip(state); - yych = peek(state); - if (yych <= '@') { - if 
(yych <= '/') goto yy68; - if (yych <= '9') goto yy201; - goto yy68; - } else { - if (yych <= 'F') goto yy201; - if (yych <= '`') goto yy68; - if (yych <= 'f') goto yy201; - goto yy68; - } -yy141: - rbs_skip(state); - yych = peek(state); - if (yych <= '/') goto yy68; - if (yych <= '9') goto yy66; - if (yych <= '`') goto yy68; - if (yych <= 'f') goto yy66; - goto yy68; -yy142: - rbs_skip(state); - yych = peek(state); - if (yych <= 0x00000000) goto yy68; - if (yych == ')') goto yy202; - goto yy142; -yy143: - rbs_skip(state); - yych = peek(state); - if (yych <= 0x00000000) goto yy68; - if (yych == '>') goto yy203; - goto yy143; -yy144: - rbs_skip(state); - yych = peek(state); - if (yych <= 0x00000000) goto yy68; - if (yych == ']') goto yy204; - goto yy144; -yy145: - rbs_skip(state); - yych = peek(state); - if (yych <= 0x00000000) goto yy68; - if (yych == '}') goto yy205; - goto yy145; -yy146: - rbs_skip(state); - yych = peek(state); - if (yych <= 0x00000000) goto yy68; - if (yych == '|') goto yy206; - goto yy146; -yy147: - yyaccept = 5; - rbs_skip(state); - backup = *state; - yych = peek(state); - if (yych <= '\'') { - if (yych <= 0x00000000) goto yy78; - if (yych <= '&') goto yy75; - goto yy77; - } else { - if (yych == '\\') goto yy79; - goto yy75; - } -yy148: - rbs_skip(state); -#line 38 "ext/rbs_extension/lexer.re" - { return next_token(state, pDOT3); } -#line 1307 "ext/rbs_extension/lexer.c" -yy149: - rbs_skip(state); -#line 108 "ext/rbs_extension/lexer.re" - { return next_token(state, tDQSYMBOL); } -#line 1312 "ext/rbs_extension/lexer.c" -yy150: - rbs_skip(state); - yych = peek(state); - if (yych == 'u') goto yy207; - if (yych == 'x') goto yy208; - goto yy85; -yy151: - rbs_skip(state); - yych = peek(state); - if (yych <= ',') { - if (yych <= '\f') { - if (yych <= 0x00000000) goto yy152; - if (yych <= 0x00000008) goto yy151; - if (yych >= '\v') goto yy151; - } else { - if (yych <= 0x0000001F) { - if (yych >= 0x0000000E) goto yy151; - } else { - if (yych == '#') 
goto yy151; - } - } - } else { - if (yych <= '>') { - if (yych <= '-') goto yy151; - if (yych <= '/') goto yy152; - if (yych <= '9') goto yy151; - } else { - if (yych <= '^') { - if (yych <= 'Z') goto yy151; - } else { - if (yych <= 'z') goto yy151; - if (yych >= 0x0000007F) goto yy151; - } - } - } -yy152: -#line 125 "ext/rbs_extension/lexer.re" - { return next_token(state, tSYMBOL); } -#line 1351 "ext/rbs_extension/lexer.c" -yy153: - rbs_skip(state); - goto yy152; -yy154: - rbs_skip(state); -yy155: -#line 109 "ext/rbs_extension/lexer.re" - { return next_token(state, tSQSYMBOL); } -#line 1360 "ext/rbs_extension/lexer.c" -yy156: - rbs_skip(state); - yych = peek(state); - if (yych <= '\'') { - if (yych <= 0x00000000) goto yy68; - if (yych <= '&') goto yy88; - goto yy209; - } else { - if (yych == '\\') goto yy156; - goto yy88; - } -yy157: - rbs_skip(state); - yych = peek(state); - if (yych == '>') goto yy87; - goto yy84; -yy158: - rbs_skip(state); - yych = peek(state); - if (yych == '=') goto yy87; - goto yy84; -yy159: - rbs_skip(state); - yych = peek(state); - if (yych <= '^') { - if (yych <= '@') goto yy68; - if (yych <= 'Z') goto yy210; - goto yy68; - } else { - if (yych == '`') goto yy68; - if (yych <= 'z') goto yy210; - goto yy68; - } -yy160: - rbs_skip(state); - yych = peek(state); - if (yych <= '>') { - if (yych <= '/') { - if (yych == '!') goto yy212; - } else { - if (yych <= '9') goto yy160; - if (yych == '=') goto yy212; - } - } else { - if (yych <= '^') { - if (yych <= '?') goto yy212; - if (yych <= '@') goto yy161; - if (yych <= 'Z') goto yy160; - } else { - if (yych == '`') goto yy161; - if (yych <= 'z') goto yy160; - } - } -yy161: -#line 123 "ext/rbs_extension/lexer.re" - { return next_token(state, tSYMBOL); } -#line 1417 "ext/rbs_extension/lexer.c" -yy162: - rbs_skip(state); - goto yy97; -yy163: - rbs_skip(state); - yych = peek(state); - if (yych <= 'Z') { - if (yych <= '/') goto yy164; - if (yych <= '9') goto yy163; - if (yych >= 'A') goto yy163; - } 
else { - if (yych <= '_') { - if (yych >= '_') goto yy163; - } else { - if (yych <= '`') goto yy164; - if (yych <= 'z') goto yy163; - } - } -yy164: -#line 137 "ext/rbs_extension/lexer.re" - { return next_token(state, tA2IDENT); } -#line 1439 "ext/rbs_extension/lexer.c" -yy165: - rbs_skip(state); - yych = peek(state); - if (yych == 'o') goto yy213; - goto yy109; -yy166: - rbs_skip(state); -#line 40 "ext/rbs_extension/lexer.re" - { return next_token(state, tQIDENT); } -#line 1449 "ext/rbs_extension/lexer.c" -yy167: - rbs_skip(state); - yych = peek(state); - if (yych == 'a') goto yy214; - goto yy53; -yy168: - rbs_skip(state); - yych = peek(state); - if (yych == 'r') goto yy215; - goto yy53; -yy169: - rbs_skip(state); - yych = peek(state); - if (yych == 'l') goto yy216; - goto yy53; -yy170: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy171; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy171; - if (yych <= 'z') goto yy52; - } - } -yy171: -#line 71 "ext/rbs_extension/lexer.re" - { return next_token(state, kBOT); } -#line 1487 "ext/rbs_extension/lexer.c" -yy172: - rbs_skip(state); - yych = peek(state); - if (yych == 's') goto yy218; - goto yy53; -yy173: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy174; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy174; - if (yych <= 'z') goto yy52; - } - } -yy174: -#line 73 "ext/rbs_extension/lexer.re" - { return next_token(state, kDEF); } -#line 1515 "ext/rbs_extension/lexer.c" -yy175: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - 
} else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy176; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy176; - if (yych <= 'z') goto yy52; - } - } -yy176: -#line 74 "ext/rbs_extension/lexer.re" - { return next_token(state, kEND); } -#line 1538 "ext/rbs_extension/lexer.c" -yy177: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy219; - goto yy53; -yy178: - rbs_skip(state); - yych = peek(state); - if (yych == 's') goto yy220; - goto yy53; -yy179: - rbs_skip(state); - yych = peek(state); - if (yych == 'l') goto yy221; - goto yy53; -yy180: - rbs_skip(state); - yych = peek(state); - if (yych == 't') goto yy222; - goto yy53; -yy181: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy223; - goto yy53; -yy182: - rbs_skip(state); - yych = peek(state); - if (yych == 'u') goto yy224; - goto yy53; -yy183: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy184; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy184; - if (yych <= 'z') goto yy52; - } - } -yy184: -#line 82 "ext/rbs_extension/lexer.re" - { return next_token(state, kNIL); } -#line 1591 "ext/rbs_extension/lexer.c" -yy185: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy186; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy186; - if (yych <= 'z') goto yy52; - } - } -yy186: -#line 83 "ext/rbs_extension/lexer.re" - { return next_token(state, kOUT); } -#line 1614 "ext/rbs_extension/lexer.c" -yy187: - rbs_skip(state); - yych = peek(state); - if (yych == 'p') goto 
yy225; - goto yy53; -yy188: - rbs_skip(state); - yych = peek(state); - if (yych == 'v') goto yy226; - goto yy53; -yy189: - rbs_skip(state); - yych = peek(state); - if (yych == 'l') goto yy227; - goto yy53; -yy190: - rbs_skip(state); - yych = peek(state); - if (yych == 'f') goto yy228; - goto yy53; -yy191: - rbs_skip(state); - yych = peek(state); - if (yych == 'g') goto yy230; - goto yy53; -yy192: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy193; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy193; - if (yych <= 'z') goto yy52; - } - } -yy193: -#line 89 "ext/rbs_extension/lexer.re" - { return next_token(state, kTOP); } -#line 1662 "ext/rbs_extension/lexer.c" -yy194: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy231; - goto yy53; -yy195: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy233; - goto yy53; -yy196: - rbs_skip(state); - yych = peek(state); - if (yych == 'h') goto yy235; - goto yy53; -yy197: - rbs_skip(state); - yych = peek(state); - if (yych == 'y') goto yy236; - goto yy53; -yy198: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy199; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy199; - if (yych <= 'z') goto yy52; - } - } -yy199: -#line 95 "ext/rbs_extension/lexer.re" - { return next_token(state, kUSE); } -#line 1705 "ext/rbs_extension/lexer.c" -yy200: - rbs_skip(state); - yych = peek(state); - if (yych == 'd') goto yy237; - goto yy53; -yy201: - rbs_skip(state); - yych = peek(state); - if (yych <= '@') { - if (yych <= '/') goto yy68; - if (yych <= '9') goto yy239; - goto yy68; - } 
else { - if (yych <= 'F') goto yy239; - if (yych <= '`') goto yy68; - if (yych <= 'f') goto yy239; - goto yy68; - } -yy202: - rbs_skip(state); -#line 54 "ext/rbs_extension/lexer.re" - { return next_token(state, tANNOTATION); } -#line 1728 "ext/rbs_extension/lexer.c" -yy203: - rbs_skip(state); -#line 57 "ext/rbs_extension/lexer.re" - { return next_token(state, tANNOTATION); } -#line 1733 "ext/rbs_extension/lexer.c" -yy204: - rbs_skip(state); -#line 55 "ext/rbs_extension/lexer.re" - { return next_token(state, tANNOTATION); } -#line 1738 "ext/rbs_extension/lexer.c" -yy205: - rbs_skip(state); -#line 53 "ext/rbs_extension/lexer.re" - { return next_token(state, tANNOTATION); } -#line 1743 "ext/rbs_extension/lexer.c" -yy206: - rbs_skip(state); -#line 56 "ext/rbs_extension/lexer.re" - { return next_token(state, tANNOTATION); } -#line 1748 "ext/rbs_extension/lexer.c" -yy207: - rbs_skip(state); - yych = peek(state); - if (yych <= '@') { - if (yych <= '/') goto yy68; - if (yych <= '9') goto yy240; - goto yy68; - } else { - if (yych <= 'F') goto yy240; - if (yych <= '`') goto yy68; - if (yych <= 'f') goto yy240; - goto yy68; - } -yy208: - rbs_skip(state); - yych = peek(state); - if (yych <= '/') goto yy68; - if (yych <= '9') goto yy85; - if (yych <= '`') goto yy68; - if (yych <= 'f') goto yy85; - goto yy68; -yy209: - yyaccept = 6; - rbs_skip(state); - backup = *state; - yych = peek(state); - if (yych <= '\'') { - if (yych <= 0x00000000) goto yy155; - if (yych <= '&') goto yy88; - goto yy154; - } else { - if (yych == '\\') goto yy156; - goto yy88; - } -yy210: - rbs_skip(state); - yych = peek(state); - if (yych <= '>') { - if (yych <= '/') { - if (yych == '!') goto yy241; - } else { - if (yych <= '9') goto yy210; - if (yych == '=') goto yy241; - } - } else { - if (yych <= '^') { - if (yych <= '?') goto yy241; - if (yych <= '@') goto yy211; - if (yych <= 'Z') goto yy210; - } else { - if (yych == '`') goto yy211; - if (yych <= 'z') goto yy210; - } - } -yy211: -#line 124 
"ext/rbs_extension/lexer.re" - { return next_token(state, tSYMBOL); } -#line 1806 "ext/rbs_extension/lexer.c" -yy212: - rbs_skip(state); - goto yy161; -yy213: - rbs_skip(state); - yych = peek(state); - if (yych == 'd') goto yy242; - goto yy109; -yy214: - rbs_skip(state); - yych = peek(state); - if (yych == 's') goto yy243; - goto yy53; -yy215: - rbs_skip(state); - yych = peek(state); - if (yych == '_') goto yy245; - goto yy53; -yy216: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy217; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy217; - if (yych <= 'z') goto yy52; - } - } -yy217: -#line 70 "ext/rbs_extension/lexer.re" - { return next_token(state, kBOOL); } -#line 1847 "ext/rbs_extension/lexer.c" -yy218: - rbs_skip(state); - yych = peek(state); - if (yych == 's') goto yy246; - goto yy53; -yy219: - rbs_skip(state); - yych = peek(state); - if (yych == 'n') goto yy248; - goto yy53; -yy220: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy249; - goto yy53; -yy221: - rbs_skip(state); - yych = peek(state); - if (yych == 'u') goto yy251; - goto yy53; -yy222: - rbs_skip(state); - yych = peek(state); - if (yych == 'a') goto yy252; - goto yy53; -yy223: - rbs_skip(state); - yych = peek(state); - if (yych == 'r') goto yy253; - goto yy53; -yy224: - rbs_skip(state); - yych = peek(state); - if (yych == 'l') goto yy254; - goto yy53; -yy225: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy255; - goto yy53; -yy226: - rbs_skip(state); - yych = peek(state); - if (yych == 'a') goto yy256; - goto yy53; -yy227: - rbs_skip(state); - yych = peek(state); - if (yych == 'i') goto yy257; - goto yy53; -yy228: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if 
(yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy229; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy229; - if (yych <= 'z') goto yy52; - } - } -yy229: -#line 87 "ext/rbs_extension/lexer.re" - { return next_token(state, kSELF); } -#line 1920 "ext/rbs_extension/lexer.c" -yy230: - rbs_skip(state); - yych = peek(state); - if (yych == 'l') goto yy258; - goto yy53; -yy231: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy232; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy232; - if (yych <= 'z') goto yy52; - } - } -yy232: -#line 90 "ext/rbs_extension/lexer.re" - { return next_token(state, kTRUE); } -#line 1948 "ext/rbs_extension/lexer.c" -yy233: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy234; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy234; - if (yych <= 'z') goto yy52; - } - } -yy234: -#line 91 "ext/rbs_extension/lexer.re" - { return next_token(state, kTYPE); } -#line 1971 "ext/rbs_extension/lexer.c" -yy235: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy259; - goto yy53; -yy236: - rbs_skip(state); - yych = peek(state); - if (yych == 'p') goto yy260; - goto yy53; -yy237: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy238; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy238; - if (yych <= 'z') goto yy52; - } 
- } -yy238: -#line 94 "ext/rbs_extension/lexer.re" - { return next_token(state, kVOID); } -#line 2004 "ext/rbs_extension/lexer.c" -yy239: - rbs_skip(state); - yych = peek(state); - if (yych <= '@') { - if (yych <= '/') goto yy68; - if (yych <= '9') goto yy261; - goto yy68; - } else { - if (yych <= 'F') goto yy261; - if (yych <= '`') goto yy68; - if (yych <= 'f') goto yy261; - goto yy68; - } -yy240: - rbs_skip(state); - yych = peek(state); - if (yych <= '@') { - if (yych <= '/') goto yy68; - if (yych <= '9') goto yy262; - goto yy68; - } else { - if (yych <= 'F') goto yy262; - if (yych <= '`') goto yy68; - if (yych <= 'f') goto yy262; - goto yy68; - } -yy241: - rbs_skip(state); - goto yy211; -yy242: - rbs_skip(state); - yych = peek(state); - if (yych == 'o') goto yy263; - goto yy109; -yy243: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy244; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy244; - if (yych <= 'z') goto yy52; - } - } -yy244: -#line 66 "ext/rbs_extension/lexer.re" - { return next_token(state, kALIAS); } -#line 2061 "ext/rbs_extension/lexer.c" -yy245: - rbs_skip(state); - yych = peek(state); - if (yych <= 'q') { - if (yych == 'a') goto yy264; - goto yy53; - } else { - if (yych <= 'r') goto yy265; - if (yych == 'w') goto yy266; - goto yy53; - } -yy246: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy247; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy247; - if (yych <= 'z') goto yy52; - } - } -yy247: -#line 72 "ext/rbs_extension/lexer.re" - { return next_token(state, kCLASS); } -#line 2095 "ext/rbs_extension/lexer.c" 
-yy248: - rbs_skip(state); - yych = peek(state); - if (yych == 'd') goto yy267; - goto yy53; -yy249: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy250; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy250; - if (yych <= 'z') goto yy52; - } - } -yy250: -#line 76 "ext/rbs_extension/lexer.re" - { return next_token(state, kFALSE); } -#line 2123 "ext/rbs_extension/lexer.c" -yy251: - rbs_skip(state); - yych = peek(state); - if (yych == 'd') goto yy269; - goto yy53; -yy252: - rbs_skip(state); - yych = peek(state); - if (yych == 'n') goto yy270; - goto yy53; -yy253: - rbs_skip(state); - yych = peek(state); - if (yych == 'f') goto yy271; - goto yy53; -yy254: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy272; - goto yy53; -yy255: - rbs_skip(state); - yych = peek(state); - if (yych == 'n') goto yy274; - goto yy53; -yy256: - rbs_skip(state); - yych = peek(state); - if (yych == 't') goto yy275; - goto yy53; -yy257: - rbs_skip(state); - yych = peek(state); - if (yych == 'c') goto yy276; - goto yy53; -yy258: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy278; - goto yy53; -yy259: - rbs_skip(state); - yych = peek(state); - if (yych == 'c') goto yy279; - goto yy53; -yy260: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy280; - goto yy53; -yy261: - rbs_skip(state); - yych = peek(state); - if (yych <= '@') { - if (yych <= '/') goto yy68; - if (yych <= '9') goto yy66; - goto yy68; - } else { - if (yych <= 'F') goto yy66; - if (yych <= '`') goto yy68; - if (yych <= 'f') goto yy66; - goto yy68; - } -yy262: - rbs_skip(state); - yych = peek(state); - if (yych <= '@') { - if (yych <= '/') goto yy68; - if (yych <= '9') goto yy281; - goto yy68; - } else { - if (yych <= 'F') goto yy281; - if (yych <= '`') goto yy68; - 
if (yych <= 'f') goto yy281; - goto yy68; - } -yy263: - rbs_skip(state); - yych = peek(state); - if (yych == '_') goto yy282; - goto yy109; -yy264: - rbs_skip(state); - yych = peek(state); - if (yych == 'c') goto yy283; - goto yy53; -yy265: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy284; - goto yy53; -yy266: - rbs_skip(state); - yych = peek(state); - if (yych == 'r') goto yy285; - goto yy53; -yy267: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy268; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy268; - if (yych <= 'z') goto yy52; - } - } -yy268: -#line 75 "ext/rbs_extension/lexer.re" - { return next_token(state, kEXTEND); } -#line 2242 "ext/rbs_extension/lexer.c" -yy269: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy286; - goto yy53; -yy270: - rbs_skip(state); - yych = peek(state); - if (yych == 'c') goto yy288; - goto yy53; -yy271: - rbs_skip(state); - yych = peek(state); - if (yych == 'a') goto yy289; - goto yy53; -yy272: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy273; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy273; - if (yych <= 'z') goto yy52; - } - } -yy273: -#line 81 "ext/rbs_extension/lexer.re" - { return next_token(state, kMODULE); } -#line 2280 "ext/rbs_extension/lexer.c" -yy274: - rbs_skip(state); - yych = peek(state); - if (yych == 'd') goto yy290; - goto yy53; -yy275: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy292; - goto yy53; -yy276: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto 
yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy277; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy277; - if (yych <= 'z') goto yy52; - } - } -yy277: -#line 86 "ext/rbs_extension/lexer.re" - { return next_token(state, kPUBLIC); } -#line 2313 "ext/rbs_extension/lexer.c" -yy278: - rbs_skip(state); - yych = peek(state); - if (yych == 't') goto yy294; - goto yy53; -yy279: - rbs_skip(state); - yych = peek(state); - if (yych == 'k') goto yy295; - goto yy53; -yy280: - rbs_skip(state); - yych = peek(state); - if (yych == 'd') goto yy296; - goto yy53; -yy281: - rbs_skip(state); - yych = peek(state); - if (yych <= '@') { - if (yych <= '/') goto yy68; - if (yych <= '9') goto yy85; - goto yy68; - } else { - if (yych <= 'F') goto yy85; - if (yych <= '`') goto yy68; - if (yych <= 'f') goto yy85; - goto yy68; - } -yy282: - rbs_skip(state); - yych = peek(state); - if (yych == '_') goto yy298; - goto yy109; -yy283: - rbs_skip(state); - yych = peek(state); - if (yych == 'c') goto yy300; - goto yy53; -yy284: - rbs_skip(state); - yych = peek(state); - if (yych == 'a') goto yy301; - goto yy53; -yy285: - rbs_skip(state); - yych = peek(state); - if (yych == 'i') goto yy302; - goto yy53; -yy286: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy287; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy287; - if (yych <= 'z') goto yy52; - } - } -yy287: -#line 78 "ext/rbs_extension/lexer.re" - { return next_token(state, kINCLUDE); } -#line 2384 "ext/rbs_extension/lexer.c" -yy288: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy303; - goto yy53; -yy289: - rbs_skip(state); - yych = peek(state); - if (yych == 'c') goto yy305; - goto yy53; -yy290: - 
rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy291; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy291; - if (yych <= 'z') goto yy52; - } - } -yy291: -#line 84 "ext/rbs_extension/lexer.re" - { return next_token(state, kPREPEND); } -#line 2417 "ext/rbs_extension/lexer.c" -yy292: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy293; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy293; - if (yych <= 'z') goto yy52; - } - } -yy293: -#line 85 "ext/rbs_extension/lexer.re" - { return next_token(state, kPRIVATE); } -#line 2440 "ext/rbs_extension/lexer.c" -yy294: - rbs_skip(state); - yych = peek(state); - if (yych == 'o') goto yy306; - goto yy53; -yy295: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy307; - goto yy53; -yy296: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy297; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy297; - if (yych <= 'z') goto yy52; - } - } -yy297: -#line 93 "ext/rbs_extension/lexer.re" - { return next_token(state, kUNTYPED); } -#line 2473 "ext/rbs_extension/lexer.c" -yy298: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy108; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy299; - if (yych <= 'Z') goto yy108; - } else { - if (yych == '`') 
goto yy299; - if (yych <= 'z') goto yy108; - } - } -yy299: -#line 97 "ext/rbs_extension/lexer.re" - { return next_token(state, k__TODO__); } -#line 2496 "ext/rbs_extension/lexer.c" -yy300: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy308; - goto yy53; -yy301: - rbs_skip(state); - yych = peek(state); - if (yych == 'd') goto yy309; - goto yy53; -yy302: - rbs_skip(state); - yych = peek(state); - if (yych == 't') goto yy310; - goto yy53; -yy303: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy304; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy304; - if (yych <= 'z') goto yy52; - } - } -yy304: -#line 79 "ext/rbs_extension/lexer.re" - { return next_token(state, kINSTANCE); } -#line 2534 "ext/rbs_extension/lexer.c" -yy305: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy311; - goto yy53; -yy306: - rbs_skip(state); - yych = peek(state); - if (yych == 'n') goto yy313; - goto yy53; -yy307: - rbs_skip(state); - yych = peek(state); - if (yych == 'd') goto yy315; - goto yy53; -yy308: - rbs_skip(state); - yych = peek(state); - if (yych == 's') goto yy317; - goto yy53; -yy309: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy318; - goto yy53; -yy310: - rbs_skip(state); - yych = peek(state); - if (yych == 'e') goto yy319; - goto yy53; -yy311: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy312; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy312; - if (yych <= 'z') goto yy52; - } - } -yy312: -#line 80 "ext/rbs_extension/lexer.re" - { return next_token(state, kINTERFACE); } -#line 2587 
"ext/rbs_extension/lexer.c" -yy313: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy314; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy314; - if (yych <= 'z') goto yy52; - } - } -yy314: -#line 88 "ext/rbs_extension/lexer.re" - { return next_token(state, kSINGLETON); } -#line 2610 "ext/rbs_extension/lexer.c" -yy315: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy316; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy316; - if (yych <= 'z') goto yy52; - } - } -yy316: -#line 92 "ext/rbs_extension/lexer.re" - { return next_token(state, kUNCHECKED); } -#line 2633 "ext/rbs_extension/lexer.c" -yy317: - rbs_skip(state); - yych = peek(state); - if (yych == 's') goto yy320; - goto yy53; -yy318: - rbs_skip(state); - yych = peek(state); - if (yych == 'r') goto yy321; - goto yy53; -yy319: - rbs_skip(state); - yych = peek(state); - if (yych == 'r') goto yy323; - goto yy53; -yy320: - rbs_skip(state); - yych = peek(state); - if (yych == 'o') goto yy325; - goto yy53; -yy321: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy322; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy322; - if (yych <= 'z') goto yy52; - } - } -yy322: -#line 68 "ext/rbs_extension/lexer.re" - { return next_token(state, kATTRREADER); } -#line 2676 "ext/rbs_extension/lexer.c" -yy323: - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - 
if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy324; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy324; - if (yych <= 'z') goto yy52; - } - } -yy324: -#line 69 "ext/rbs_extension/lexer.re" - { return next_token(state, kATTRWRITER); } -#line 2699 "ext/rbs_extension/lexer.c" -yy325: - rbs_skip(state); - yych = peek(state); - if (yych != 'r') goto yy53; - rbs_skip(state); - yych = peek(state); - if (yych <= '=') { - if (yych <= '/') { - if (yych == '!') goto yy105; - } else { - if (yych <= '9') goto yy52; - if (yych >= '=') goto yy106; - } - } else { - if (yych <= '^') { - if (yych <= '@') goto yy326; - if (yych <= 'Z') goto yy52; - } else { - if (yych == '`') goto yy326; - if (yych <= 'z') goto yy52; - } - } -yy326: -#line 67 "ext/rbs_extension/lexer.re" - { return next_token(state, kATTRACCESSOR); } -#line 2725 "ext/rbs_extension/lexer.c" -} -#line 146 "ext/rbs_extension/lexer.re" - -} diff --git a/ext/rbs_extension/lexer.h b/ext/rbs_extension/lexer.h deleted file mode 100644 index 55a8a3994..000000000 --- a/ext/rbs_extension/lexer.h +++ /dev/null @@ -1,179 +0,0 @@ -#ifndef RBS__LEXER_H -#define RBS__LEXER_H - -enum TokenType { - NullType, /* (Nothing) */ - pEOF, /* EOF */ - ErrorToken, /* Error */ - - pLPAREN, /* ( */ - pRPAREN, /* ) */ - pCOLON, /* : */ - pCOLON2, /* :: */ - pLBRACKET, /* [ */ - pRBRACKET, /* ] */ - pLBRACE, /* { */ - pRBRACE, /* } */ - pHAT, /* ^ */ - pARROW, /* -> */ - pFATARROW, /* => */ - pCOMMA, /* , */ - pBAR, /* | */ - pAMP, /* & */ - pSTAR, /* * */ - pSTAR2, /* ** */ - pDOT, /* . */ - pDOT3, /* ... */ - pBANG, /* ! */ - pQUESTION, /* ? 
*/ - pLT, /* < */ - pEQ, /* = */ - - kALIAS, /* alias */ - kATTRACCESSOR, /* attr_accessor */ - kATTRREADER, /* attr_reader */ - kATTRWRITER, /* attr_writer */ - kBOOL, /* bool */ - kBOT, /* bot */ - kCLASS, /* class */ - kDEF, /* def */ - kEND, /* end */ - kEXTEND, /* extend */ - kFALSE, /* false */ - kIN, /* in */ - kINCLUDE, /* include */ - kINSTANCE, /* instance */ - kINTERFACE, /* interface */ - kMODULE, /* module */ - kNIL, /* nil */ - kOUT, /* out */ - kPREPEND, /* prepend */ - kPRIVATE, /* private */ - kPUBLIC, /* public */ - kSELF, /* self */ - kSINGLETON, /* singleton */ - kTOP, /* top */ - kTRUE, /* true */ - kTYPE, /* type */ - kUNCHECKED, /* unchecked */ - kUNTYPED, /* untyped */ - kVOID, /* void */ - kUSE, /* use */ - kAS, /* as */ - k__TODO__, /* __todo__ */ - - tLIDENT, /* Identifiers starting with lower case */ - tUIDENT, /* Identifiers starting with upper case */ - tULIDENT, /* Identifiers starting with `_` followed by upper case */ - tULLIDENT, /* Identifiers starting with `_` followed by lower case */ - tGIDENT, /* Identifiers starting with `$` */ - tAIDENT, /* Identifiers starting with `@` */ - tA2IDENT, /* Identifiers starting with `@@` */ - tBANGIDENT, /* Identifiers ending with `!` */ - tEQIDENT, /* Identifiers ending with `=` */ - tQIDENT, /* Quoted identifier */ - pAREF_OPR, /* [] */ - tOPERATOR, /* Operator identifier */ - - tCOMMENT, /* Comment */ - tLINECOMMENT, /* Comment of all line */ - - tTRIVIA, /* Trivia tokens -- space and new line */ - - tDQSTRING, /* Double quoted string */ - tSQSTRING, /* Single quoted string */ - tINTEGER, /* Integer */ - tSYMBOL, /* Symbol */ - tDQSYMBOL, /* Double quoted symbol */ - tSQSYMBOL, /* Single quoted symbol */ - tANNOTATION, /* Annotation */ -}; - -/** - * The `byte_pos` (or `char_pos`) is the primary data. - * The rest are cache. - * - * They can be computed from `byte_pos` (or `char_pos`), but it needs full scan from the beginning of the string (depending on the encoding). 
- * */ -typedef struct { - int byte_pos; - int char_pos; - int line; - int column; -} position; - -typedef struct { - position start; - position end; -} range; - -typedef struct { - enum TokenType type; - range range; -} token; - -/** - * The lexer state is the curren token. - * - * ``` - * ... "a string token" - * ^ start position - * ^ current position - * ~~~~~~ Token => "a str - * ``` - * */ -typedef struct { - VALUE string; - int start_pos; /* The character position that defines the start of the input */ - int end_pos; /* The character position that defines the end of the input */ - position current; /* The current position */ - position start; /* The start position of the current token */ - bool first_token_of_line; /* This flag is used for tLINECOMMENT */ - unsigned int last_char; /* Last peeked character */ -} lexstate; - -extern token NullToken; -extern position NullPosition; -extern range NULL_RANGE; - -char *peek_token(lexstate *state, token tok); -int token_chars(token tok); -int token_bytes(token tok); - -#define null_position_p(pos) (pos.byte_pos == -1) -#define null_range_p(range) (range.start.byte_pos == -1) -#define nonnull_pos_or(pos1, pos2) (null_position_p(pos1) ? pos2 : pos1) -#define RANGE_BYTES(range) (range.end.byte_pos - range.start.byte_pos) - -const char *token_type_str(enum TokenType type); - -/** - * Read next character. - * */ -unsigned int peek(lexstate *state); - -/** - * Skip one character. - * */ -void rbs_skip(lexstate *state); - -/** - * Skip n characters. - * */ -void skipn(lexstate *state, size_t size); - -/** - * Return new token with given type. - * */ -token next_token(lexstate *state, enum TokenType type); - -/** - * Return new token with EOF type. 
- * */ -token next_eof_token(lexstate *state); - -token rbsparser_next_token(lexstate *state); - -void print_token(token tok); - -#endif diff --git a/ext/rbs_extension/lexer.re b/ext/rbs_extension/lexer.re deleted file mode 100644 index aa1b94746..000000000 --- a/ext/rbs_extension/lexer.re +++ /dev/null @@ -1,147 +0,0 @@ -#include "rbs_extension.h" - -token rbsparser_next_token(lexstate *state) { - lexstate backup; - - backup = *state; - - /*!re2c - re2c:flags:u = 1; - re2c:api:style = free-form; - re2c:flags:input = custom; - re2c:define:YYCTYPE = "unsigned int"; - re2c:define:YYPEEK = "peek(state)"; - re2c:define:YYSKIP = "rbs_skip(state);"; - re2c:define:YYBACKUP = "backup = *state;"; - re2c:define:YYRESTORE = "*state = backup;"; - re2c:yyfill:enable = 0; - - word = [a-zA-Z0-9_]; - - operator = "/" | "~" | "[]=" | "!" | "!=" | "!~" | "-" | "-@" | "+" | "+@" - | "==" | "===" | "=~" | "<<" | "<=" | "<=>" | ">" | ">=" | ">>" | "%"; - - "(" { return next_token(state, pLPAREN); } - ")" { return next_token(state, pRPAREN); } - "[" { return next_token(state, pLBRACKET); } - "]" { return next_token(state, pRBRACKET); } - "{" { return next_token(state, pLBRACE); } - "}" { return next_token(state, pRBRACE); } - "," { return next_token(state, pCOMMA); } - "|" { return next_token(state, pBAR); } - "^" { return next_token(state, pHAT); } - "&" { return next_token(state, pAMP); } - "?" { return next_token(state, pQUESTION); } - "*" { return next_token(state, pSTAR); } - "**" { return next_token(state, pSTAR2); } - "." { return next_token(state, pDOT); } - "..." 
{ return next_token(state, pDOT3); } - "`" { return next_token(state, tOPERATOR); } - "`" [^ :\x00] [^`\x00]* "`" { return next_token(state, tQIDENT); } - "->" { return next_token(state, pARROW); } - "=>" { return next_token(state, pFATARROW); } - "=" { return next_token(state, pEQ); } - ":" { return next_token(state, pCOLON); } - "::" { return next_token(state, pCOLON2); } - "<" { return next_token(state, pLT); } - "[]" { return next_token(state, pAREF_OPR); } - operator { return next_token(state, tOPERATOR); } - - number = [0-9] [0-9_]*; - ("-"|"+")? number { return next_token(state, tINTEGER); } - - "%a{" [^}\x00]* "}" { return next_token(state, tANNOTATION); } - "%a(" [^)\x00]* ")" { return next_token(state, tANNOTATION); } - "%a[" [^\]\x00]* "]" { return next_token(state, tANNOTATION); } - "%a|" [^|\x00]* "|" { return next_token(state, tANNOTATION); } - "%a<" [^>\x00]* ">" { return next_token(state, tANNOTATION); } - - "#" (. \ [\x00])* { - return next_token( - state, - state->first_token_of_line ? 
tLINECOMMENT : tCOMMENT - ); - } - - "alias" { return next_token(state, kALIAS); } - "attr_accessor" { return next_token(state, kATTRACCESSOR); } - "attr_reader" { return next_token(state, kATTRREADER); } - "attr_writer" { return next_token(state, kATTRWRITER); } - "bool" { return next_token(state, kBOOL); } - "bot" { return next_token(state, kBOT); } - "class" { return next_token(state, kCLASS); } - "def" { return next_token(state, kDEF); } - "end" { return next_token(state, kEND); } - "extend" { return next_token(state, kEXTEND); } - "false" { return next_token(state, kFALSE); } - "in" { return next_token(state, kIN); } - "include" { return next_token(state, kINCLUDE); } - "instance" { return next_token(state, kINSTANCE); } - "interface" { return next_token(state, kINTERFACE); } - "module" { return next_token(state, kMODULE); } - "nil" { return next_token(state, kNIL); } - "out" { return next_token(state, kOUT); } - "prepend" { return next_token(state, kPREPEND); } - "private" { return next_token(state, kPRIVATE); } - "public" { return next_token(state, kPUBLIC); } - "self" { return next_token(state, kSELF); } - "singleton" { return next_token(state, kSINGLETON); } - "top" { return next_token(state, kTOP); } - "true" { return next_token(state, kTRUE); } - "type" { return next_token(state, kTYPE); } - "unchecked" { return next_token(state, kUNCHECKED); } - "untyped" { return next_token(state, kUNTYPED); } - "void" { return next_token(state, kVOID); } - "use" { return next_token(state, kUSE); } - "as" { return next_token(state, kAS); } - "__todo__" { return next_token(state, k__TODO__); } - - unicode_char = "\\u" [0-9a-fA-F]{4}; - oct_char = "\\x" [0-9a-f]{1,2}; - hex_char = "\\" [0-7]{1,3}; - - dqstring = ["] (unicode_char | oct_char | hex_char | "\\" [^xu] | [^\\"\x00])* ["]; - sqstring = ['] ("\\"['\\] | [^'\x00])* [']; - - dqstring { return next_token(state, tDQSTRING); } - sqstring { return next_token(state, tSQSTRING); } - ":" dqstring { return 
next_token(state, tDQSYMBOL); } - ":" sqstring { return next_token(state, tSQSYMBOL); } - - identifier = [a-zA-Z_] word* [!?=]?; - symbol_opr = ":|" | ":&" | ":/" | ":%" | ":~" | ":`" | ":^" - | ":==" | ":=~" | ":===" | ":!" | ":!=" | ":!~" - | ":<" | ":<=" | ":<<" | ":<=>" | ":>" | ":>=" | ":>>" - | ":-" | ":-@" | ":+" | ":+@" | ":*" | ":**" | ":[]" | ":[]="; - - global_ident = [0-9]+ - | "-" [a-zA-Z0-9_] - | [~*$?!@\\/;,.=:<>"&'`+] - | [^ \t\r\n:;=.,!"$%&()-+~|\\'[\]{}*/<>^\x00]+; - - ":" identifier { return next_token(state, tSYMBOL); } - ":@" identifier { return next_token(state, tSYMBOL); } - ":@@" identifier { return next_token(state, tSYMBOL); } - ":$" global_ident { return next_token(state, tSYMBOL); } - symbol_opr { return next_token(state, tSYMBOL); } - - [a-z] word* { return next_token(state, tLIDENT); } - [A-Z] word* { return next_token(state, tUIDENT); } - "_" [a-z0-9_] word* { return next_token(state, tULLIDENT); } - "_" [A-Z] word* { return next_token(state, tULIDENT); } - "_" { return next_token(state, tULLIDENT); } - [a-zA-Z_] word* "!" 
{ return next_token(state, tBANGIDENT); } - [a-zA-Z_] word* "=" { return next_token(state, tEQIDENT); } - - "@" [a-zA-Z_] word* { return next_token(state, tAIDENT); } - "@@" [a-zA-Z_] word* { return next_token(state, tA2IDENT); } - - "$" global_ident { return next_token(state, tGIDENT); } - - skip = ([ \t]+|[\r\n]); - - skip { return next_token(state, tTRIVIA); } - "\x00" { return next_eof_token(state); } - * { return next_token(state, ErrorToken); } - */ -} diff --git a/ext/rbs_extension/lexstate.c b/ext/rbs_extension/lexstate.c deleted file mode 100644 index ed32fd06a..000000000 --- a/ext/rbs_extension/lexstate.c +++ /dev/null @@ -1,175 +0,0 @@ -#include "rbs_extension.h" - -static const char *RBS_TOKENTYPE_NAMES[] = { - "NullType", - "pEOF", - "ErrorToken", - - "pLPAREN", /* ( */ - "pRPAREN", /* ) */ - "pCOLON", /* : */ - "pCOLON2", /* :: */ - "pLBRACKET", /* [ */ - "pRBRACKET", /* ] */ - "pLBRACE", /* { */ - "pRBRACE", /* } */ - "pHAT", /* ^ */ - "pARROW", /* -> */ - "pFATARROW", /* => */ - "pCOMMA", /* , */ - "pBAR", /* | */ - "pAMP", /* & */ - "pSTAR", /* * */ - "pSTAR2", /* ** */ - "pDOT", /* . */ - "pDOT3", /* ... */ - "pBANG", /* ! */ - "pQUESTION", /* ? 
*/ - "pLT", /* < */ - "pEQ", /* = */ - - "kALIAS", /* alias */ - "kATTRACCESSOR", /* attr_accessor */ - "kATTRREADER", /* attr_reader */ - "kATTRWRITER", /* attr_writer */ - "kBOOL", /* bool */ - "kBOT", /* bot */ - "kCLASS", /* class */ - "kDEF", /* def */ - "kEND", /* end */ - "kEXTEND", /* extend */ - "kFALSE", /* kFALSE */ - "kIN", /* in */ - "kINCLUDE", /* include */ - "kINSTANCE", /* instance */ - "kINTERFACE", /* interface */ - "kMODULE", /* module */ - "kNIL", /* nil */ - "kOUT", /* out */ - "kPREPEND", /* prepend */ - "kPRIVATE", /* private */ - "kPUBLIC", /* public */ - "kSELF", /* self */ - "kSINGLETON", /* singleton */ - "kTOP", /* top */ - "kTRUE", /* true */ - "kTYPE", /* type */ - "kUNCHECKED", /* unchecked */ - "kUNTYPED", /* untyped */ - "kVOID", /* void */ - "kUSE", /* use */ - "kAS", /* as */ - "k__TODO__", /* __todo__ */ - - "tLIDENT", /* Identifiers starting with lower case */ - "tUIDENT", /* Identifiers starting with upper case */ - "tULIDENT", /* Identifiers starting with `_` */ - "tULLIDENT", - "tGIDENT", /* Identifiers starting with `$` */ - "tAIDENT", /* Identifiers starting with `@` */ - "tA2IDENT", /* Identifiers starting with `@@` */ - "tBANGIDENT", - "tEQIDENT", - "tQIDENT", /* Quoted identifier */ - "pAREF_OPR", /* [] */ - "tOPERATOR", /* Operator identifier */ - - "tCOMMENT", - "tLINECOMMENT", - - "tTRIVIA", - - "tDQSTRING", /* Double quoted string */ - "tSQSTRING", /* Single quoted string */ - "tINTEGER", /* Integer */ - "tSYMBOL", /* Symbol */ - "tDQSYMBOL", - "tSQSYMBOL", - "tANNOTATION", /* Annotation */ -}; - -token NullToken = { NullType }; -position NullPosition = { -1, -1, -1, -1 }; -range NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } }; - -const char *token_type_str(enum TokenType type) { - return RBS_TOKENTYPE_NAMES[type]; -} - -int token_chars(token tok) { - return tok.range.end.char_pos - tok.range.start.char_pos; -} - -int token_bytes(token tok) { - return RANGE_BYTES(tok.range); -} - -unsigned int peek(lexstate 
*state) { - if (state->current.char_pos == state->end_pos) { - state->last_char = '\0'; - return 0; - } else { - unsigned int c = rb_enc_mbc_to_codepoint(RSTRING_PTR(state->string) + state->current.byte_pos, RSTRING_END(state->string), rb_enc_get(state->string)); - state->last_char = c; - return c; - } -} - -token next_token(lexstate *state, enum TokenType type) { - token t; - - t.type = type; - t.range.start = state->start; - t.range.end = state->current; - state->start = state->current; - if (type != tTRIVIA) { - state->first_token_of_line = false; - } - - return t; -} - -token next_eof_token(lexstate *state) { - if (state->current.byte_pos == RSTRING_LEN(state->string)+1) { - // End of String - token t; - t.type = pEOF; - t.range.start = state->start; - t.range.end = state->start; - state->start = state->current; - - return t; - } else { - // NULL byte in the middle of the string - return next_token(state, pEOF); - } -} - -void rbs_skip(lexstate *state) { - if (!state->last_char) { - peek(state); - } - int byte_len = rb_enc_codelen(state->last_char, rb_enc_get(state->string)); - - state->current.char_pos += 1; - state->current.byte_pos += byte_len; - - if (state->last_char == '\n') { - state->current.line += 1; - state->current.column = 0; - state->first_token_of_line = true; - } else { - state->current.column += 1; - } -} - -void skipn(lexstate *state, size_t size) { - for (size_t i = 0; i < size; i ++) { - peek(state); - rbs_skip(state); - } -} - -char *peek_token(lexstate *state, token tok) { - return RSTRING_PTR(state->string) + tok.range.start.byte_pos; -} diff --git a/ext/rbs_extension/location.c b/ext/rbs_extension/location.c deleted file mode 100644 index 5b251bbc6..000000000 --- a/ext/rbs_extension/location.c +++ /dev/null @@ -1,325 +0,0 @@ -#include "rbs_extension.h" - -#define RBS_LOC_REQUIRED_P(loc, i) ((loc)->children->required_p & (1 << (i))) -#define RBS_LOC_OPTIONAL_P(loc, i) (!RBS_LOC_REQUIRED_P((loc), (i))) -#define RBS_LOC_CHILDREN_SIZE(cap) 
(sizeof(rbs_loc_children) + sizeof(rbs_loc_entry) * ((cap) - 1)) -#define NULL_LOC_RANGE_P(rg) ((rg).start == -1) - -rbs_loc_range RBS_LOC_NULL_RANGE = { -1, -1 }; -VALUE RBS_Location; - -position rbs_loc_position(int char_pos) { - position pos = { 0, char_pos, -1, -1 }; - return pos; -} - -position rbs_loc_position3(int char_pos, int line, int column) { - position pos = { 0, char_pos, line, column }; - return pos; -} - -rbs_loc_range rbs_new_loc_range(range rg) { - rbs_loc_range r = { rg.start.char_pos, rg.end.char_pos }; - return r; -} - -static void check_children_max(unsigned short n) { - size_t max = sizeof(rbs_loc_entry_bitmap) * 8; - if (n > max) { - rb_raise(rb_eRuntimeError, "Too many children added to location: %d", n); - } -} - -void rbs_loc_alloc_children(rbs_loc *loc, unsigned short cap) { - check_children_max(cap); - - size_t s = RBS_LOC_CHILDREN_SIZE(cap); - loc->children = malloc(s); - - *loc->children = (rbs_loc_children) { - .len = 0, - .required_p = 0, - .cap = cap, - .entries = {{ 0 }}, - }; -} - -static void check_children_cap(rbs_loc *loc) { - if (loc->children == NULL) { - rbs_loc_alloc_children(loc, 1); - } else { - if (loc->children->len == loc->children->cap) { - check_children_max(loc->children->cap + 1); - size_t s = RBS_LOC_CHILDREN_SIZE(++loc->children->cap); - loc->children = realloc(loc->children, s); - } - } -} - -void rbs_loc_add_required_child(rbs_loc *loc, rbs_constant_id_t name, range r) { - rbs_loc_add_optional_child(loc, name, r); - - unsigned short last_index = loc->children->len - 1; - loc->children->required_p |= 1 << last_index; -} - -void rbs_loc_add_optional_child(rbs_loc *loc, rbs_constant_id_t name, range r) { - check_children_cap(loc); - - unsigned short i = loc->children->len++; - loc->children->entries[i] = (rbs_loc_entry) { - .name = name, - .rg = rbs_new_loc_range(r), - }; -} - -void rbs_loc_init(rbs_loc *loc, VALUE buffer, rbs_loc_range rg) { - *loc = (rbs_loc) { - .buffer = buffer, - .rg = rg, - .children = 
NULL, - }; -} - -void rbs_loc_free(rbs_loc *loc) { - free(loc->children); - ruby_xfree(loc); -} - -static void rbs_loc_mark(void *ptr) -{ - rbs_loc *loc = ptr; - rb_gc_mark(loc->buffer); -} - -static size_t rbs_loc_memsize(const void *ptr) { - const rbs_loc *loc = ptr; - if (loc->children == NULL) { - return sizeof(rbs_loc); - } else { - return sizeof(rbs_loc) + RBS_LOC_CHILDREN_SIZE(loc->children->cap); - } -} - -static rb_data_type_t location_type = { - "RBS::Location", - {rbs_loc_mark, (RUBY_DATA_FUNC)rbs_loc_free, rbs_loc_memsize}, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY -}; - -static VALUE location_s_allocate(VALUE klass) { - rbs_loc *loc; - VALUE obj = TypedData_Make_Struct(klass, rbs_loc, &location_type, loc); - - rbs_loc_init(loc, Qnil, RBS_LOC_NULL_RANGE); - - return obj; -} - -rbs_loc *rbs_check_location(VALUE obj) { - return rb_check_typeddata(obj, &location_type); -} - -static VALUE location_initialize(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos) { - rbs_loc *loc = rbs_check_location(self); - - int start = FIX2INT(start_pos); - int end = FIX2INT(end_pos); - - *loc = (rbs_loc) { - .buffer = buffer, - .rg = (rbs_loc_range) { start, end }, - .children = NULL, - }; - - return Qnil; -} - -static VALUE location_initialize_copy(VALUE self, VALUE other) { - rbs_loc *self_loc = rbs_check_location(self); - rbs_loc *other_loc = rbs_check_location(other); - - *self_loc = (rbs_loc) { - .buffer = other_loc->buffer, - .rg = other_loc->rg, - .children = NULL, - }; - - if (other_loc->children != NULL) { - rbs_loc_alloc_children(self_loc, other_loc->children->cap); - memcpy(self_loc->children, other_loc->children, RBS_LOC_CHILDREN_SIZE(other_loc->children->cap)); - } - - return Qnil; -} - -static VALUE location_buffer(VALUE self) { - rbs_loc *loc = rbs_check_location(self); - return loc->buffer; -} - -static VALUE location_start_pos(VALUE self) { - rbs_loc *loc = rbs_check_location(self); - return INT2FIX(loc->rg.start); -} - -static VALUE location_end_pos(VALUE 
self) { - rbs_loc *loc = rbs_check_location(self); - return INT2FIX(loc->rg.end); -} - -static rbs_constant_id_t rbs_constant_pool_insert_ruby_symbol(VALUE symbol) { - VALUE name = rb_sym2str(symbol); - - // Constants inserted here will never be freed, but that's acceptable because: - // 1. Most symbols passed into here will be the ones already inserted into the constant pool by `parser.c`. - // 2. Methods like `add_required_child` and `add_optional_child` will usually only get called with a few different symbols. - return rbs_constant_pool_insert_constant(RBS_GLOBAL_CONSTANT_POOL, (const uint8_t *) RSTRING_PTR(name), RSTRING_LEN(name)); -} - -static VALUE location_add_required_child(VALUE self, VALUE name, VALUE start, VALUE end) { - rbs_loc *loc = rbs_check_location(self); - - range rg; - rg.start = rbs_loc_position(FIX2INT(start)); - rg.end = rbs_loc_position(FIX2INT(end)); - - rbs_loc_add_required_child(loc, rbs_constant_pool_insert_ruby_symbol(name), rg); - - return Qnil; -} - -static VALUE location_add_optional_child(VALUE self, VALUE name, VALUE start, VALUE end) { - rbs_loc *loc = rbs_check_location(self); - - range rg; - rg.start = rbs_loc_position(FIX2INT(start)); - rg.end = rbs_loc_position(FIX2INT(end)); - - rbs_loc_add_optional_child(loc, rbs_constant_pool_insert_ruby_symbol(name), rg); - - return Qnil; -} - -static VALUE location_add_optional_no_child(VALUE self, VALUE name) { - rbs_loc *loc = rbs_check_location(self); - - rbs_loc_add_optional_child(loc, rbs_constant_pool_insert_ruby_symbol(name), NULL_RANGE); - - return Qnil; -} - -VALUE rbs_new_location(VALUE buffer, range rg) { - rbs_loc *loc; - VALUE obj = TypedData_Make_Struct(RBS_Location, rbs_loc, &location_type, loc); - - rbs_loc_init(loc, buffer, rbs_new_loc_range(rg)); - - return obj; -} - -static VALUE rbs_new_location_from_loc_range(VALUE buffer, rbs_loc_range rg) { - rbs_loc *loc; - VALUE obj = TypedData_Make_Struct(RBS_Location, rbs_loc, &location_type, loc); - - rbs_loc_init(loc, 
buffer, rg); - - return obj; -} - -static rbs_constant_id_t rbs_constant_pool_find_ruby_symbol(VALUE symbol) { - VALUE name = rb_sym2str(symbol); - - return rbs_constant_pool_find(RBS_GLOBAL_CONSTANT_POOL, (const uint8_t *) RSTRING_PTR(name), RSTRING_LEN(name)); -} - -static VALUE location_aref(VALUE self, VALUE name) { - rbs_loc *loc = rbs_check_location(self); - - rbs_constant_id_t id = rbs_constant_pool_find_ruby_symbol(name); - - if (loc->children != NULL && id != RBS_CONSTANT_ID_UNSET) { - for (unsigned short i = 0; i < loc->children->len; i++) { - if (loc->children->entries[i].name == id) { - rbs_loc_range result = loc->children->entries[i].rg; - - if (RBS_LOC_OPTIONAL_P(loc, i) && NULL_LOC_RANGE_P(result)) { - return Qnil; - } else { - return rbs_new_location_from_loc_range(loc->buffer, result); - } - } - } - } - - VALUE string = rb_funcall(name, rb_intern("to_s"), 0); - rb_raise(rb_eRuntimeError, "Unknown child name given: %s", RSTRING_PTR(string)); -} - -static VALUE rbs_constant_to_ruby_symbol(rbs_constant_t *constant) { - return ID2SYM(rb_intern2((const char *) constant->start, constant->length)); -} - -static VALUE location_optional_keys(VALUE self) { - VALUE keys = rb_ary_new(); - - rbs_loc *loc = rbs_check_location(self); - rbs_loc_children *children = loc->children; - if (children == NULL) { - return keys; - } - - for (unsigned short i = 0; i < children->len; i++) { - if (RBS_LOC_OPTIONAL_P(loc, i)) { - rbs_constant_t *key_id = rbs_constant_pool_id_to_constant(RBS_GLOBAL_CONSTANT_POOL, children->entries[i].name); - VALUE key_sym = rbs_constant_to_ruby_symbol(key_id); - rb_ary_push(keys, key_sym); - } - } - - return keys; -} - -static VALUE location_required_keys(VALUE self) { - VALUE keys = rb_ary_new(); - - rbs_loc *loc = rbs_check_location(self); - rbs_loc_children *children = loc->children; - if (children == NULL) { - return keys; - } - - for (unsigned short i = 0; i < children->len; i++) { - if (RBS_LOC_REQUIRED_P(loc, i)) { - rbs_constant_t 
*key_id = rbs_constant_pool_id_to_constant(RBS_GLOBAL_CONSTANT_POOL, children->entries[i].name); - VALUE key_sym = rbs_constant_to_ruby_symbol(key_id); - rb_ary_push(keys, key_sym); - } - } - - return keys; -} - -VALUE rbs_location_pp(VALUE buffer, const position *start_pos, const position *end_pos) { - range rg = { *start_pos, *end_pos }; - rg.start = *start_pos; - rg.end = *end_pos; - - return rbs_new_location(buffer, rg); -} - -void rbs__init_location(void) { - RBS_Location = rb_define_class_under(RBS, "Location", rb_cObject); - rb_define_alloc_func(RBS_Location, location_s_allocate); - rb_define_private_method(RBS_Location, "initialize", location_initialize, 3); - rb_define_private_method(RBS_Location, "initialize_copy", location_initialize_copy, 1); - rb_define_method(RBS_Location, "buffer", location_buffer, 0); - rb_define_method(RBS_Location, "start_pos", location_start_pos, 0); - rb_define_method(RBS_Location, "end_pos", location_end_pos, 0); - rb_define_method(RBS_Location, "_add_required_child", location_add_required_child, 3); - rb_define_method(RBS_Location, "_add_optional_child", location_add_optional_child, 3); - rb_define_method(RBS_Location, "_add_optional_no_child", location_add_optional_no_child, 1); - rb_define_method(RBS_Location, "_optional_keys", location_optional_keys, 0); - rb_define_method(RBS_Location, "_required_keys", location_required_keys, 0); - rb_define_method(RBS_Location, "[]", location_aref, 1); -} diff --git a/ext/rbs_extension/location.h b/ext/rbs_extension/location.h deleted file mode 100644 index 8a6c64b0b..000000000 --- a/ext/rbs_extension/location.h +++ /dev/null @@ -1,85 +0,0 @@ -#ifndef RBS_LOCATION_H -#define RBS_LOCATION_H - -#include "ruby.h" -#include "lexer.h" -#include "rbs/util/rbs_constant_pool.h" - -/** - * RBS::Location class - * */ -extern VALUE RBS_Location; - -typedef struct { - int start; - int end; -} rbs_loc_range; - -typedef struct { - rbs_constant_id_t name; - rbs_loc_range rg; -} rbs_loc_entry; - 
-typedef unsigned int rbs_loc_entry_bitmap; - -// The flexible array always allocates, but it's okay. -// This struct is not allocated when the `rbs_loc` doesn't have children. -typedef struct { - unsigned short len; - unsigned short cap; - rbs_loc_entry_bitmap required_p; - rbs_loc_entry entries[1]; -} rbs_loc_children; - -typedef struct { - VALUE buffer; - rbs_loc_range rg; - rbs_loc_children *children; // NULL when no children is allocated -} rbs_loc; - -/** - * Returns new RBS::Location object, with given buffer and range. - * */ -VALUE rbs_new_location(VALUE buffer, range rg); - -/** - * Return rbs_loc associated with the RBS::Location object. - * */ -rbs_loc *rbs_check_location(VALUE location); - -/** - * Allocate memory for child locations. - * - * Do not call twice for the same location. - * */ -void rbs_loc_alloc_children(rbs_loc *loc, unsigned short cap); - -/** - * Add a required child range with given name. - * - * Allocate memory for children with rbs_loc_alloc_children before calling this function. - * */ -void rbs_loc_add_required_child(rbs_loc *loc, rbs_constant_id_t name, range r); - -/** - * Add an optional child range with given name. - * - * Allocate memory for children with rbs_loc_alloc_children before calling this function. - * */ -void rbs_loc_add_optional_child(rbs_loc *loc, rbs_constant_id_t name, range r); - -/** - * Returns RBS::Location object with start/end positions. - * - * @param start_pos - * @param end_pos - * @return New RSS::Location object. - * */ -VALUE rbs_location_pp(VALUE buffer, const position *start_pos, const position *end_pos); - -/** - * Define RBS::Location class. 
- * */ -void rbs__init_location(); - -#endif diff --git a/ext/rbs_extension/main.c b/ext/rbs_extension/main.c index d0b4a3619..291694f75 100644 --- a/ext/rbs_extension/main.c +++ b/ext/rbs_extension/main.c @@ -1,24 +1,325 @@ #include "rbs_extension.h" +#include "rbs/util/rbs_assert.h" +#include "rbs/util/rbs_allocator.h" #include "rbs/util/rbs_constant_pool.h" +#include "ast_translation.h" +#include "legacy_location.h" +#include "rbs_string_bridging.h" #include "ruby/vm.h" -static -void Deinit_rbs_extension(ruby_vm_t *_) { - rbs_constant_pool_free(RBS_GLOBAL_CONSTANT_POOL); +/** + * Raises `RBS::ParsingError` or `RuntimeError` on `tok` with message constructed with given `fmt`. + * + * ``` + * foo.rbs:11:21...11:25: Syntax error: {message}, token=`{tok source}` ({tok type}) + * ``` + * */ +static NORETURN(void) raise_error(rbs_error_t *error, VALUE buffer) { + rbs_assert(error != NULL, "raise_error() called with NULL error"); + + if (!error->syntax_error) { + rb_raise(rb_eRuntimeError, "Unexpected error"); + } + + VALUE location = rbs_new_location(buffer, error->token.range); + VALUE type = rb_str_new_cstr(rbs_token_type_str(error->token.type)); + + VALUE rb_error = rb_funcall( + RBS_ParsingError, + rb_intern("new"), + 3, + location, + rb_str_new_cstr(error->message), + type + ); + + rb_exc_raise(rb_error); +} + +void raise_error_if_any(rbs_parser_t *parser, VALUE buffer) { + if (parser->error != NULL) { + raise_error(parser->error, buffer); + } +} + +/** + * Inserts the given array of type variables names into the parser's type variable table. + * @param parser + * @param variables A Ruby Array of Symbols, or nil. + */ +static void declare_type_variables(rbs_parser_t *parser, VALUE variables, VALUE buffer) { + if (NIL_P(variables)) return; // Nothing to do. 
+ + if (!RB_TYPE_P(variables, T_ARRAY)) { + rbs_parser_free(parser); + rb_raise(rb_eTypeError, "wrong argument type %" PRIsVALUE " (must be an Array of Symbols or nil)", rb_obj_class(variables)); + } + + rbs_parser_push_typevar_table(parser, true); + + for (long i = 0; i < rb_array_len(variables); i++) { + VALUE symbol = rb_ary_entry(variables, i); + + if (!RB_TYPE_P(symbol, T_SYMBOL)) { + rbs_parser_free(parser); + rb_raise(rb_eTypeError, "Type variables Array contains invalid value %" PRIsVALUE " of type %" PRIsVALUE " (must be an Array of Symbols or nil)", rb_inspect(symbol), rb_obj_class(symbol)); + } + + VALUE name_str = rb_sym2str(symbol); + + rbs_constant_id_t id = rbs_constant_pool_insert_shared( + &parser->constant_pool, + (const uint8_t *) RSTRING_PTR(name_str), + RSTRING_LEN(name_str) + ); + + if (!rbs_parser_insert_typevar(parser, id)) { + raise_error(parser->error, buffer); + } + } +} + +struct parse_type_arg { + VALUE buffer; + rb_encoding *encoding; + rbs_parser_t *parser; + VALUE require_eof; +}; + +static VALUE ensure_free_parser(VALUE parser) { + rbs_parser_free((rbs_parser_t *) parser); + return Qnil; +} + +static VALUE parse_type_try(VALUE a) { + struct parse_type_arg *arg = (struct parse_type_arg *) a; + rbs_parser_t *parser = arg->parser; + + if (parser->next_token.type == pEOF) { + return Qnil; + } + + rbs_node_t *type; + rbs_parse_type(parser, &type); + + raise_error_if_any(parser, arg->buffer); + + if (RB_TEST(arg->require_eof)) { + rbs_parser_advance(parser); + if (parser->current_token.type != pEOF) { + rbs_parser_set_error(parser, parser->current_token, true, "expected a token `%s`", rbs_token_type_str(pEOF)); + raise_error(parser->error, arg->buffer); + } + } + + rbs_translation_context_t ctx = rbs_translation_context_create( + &parser->constant_pool, + arg->buffer, + arg->encoding + ); + + return rbs_struct_to_ruby_value(ctx, type); +} + +static rbs_lexer_t *alloc_lexer_from_buffer(rbs_allocator_t *allocator, VALUE string, rb_encoding 
*encoding, int start_pos, int end_pos) { + if (start_pos < 0 || end_pos < 0) { + rb_raise(rb_eArgError, "negative position range: %d...%d", start_pos, end_pos); + } + + const char *encoding_name = rb_enc_name(encoding); + + return rbs_lexer_new( + allocator, + rbs_string_from_ruby_string(string), + rbs_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name))), + start_pos, + end_pos + ); +} + +static rbs_parser_t *alloc_parser_from_buffer(VALUE buffer, int start_pos, int end_pos) { + if (start_pos < 0 || end_pos < 0) { + rb_raise(rb_eArgError, "negative position range: %d...%d", start_pos, end_pos); + } + + VALUE string = rb_funcall(buffer, rb_intern("content"), 0); + StringValue(string); + + rb_encoding *encoding = rb_enc_get(string); + const char *encoding_name = rb_enc_name(encoding); + + return rbs_parser_new( + rbs_string_from_ruby_string(string), + rbs_encoding_find((const uint8_t *) encoding_name, (const uint8_t *) (encoding_name + strlen(encoding_name))), + start_pos, + end_pos + ); +} + +static VALUE rbsparser_parse_type(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos, VALUE variables, VALUE require_eof) { + VALUE string = rb_funcall(buffer, rb_intern("content"), 0); + StringValue(string); + rb_encoding *encoding = rb_enc_get(string); + + rbs_parser_t *parser = alloc_parser_from_buffer(buffer, FIX2INT(start_pos), FIX2INT(end_pos)); + declare_type_variables(parser, variables, buffer); + struct parse_type_arg arg = { + .buffer = buffer, + .encoding = encoding, + .parser = parser, + .require_eof = require_eof + }; + + VALUE result = rb_ensure(parse_type_try, (VALUE) &arg, ensure_free_parser, (VALUE) parser); + + RB_GC_GUARD(string); + + return result; +} + +static VALUE parse_method_type_try(VALUE a) { + struct parse_type_arg *arg = (struct parse_type_arg *) a; + rbs_parser_t *parser = arg->parser; + + if (parser->next_token.type == pEOF) { + return Qnil; + } + + rbs_method_type_t *method_type = NULL; + 
rbs_parse_method_type(parser, &method_type); + + raise_error_if_any(parser, arg->buffer); + + if (RB_TEST(arg->require_eof)) { + rbs_parser_advance(parser); + if (parser->current_token.type != pEOF) { + rbs_parser_set_error(parser, parser->current_token, true, "expected a token `%s`", rbs_token_type_str(pEOF)); + raise_error(parser->error, arg->buffer); + } + } + + rbs_translation_context_t ctx = rbs_translation_context_create( + &parser->constant_pool, + arg->buffer, + arg->encoding + ); + + return rbs_struct_to_ruby_value(ctx, (rbs_node_t *) method_type); +} + +static VALUE rbsparser_parse_method_type(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos, VALUE variables, VALUE require_eof) { + VALUE string = rb_funcall(buffer, rb_intern("content"), 0); + StringValue(string); + rb_encoding *encoding = rb_enc_get(string); + + rbs_parser_t *parser = alloc_parser_from_buffer(buffer, FIX2INT(start_pos), FIX2INT(end_pos)); + declare_type_variables(parser, variables, buffer); + struct parse_type_arg arg = { + .buffer = buffer, + .encoding = encoding, + .parser = parser, + .require_eof = require_eof + }; + + VALUE result = rb_ensure(parse_method_type_try, (VALUE) &arg, ensure_free_parser, (VALUE) parser); + + RB_GC_GUARD(string); + + return result; +} + +static VALUE parse_signature_try(VALUE a) { + struct parse_type_arg *arg = (struct parse_type_arg *) a; + rbs_parser_t *parser = arg->parser; + + rbs_signature_t *signature = NULL; + rbs_parse_signature(parser, &signature); + + raise_error_if_any(parser, arg->buffer); + + rbs_translation_context_t ctx = rbs_translation_context_create( + &parser->constant_pool, + arg->buffer, + arg->encoding + ); + + return rbs_struct_to_ruby_value(ctx, (rbs_node_t *) signature); +} + +static VALUE rbsparser_parse_signature(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos) { + VALUE string = rb_funcall(buffer, rb_intern("content"), 0); + StringValue(string); + rb_encoding *encoding = rb_enc_get(string); + + rbs_parser_t 
*parser = alloc_parser_from_buffer(buffer, FIX2INT(start_pos), FIX2INT(end_pos)); + struct parse_type_arg arg = { + .buffer = buffer, + .encoding = encoding, + .parser = parser, + .require_eof = false + }; + + VALUE result = rb_ensure(parse_signature_try, (VALUE) &arg, ensure_free_parser, (VALUE) parser); + + RB_GC_GUARD(string); + + return result; +} + +static VALUE rbsparser_lex(VALUE self, VALUE buffer, VALUE end_pos) { + VALUE string = rb_funcall(buffer, rb_intern("content"), 0); + StringValue(string); + rb_encoding *encoding = rb_enc_get(string); + + rbs_allocator_t *allocator = rbs_allocator_init(); + rbs_lexer_t *lexer = alloc_lexer_from_buffer(allocator, string, encoding, 0, FIX2INT(end_pos)); + + VALUE results = rb_ary_new(); + rbs_token_t token = NullToken; + while (token.type != pEOF) { + token = rbs_lexer_next_token(lexer); + VALUE type = ID2SYM(rb_intern(rbs_token_type_str(token.type))); + VALUE location = rbs_new_location(buffer, token.range); + VALUE pair = rb_ary_new3(2, type, location); + rb_ary_push(results, pair); + } + + rbs_allocator_free(allocator); + RB_GC_GUARD(string); + + return results; +} + +void rbs__init_parser(void) { + RBS_Parser = rb_define_class_under(RBS, "Parser", rb_cObject); + rb_gc_register_mark_object(RBS_Parser); + + EMPTY_ARRAY = rb_obj_freeze(rb_ary_new()); + rb_gc_register_mark_object(EMPTY_ARRAY); + + EMPTY_HASH = rb_obj_freeze(rb_hash_new()); + rb_gc_register_mark_object(EMPTY_HASH); + + rb_define_singleton_method(RBS_Parser, "_parse_type", rbsparser_parse_type, 5); + rb_define_singleton_method(RBS_Parser, "_parse_method_type", rbsparser_parse_method_type, 5); + rb_define_singleton_method(RBS_Parser, "_parse_signature", rbsparser_parse_signature, 3); + rb_define_singleton_method(RBS_Parser, "_lex", rbsparser_lex, 2); +} + +static void Deinit_rbs_extension(ruby_vm_t *_) { + rbs_constant_pool_free(RBS_GLOBAL_CONSTANT_POOL); } -void -Init_rbs_extension(void) -{ +void Init_rbs_extension(void) { #ifdef 
HAVE_RB_EXT_RACTOR_SAFE - rb_ext_ractor_safe(true); + rb_ext_ractor_safe(true); #endif - rbs__init_constants(); - rbs__init_location(); - rbs__init_parser(); + rbs__init_constants(); + rbs__init_location(); + rbs__init_parser(); - /* Calculated based on the number of unique strings used with the `INTERN` macro in `parser.c`. + /* Calculated based on the number of unique strings used with the `INTERN` macro in `parser.c`. * * ```bash * grep -o 'INTERN("\([^"]*\)")' ext/rbs_extension/parser.c \ @@ -27,8 +328,8 @@ Init_rbs_extension(void) * | wc -l * ``` */ - const size_t num_uniquely_interned_strings = 26; - rbs_constant_pool_init(RBS_GLOBAL_CONSTANT_POOL, num_uniquely_interned_strings); + const size_t num_uniquely_interned_strings = 26; + rbs_constant_pool_init(RBS_GLOBAL_CONSTANT_POOL, num_uniquely_interned_strings); - ruby_vm_at_exit(Deinit_rbs_extension); + ruby_vm_at_exit(Deinit_rbs_extension); } diff --git a/ext/rbs_extension/parser.c b/ext/rbs_extension/parser.c deleted file mode 100644 index 77f583c41..000000000 --- a/ext/rbs_extension/parser.c +++ /dev/null @@ -1,2982 +0,0 @@ -#include "rbs_extension.h" -#include "rbs/util/rbs_constant_pool.h" - -#define INTERN(str) \ - rbs_constant_pool_insert_constant( \ - RBS_GLOBAL_CONSTANT_POOL, \ - (const uint8_t *) str, \ - strlen(str) \ - ) - -#define INTERN_TOKEN(parserstate, tok) \ - rb_intern3(\ - peek_token(parserstate->lexstate, tok),\ - token_bytes(tok),\ - rb_enc_get(parserstate->lexstate->string)\ - ) - -#define KEYWORD_CASES \ - case kBOOL:\ - case kBOT: \ - case kCLASS: \ - case kFALSE: \ - case kINSTANCE: \ - case kINTERFACE: \ - case kNIL: \ - case kSELF: \ - case kSINGLETON: \ - case kTOP: \ - case kTRUE: \ - case kVOID: \ - case kTYPE: \ - case kUNCHECKED: \ - case kIN: \ - case kOUT: \ - case kEND: \ - case kDEF: \ - case kINCLUDE: \ - case kEXTEND: \ - case kPREPEND: \ - case kALIAS: \ - case kMODULE: \ - case kATTRREADER: \ - case kATTRWRITER: \ - case kATTRACCESSOR: \ - case kPUBLIC: \ - case 
kPRIVATE: \ - case kUNTYPED: \ - case kUSE: \ - case kAS: \ - case k__TODO__: \ - /* nop */ - -typedef struct { - VALUE required_positionals; - VALUE optional_positionals; - VALUE rest_positionals; - VALUE trailing_positionals; - VALUE required_keywords; - VALUE optional_keywords; - VALUE rest_keywords; -} method_params; - -static VALUE EMPTY_ARRAY; -static VALUE EMPTY_HASH; - -static inline void melt_array(VALUE *array) { - if (*array == EMPTY_ARRAY) { - *array = rb_ary_new(); - } -} - -static inline void melt_hash(VALUE *hash) { - if (*hash == EMPTY_HASH) { - *hash = rb_hash_new(); - } -} - -static bool rbs_is_untyped_params(method_params *params) { - return NIL_P(params->required_positionals); -} - -// /** -// * Returns RBS::Location object of `current_token` of a parser state. -// * -// * @param state -// * @return New RBS::Location object. -// * */ -static VALUE rbs_location_current_token(parserstate *state) { - return rbs_location_pp( - state->buffer, - &state->current_token.range.start, - &state->current_token.range.end - ); -} - -static VALUE parse_optional(parserstate *state); -static VALUE parse_simple(parserstate *state); - -static VALUE string_of_loc(parserstate *state, position start, position end) { - return rb_enc_str_new( - RSTRING_PTR(state->lexstate->string) + start.byte_pos, - end.byte_pos - start.byte_pos, - rb_enc_get(state->lexstate->string) - ); -} - -/** - * Raises RuntimeError with "Unexpected error " message. - * */ -static NORETURN(void) rbs_abort(void) { - rb_raise( - rb_eRuntimeError, - "Unexpected error" - ); -} - -NORETURN(void) raise_syntax_error(parserstate *state, token tok, const char *fmt, ...) 
{ - va_list args; - va_start(args, fmt); - VALUE message = rb_vsprintf(fmt, args); - va_end(args); - - VALUE location = rbs_new_location(state->buffer, tok.range); - VALUE type = rb_str_new_cstr(token_type_str(tok.type)); - - VALUE error = rb_funcall( - RBS_ParsingError, - rb_intern("new"), - 3, - location, - message, - type - ); - - rb_exc_raise(error); -} - -typedef enum { - CLASS_NAME = 1, - INTERFACE_NAME = 2, - ALIAS_NAME = 4 -} TypeNameKind; - -void parser_advance_no_gap(parserstate *state) { - if (state->current_token.range.end.byte_pos == state->next_token.range.start.byte_pos) { - parser_advance(state); - } else { - raise_syntax_error( - state, - state->next_token, - "unexpected token" - ); - } -} - -/* - type_name ::= {`::`} (tUIDENT `::`)* - | {(tUIDENT `::`)*} - | {} -*/ -static VALUE parse_type_name(parserstate *state, TypeNameKind kind, range *rg) { - VALUE absolute = Qfalse; - VALUE path = EMPTY_ARRAY; - - if (rg) { - rg->start = state->current_token.range.start; - } - - if (state->current_token.type == pCOLON2) { - absolute = Qtrue; - parser_advance_no_gap(state); - } - - while ( - state->current_token.type == tUIDENT - && state->next_token.type == pCOLON2 - && state->current_token.range.end.byte_pos == state->next_token.range.start.byte_pos - && state->next_token.range.end.byte_pos == state->next_token2.range.start.byte_pos - ) { - melt_array(&path); - rb_ary_push(path, ID2SYM(INTERN_TOKEN(state, state->current_token))); - - parser_advance(state); - parser_advance(state); - } - - VALUE namespace = rbs_namespace(path, absolute); - - switch (state->current_token.type) { - case tLIDENT: - if (kind & ALIAS_NAME) goto success; - goto error; - case tULIDENT: - if (kind & INTERFACE_NAME) goto success; - goto error; - case tUIDENT: - if (kind & CLASS_NAME) goto success; - goto error; - default: - goto error; - } - - success: { - if (rg) { - rg->end = state->current_token.range.end; - } - - return rbs_type_name(namespace, ID2SYM(INTERN_TOKEN(state, 
state->current_token))); - } - - error: { - VALUE ids = rb_ary_new(); - if (kind & ALIAS_NAME) { - rb_ary_push(ids, rb_str_new_literal("alias name")); - } - if (kind & INTERFACE_NAME) { - rb_ary_push(ids, rb_str_new_literal("interface name")); - } - if (kind & CLASS_NAME) { - rb_ary_push(ids, rb_str_new_literal("class/module/constant name")); - } - - VALUE string = rb_funcall(ids, rb_intern("join"), 1, rb_str_new_cstr(", ")); - - raise_syntax_error( - state, - state->current_token, - "expected one of %"PRIsVALUE, - string - ); - } -} - -/* - type_list ::= {} type `,` ... <`,`> eol - | {} type `,` ... `,` eol -*/ -static void parse_type_list(parserstate *state, enum TokenType eol, VALUE *types) { - while (true) { - melt_array(types); - rb_ary_push(*types, parse_type(state)); - - if (state->next_token.type == pCOMMA) { - parser_advance(state); - - if (state->next_token.type == eol) { - break; - } - } else { - if (state->next_token.type == eol) { - break; - } else { - raise_syntax_error( - state, - state->next_token, - "comma delimited type list is expected" - ); - } - } - } -} - -static bool is_keyword_token(enum TokenType type) { - switch (type) - { - case tLIDENT: - case tUIDENT: - case tULIDENT: - case tULLIDENT: - case tQIDENT: - case tBANGIDENT: - KEYWORD_CASES - return true; - default: - return false; - } -} - -/* - function_param ::= {} - | {} type -*/ -static VALUE parse_function_param(parserstate *state) { - range type_range; - type_range.start = state->next_token.range.start; - VALUE type = parse_type(state); - type_range.end = state->current_token.range.end; - - if (state->next_token.type == pCOMMA || state->next_token.type == pRPAREN) { - range param_range = type_range; - - VALUE location = rbs_new_location(state->buffer, param_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 1); - rbs_loc_add_optional_child(loc, INTERN("name"), NULL_RANGE); - - return rbs_function_param(type, Qnil, location); - } else { - range 
name_range = state->next_token.range; - - parser_advance(state); - - range param_range = { - .start = type_range.start, - .end = name_range.end, - }; - - if (!is_keyword_token(state->current_token.type)) { - raise_syntax_error( - state, - state->current_token, - "unexpected token for function parameter name" - ); - } - - VALUE name = rb_to_symbol(rbs_unquote_string(state, state->current_token.range, 0)); - VALUE location = rbs_new_location(state->buffer, param_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 1); - rbs_loc_add_optional_child(loc, INTERN("name"), name_range); - - return rbs_function_param(type, name, location); - } -} - -static ID intern_token_start_end(parserstate *state, token start_token, token end_token) { - return rb_intern3( - peek_token(state->lexstate, start_token), - end_token.range.end.byte_pos - start_token.range.start.byte_pos, - rb_enc_get(state->lexstate->string) - ); -} - -/* - keyword_key ::= {} `:` - | {} keyword <`?`> `:` -*/ -static VALUE parse_keyword_key(parserstate *state) { - parser_advance(state); - - if (state->next_token.type == pQUESTION) { - VALUE key = ID2SYM(intern_token_start_end(state, state->current_token, state->next_token)); - parser_advance(state); - return key; - } else { - return ID2SYM(INTERN_TOKEN(state, state->current_token)); - } -} - -/* - keyword ::= {} keyword `:` -*/ -static void parse_keyword(parserstate *state, VALUE *keywords, VALUE memo) { - VALUE key = parse_keyword_key(state); - - if (!NIL_P(rb_hash_aref(memo, key))) { - raise_syntax_error( - state, - state->current_token, - "duplicated keyword argument" - ); - } else { - rb_hash_aset(memo, key, Qtrue); - } - - parser_advance_assert(state, pCOLON); - VALUE param = parse_function_param(state); - - melt_hash(keywords); - rb_hash_aset(*keywords, key, param); - - return; -} - -/* -Returns true if keyword is given. 
- - is_keyword === {} KEYWORD `:` -*/ -static bool is_keyword(parserstate *state) { - if (is_keyword_token(state->next_token.type)) { - if (state->next_token2.type == pCOLON && state->next_token.range.end.byte_pos == state->next_token2.range.start.byte_pos) { - return true; - } - - if (state->next_token2.type == pQUESTION - && state->next_token3.type == pCOLON - && state->next_token.range.end.byte_pos == state->next_token2.range.start.byte_pos - && state->next_token2.range.end.byte_pos == state->next_token3.range.start.byte_pos) { - return true; - } - } - - return false; -} - -/* - params ::= {} `)` - | {} `?` `)` -- Untyped function params (assign params.required = nil) - | `)` - | `,` `)` - - required_params ::= {} function_param `,` - | {} - | {} - - optional_params ::= {} `?` function_param `,` - | {} `?` - | {} - - rest_params ::= {} `*` function_param `,` - | {} `*` - | {} - - trailing_params ::= {} function_param `,` - | {} - | {} - - keywords ::= {} required_keyword `,` - | {} `?` optional_keyword `,` - | {} `**` function_param `,` - | {} - | {} `?` - | {} `**` -*/ -static void parse_params(parserstate *state, method_params *params) { - if (state->next_token.type == pQUESTION && state->next_token2.type == pRPAREN) { - params->required_positionals = Qnil; - parser_advance(state); - return; - } - if (state->next_token.type == pRPAREN) { - return; - } - - VALUE memo = rb_hash_new(); - - while (true) { - VALUE param; - - switch (state->next_token.type) { - case pQUESTION: - goto PARSE_OPTIONAL_PARAMS; - case pSTAR: - goto PARSE_REST_PARAM; - case pSTAR2: - goto PARSE_KEYWORDS; - case pRPAREN: - goto EOP; - - default: - if (is_keyword(state)) { - goto PARSE_KEYWORDS; - } - - param = parse_function_param(state); - melt_array(¶ms->required_positionals); - rb_ary_push(params->required_positionals, param); - - break; - } - - if (!parser_advance_if(state, pCOMMA)) { - goto EOP; - } - } - -PARSE_OPTIONAL_PARAMS: - while (true) { - VALUE param; - - switch 
(state->next_token.type) { - case pQUESTION: - parser_advance(state); - - if (is_keyword(state)) { - parse_keyword(state, ¶ms->optional_keywords, memo); - parser_advance_if(state, pCOMMA); - goto PARSE_KEYWORDS; - } - - param = parse_function_param(state); - melt_array(¶ms->optional_positionals); - rb_ary_push(params->optional_positionals, param); - - break; - default: - goto PARSE_REST_PARAM; - } - - if (!parser_advance_if(state, pCOMMA)) { - goto EOP; - } - } - -PARSE_REST_PARAM: - if (state->next_token.type == pSTAR) { - parser_advance(state); - params->rest_positionals = parse_function_param(state); - - if (!parser_advance_if(state, pCOMMA)) { - goto EOP; - } - } - goto PARSE_TRAILING_PARAMS; - -PARSE_TRAILING_PARAMS: - while (true) { - VALUE param; - - switch (state->next_token.type) { - case pQUESTION: - goto PARSE_KEYWORDS; - case pSTAR: - goto EOP; - case pSTAR2: - goto PARSE_KEYWORDS; - case pRPAREN: - goto EOP; - - default: - if (is_keyword(state)) { - goto PARSE_KEYWORDS; - } - - param = parse_function_param(state); - melt_array(¶ms->trailing_positionals); - rb_ary_push(params->trailing_positionals, param); - - break; - } - - if (!parser_advance_if(state, pCOMMA)) { - goto EOP; - } - } - -PARSE_KEYWORDS: - while (true) { - switch (state->next_token.type) { - case pQUESTION: - parser_advance(state); - if (is_keyword(state)) { - parse_keyword(state, ¶ms->optional_keywords, memo); - } else { - raise_syntax_error( - state, - state->next_token, - "optional keyword argument type is expected" - ); - } - break; - - case pSTAR2: - parser_advance(state); - params->rest_keywords = parse_function_param(state); - break; - - case tUIDENT: - case tLIDENT: - case tQIDENT: - case tULIDENT: - case tULLIDENT: - case tBANGIDENT: - KEYWORD_CASES - if (is_keyword(state)) { - parse_keyword(state, ¶ms->required_keywords, memo); - } else { - raise_syntax_error( - state, - state->next_token, - "required keyword argument type is expected" - ); - } - break; - - default: - goto EOP; 
- } - - if (!parser_advance_if(state, pCOMMA)) { - goto EOP; - } - } - -EOP: - if (state->next_token.type != pRPAREN) { - raise_syntax_error( - state, - state->next_token, - "unexpected token for method type parameters" - ); - } - - return; -} - -/* - optional ::= {} - | {} simple_type <`?`> -*/ -static VALUE parse_optional(parserstate *state) { - range rg; - rg.start = state->next_token.range.start; - - VALUE type = parse_simple(state); - - if (state->next_token.type == pQUESTION) { - parser_advance(state); - rg.end = state->current_token.range.end; - VALUE location = rbs_new_location(state->buffer, rg); - return rbs_optional(type, location); - } else { - return type; - } -} - -static void initialize_method_params(method_params *params){ - *params = (method_params) { - .required_positionals = EMPTY_ARRAY, - .optional_positionals = EMPTY_ARRAY, - .rest_positionals = Qnil, - .trailing_positionals = EMPTY_ARRAY, - .required_keywords = EMPTY_HASH, - .optional_keywords = EMPTY_HASH, - .rest_keywords = Qnil, - }; -} - -/* - self_type_binding ::= {} <> - | {} `[` `self` `:` type <`]`> -*/ -static VALUE parse_self_type_binding(parserstate *state) { - if (state->next_token.type == pLBRACKET) { - parser_advance(state); - parser_advance_assert(state, kSELF); - parser_advance_assert(state, pCOLON); - VALUE type = parse_type(state); - parser_advance_assert(state, pRBRACKET); - return type; - } else { - return Qnil; - } -} - -/* - function ::= {} `(` params `)` self_type_binding? `{` `(` params `)` self_type_binding? `->` optional `}` `->` - | {} `(` params `)` self_type_binding? `->` - | {} self_type_binding? `{` `(` params `)` self_type_binding? `->` optional `}` `->` - | {} self_type_binding? `{` self_type_binding `->` optional `}` `->` - | {} self_type_binding? 
`->` -*/ -static void parse_function(parserstate *state, VALUE *function, VALUE *block, VALUE *function_self_type) { - method_params params; - initialize_method_params(¶ms); - - if (state->next_token.type == pLPAREN) { - parser_advance(state); - parse_params(state, ¶ms); - parser_advance_assert(state, pRPAREN); - } - - // Passing NULL to function_self_type means the function itself doesn't accept self type binding. (== method type) - if (function_self_type) { - *function_self_type = parse_self_type_binding(state); - } else { - // Parsing method type. untyped_params means it cannot have a block - if (rbs_is_untyped_params(¶ms)) { - if (state->next_token.type != pARROW) { - raise_syntax_error(state, state->next_token2, "A method type with untyped method parameter cannot have block"); - } - } - } - - VALUE required = Qtrue; - if (state->next_token.type == pQUESTION && state->next_token2.type == pLBRACE) { - // Optional block - required = Qfalse; - parser_advance(state); - } - if (state->next_token.type == pLBRACE) { - parser_advance(state); - - method_params block_params; - initialize_method_params(&block_params); - - if (state->next_token.type == pLPAREN) { - parser_advance(state); - parse_params(state, &block_params); - parser_advance_assert(state, pRPAREN); - } - - VALUE block_self_type = parse_self_type_binding(state); - - parser_advance_assert(state, pARROW); - VALUE block_return_type = parse_optional(state); - - VALUE block_function = Qnil; - if (rbs_is_untyped_params(&block_params)) { - block_function = rbs_untyped_function(block_return_type); - } else { - block_function = rbs_function( - block_params.required_positionals, - block_params.optional_positionals, - block_params.rest_positionals, - block_params.trailing_positionals, - block_params.required_keywords, - block_params.optional_keywords, - block_params.rest_keywords, - block_return_type - ); - } - - *block = rbs_block(block_function, required, block_self_type); - - parser_advance_assert(state, pRBRACE); 
- } - - parser_advance_assert(state, pARROW); - VALUE type = parse_optional(state); - - if (rbs_is_untyped_params(¶ms)) { - *function = rbs_untyped_function(type); - } else { - *function = rbs_function( - params.required_positionals, - params.optional_positionals, - params.rest_positionals, - params.trailing_positionals, - params.required_keywords, - params.optional_keywords, - params.rest_keywords, - type - ); - } -} - -/* - proc_type ::= {`^`} -*/ -static VALUE parse_proc_type(parserstate *state) { - position start = state->current_token.range.start; - VALUE function = Qnil; - VALUE block = Qnil; - VALUE proc_self = Qnil; - parse_function(state, &function, &block, &proc_self); - position end = state->current_token.range.end; - VALUE loc = rbs_location_pp(state->buffer, &start, &end); - - return rbs_proc(function, block, loc, proc_self); -} - -static void check_key_duplication(parserstate *state, VALUE fields, VALUE key) { - if (!NIL_P(rb_hash_aref(fields, key))) { - raise_syntax_error( - state, - state->current_token, - "duplicated record key" - ); - } -} - -/** - * ... `{` ... `}` ... - * > > - * */ -/* - record_attributes ::= {`{`} record_attribute... 
`}` - - record_attribute ::= {} keyword_token `:` - | {} literal_type `=>` -*/ -static VALUE parse_record_attributes(parserstate *state) { - VALUE fields = rb_hash_new(); - - if (state->next_token.type == pRBRACE) { - return fields; - } - - while (true) { - VALUE key, type, - value = rb_ary_new(), - required = Qtrue; - - if (state->next_token.type == pQUESTION) { - // { ?foo: type } syntax - required = Qfalse; - parser_advance(state); - } - - if (is_keyword(state)) { - // { foo: type } syntax - key = parse_keyword_key(state); - check_key_duplication(state, fields, key); - parser_advance_assert(state, pCOLON); - } else { - // { key => type } syntax - switch (state->next_token.type) { - case tSYMBOL: - case tSQSYMBOL: - case tDQSYMBOL: - case tSQSTRING: - case tDQSTRING: - case tINTEGER: - case kTRUE: - case kFALSE: { - key = rb_funcall(parse_simple(state), rb_intern("literal"), 0); - break; - } - default: - raise_syntax_error( - state, - state->next_token, - "unexpected record key token" - ); - } - check_key_duplication(state, fields, key); - parser_advance_assert(state, pFATARROW); - } - type = parse_type(state); - rb_ary_push(value, type); - rb_ary_push(value, required); - rb_hash_aset(fields, key, value); - - if (parser_advance_if(state, pCOMMA)) { - if (state->next_token.type == pRBRACE) { - break; - } - } else { - break; - } - } - return fields; -} - -/* - symbol ::= {} -*/ -static VALUE parse_symbol(parserstate *state) { - VALUE string = state->lexstate->string; - rb_encoding *enc = rb_enc_get(string); - - int offset_bytes = rb_enc_codelen(':', enc); - int bytes = token_bytes(state->current_token) - offset_bytes; - - VALUE literal; - - switch (state->current_token.type) - { - case tSYMBOL: { - char *buffer = peek_token(state->lexstate, state->current_token); - literal = ID2SYM(rb_intern3(buffer+offset_bytes, bytes, enc)); - break; - } - case tDQSYMBOL: - case tSQSYMBOL: { - literal = rb_funcall( - rbs_unquote_string(state, state->current_token.range, 
offset_bytes), - rb_intern("to_sym"), - 0 - ); - break; - } - default: - rbs_abort(); - } - - return rbs_literal( - literal, - rbs_location_current_token(state) - ); -} - -/* - instance_type ::= {type_name} - - type_args ::= {} <> /empty/ - | {} `[` type_list <`]`> - */ -static VALUE parse_instance_type(parserstate *state, bool parse_alias) { - TypeNameKind expected_kind = INTERFACE_NAME | CLASS_NAME; - if (parse_alias) { - expected_kind |= ALIAS_NAME; - } - - range name_range; - VALUE typename = parse_type_name(state, expected_kind, &name_range); - VALUE types = EMPTY_ARRAY; - - TypeNameKind kind; - if (state->current_token.type == tUIDENT) { - kind = CLASS_NAME; - } else if (state->current_token.type == tULIDENT) { - kind = INTERFACE_NAME; - } else if (state->current_token.type == tLIDENT) { - kind = ALIAS_NAME; - } else { - rbs_abort(); - } - - range args_range; - if (state->next_token.type == pLBRACKET) { - parser_advance(state); - args_range.start = state->current_token.range.start; - parse_type_list(state, pRBRACKET, &types); - parser_advance_assert(state, pRBRACKET); - args_range.end = state->current_token.range.end; - } else { - args_range = NULL_RANGE; - } - - range type_range = { - .start = name_range.start, - .end = nonnull_pos_or(args_range.end, name_range.end), - }; - - VALUE location = rbs_new_location(state->buffer, type_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 2); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_optional_child(loc, INTERN("args"), args_range); - - if (kind == CLASS_NAME) { - return rbs_class_instance(typename, types, location); - } else if (kind == INTERFACE_NAME) { - return rbs_interface(typename, types, location); - } else if (kind == ALIAS_NAME) { - return rbs_alias(typename, types, location); - } else { - return Qnil; - } -} - -/* - singleton_type ::= {`singleton`} `(` type_name <`)`> -*/ -static VALUE parse_singleton_type(parserstate *state) { - 
parser_assert(state, kSINGLETON); - - range type_range; - type_range.start = state->current_token.range.start; - parser_advance_assert(state, pLPAREN); - parser_advance(state); - - range name_range; - VALUE typename = parse_type_name(state, CLASS_NAME, &name_range); - - parser_advance_assert(state, pRPAREN); - type_range.end = state->current_token.range.end; - - VALUE location = rbs_new_location(state->buffer, type_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 1); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - - return rbs_class_singleton(typename, location); -} - -/* - simple ::= {} `(` type <`)`> - | {} - | {} - | {} class_instance `[` type_list <`]`> - | {} `singleton` `(` type_name <`)`> - | {} `[` type_list <`]`> - | {} `{` record_attributes <`}`> - | {} `^` -*/ -static VALUE parse_simple(parserstate *state) { - parser_advance(state); - - switch (state->current_token.type) { - case pLPAREN: { - VALUE type = parse_type(state); - parser_advance_assert(state, pRPAREN); - return type; - } - case kBOOL: { - return rbs_bases_bool(rbs_location_current_token(state)); - } - case kBOT: { - return rbs_bases_bottom(rbs_location_current_token(state)); - } - case kCLASS: { - return rbs_bases_class(rbs_location_current_token(state)); - } - case kINSTANCE: { - return rbs_bases_instance(rbs_location_current_token(state)); - } - case kNIL: { - return rbs_bases_nil(rbs_location_current_token(state)); - } - case kSELF: { - return rbs_bases_self(rbs_location_current_token(state)); - } - case kTOP: { - return rbs_bases_top(rbs_location_current_token(state)); - } - case kVOID: { - return rbs_bases_void(rbs_location_current_token(state)); - } - case kUNTYPED: { - return rbs_bases_any(false, rbs_location_current_token(state)); - } - case k__TODO__: { - return rbs_bases_any(true, rbs_location_current_token(state)); - } - case tINTEGER: { - VALUE literal = rb_funcall( - string_of_loc(state, state->current_token.range.start, 
state->current_token.range.end), - rb_intern("to_i"), - 0 - ); - return rbs_literal( - literal, - rbs_location_current_token(state) - ); - } - case kTRUE: { - return rbs_literal(Qtrue, rbs_location_current_token(state)); - } - case kFALSE: { - return rbs_literal(Qfalse, rbs_location_current_token(state)); - } - case tSQSTRING: - case tDQSTRING: { - VALUE literal = rbs_unquote_string(state, state->current_token.range, 0); - return rbs_literal( - literal, - rbs_location_current_token(state) - ); - } - case tSYMBOL: - case tSQSYMBOL: - case tDQSYMBOL: { - return parse_symbol(state); - } - case tUIDENT: { - const char *name_str = peek_token(state->lexstate, state->current_token); - size_t name_len = token_bytes(state->current_token); - - rbs_constant_id_t name = rbs_constant_pool_find(&state->constant_pool, (const uint8_t *) name_str, name_len); - - if (parser_typevar_member(state, name)) { - ID name = rb_intern3(name_str, name_len, rb_enc_get(state->lexstate->string)); - return rbs_variable(ID2SYM(name), rbs_location_current_token(state)); - } - // fallthrough for type name - } - case tULIDENT: // fallthrough - case tLIDENT: // fallthrough - case pCOLON2: { - return parse_instance_type(state, true); - } - case kSINGLETON: { - return parse_singleton_type(state); - } - case pLBRACKET: { - range rg; - rg.start = state->current_token.range.start; - VALUE types = EMPTY_ARRAY; - if (state->next_token.type != pRBRACKET) { - parse_type_list(state, pRBRACKET, &types); - } - parser_advance_assert(state, pRBRACKET); - rg.end = state->current_token.range.end; - - return rbs_tuple(types, rbs_new_location(state->buffer, rg)); - } - case pAREF_OPR: { - return rbs_tuple(EMPTY_ARRAY, rbs_new_location(state->buffer, state->current_token.range)); - } - case pLBRACE: { - position start = state->current_token.range.start; - VALUE fields = parse_record_attributes(state); - parser_advance_assert(state, pRBRACE); - position end = state->current_token.range.end; - VALUE location = 
rbs_location_pp(state->buffer, &start, &end); - return rbs_record(fields, location); - } - case pHAT: { - return parse_proc_type(state); - } - default: - raise_syntax_error( - state, - state->current_token, - "unexpected token for simple type" - ); - } -} - -/* - intersection ::= {} optional `&` ... '&' - | {} -*/ -static VALUE parse_intersection(parserstate *state) { - position start = state->next_token.range.start; - VALUE type = parse_optional(state); - if (state->next_token.type != pAMP) { - return type; - } - - VALUE intersection_types = rb_ary_new(); - rb_ary_push(intersection_types, type); - while (state->next_token.type == pAMP) { - parser_advance(state); - rb_ary_push(intersection_types, parse_optional(state)); - } - range rg = (range) { - .start = start, - .end = state->current_token.range.end, - }; - VALUE location = rbs_new_location(state->buffer, rg); - return rbs_intersection(intersection_types, location); -} - -/* - union ::= {} intersection '|' ... '|' - | {} -*/ -VALUE parse_type(parserstate *state) { - position start = state->next_token.range.start; - VALUE type = parse_intersection(state); - if (state->next_token.type != pBAR) { - return type; - } - - VALUE union_types = rb_ary_new(); - rb_ary_push(union_types, type); - while (state->next_token.type == pBAR) { - parser_advance(state); - rb_ary_push(union_types, parse_intersection(state)); - } - range rg = (range) { - .start = start, - .end = state->current_token.range.end, - }; - VALUE location = rbs_new_location(state->buffer, rg); - return rbs_union(union_types, location); -} - -/* - type_params ::= {} `[` type_param `,` ... <`]`> - | {<>} - - type_param ::= kUNCHECKED? (kIN|kOUT|) tUIDENT upper_bound? default_type? (module_type_params == true) - - type_param ::= tUIDENT upper_bound? default_type? 
(module_type_params == false) -*/ -static VALUE parse_type_params(parserstate *state, range *rg, bool module_type_params) { - VALUE params = EMPTY_ARRAY; - - bool required_param_allowed = true; - - if (state->next_token.type == pLBRACKET) { - parser_advance(state); - - rg->start = state->current_token.range.start; - - while (true) { - VALUE unchecked = Qfalse; - VALUE variance = ID2SYM(rb_intern("invariant")); - VALUE upper_bound = Qnil; - VALUE default_type = Qnil; - - range param_range; - param_range.start = state->next_token.range.start; - - range variance_range = NULL_RANGE; - range unchecked_range = NULL_RANGE; - if (module_type_params) { - if (state->next_token.type == kUNCHECKED) { - unchecked = Qtrue; - parser_advance(state); - unchecked_range = state->current_token.range; - } - - if (state->next_token.type == kIN || state->next_token.type == kOUT) { - switch (state->next_token.type) { - case kIN: - variance = ID2SYM(rb_intern("contravariant")); - break; - case kOUT: - variance = ID2SYM(rb_intern("covariant")); - break; - default: - rbs_abort(); - } - - parser_advance(state); - variance_range = state->current_token.range; - } - } - - parser_advance_assert(state, tUIDENT); - range name_range = state->current_token.range; - - rbs_constant_id_t id = rbs_constant_pool_insert_shared( - &state->constant_pool, - (const uint8_t *) peek_token(state->lexstate, state->current_token), - token_bytes(state->current_token) - ); - - VALUE name = ID2SYM(INTERN_TOKEN(state, state->current_token)); - - parser_insert_typevar(state, id); - - range upper_bound_range = NULL_RANGE; - if (state->next_token.type == pLT) { - parser_advance(state); - upper_bound_range.start = state->current_token.range.start; - upper_bound = parse_type(state); - upper_bound_range.end = state->current_token.range.end; - } - - range default_type_range = NULL_RANGE; - if (module_type_params) { - if (state->next_token.type == pEQ) { - parser_advance(state); - - default_type_range.start = 
state->current_token.range.start; - default_type = parse_type(state); - default_type_range.end = state->current_token.range.end; - - required_param_allowed = false; - } else { - if (!required_param_allowed) { - raise_syntax_error( - state, - state->current_token, - "required type parameter is not allowed after optional type parameter" - ); - } - } - } - - param_range.end = state->current_token.range.end; - - VALUE location = rbs_new_location(state->buffer, param_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 5); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_optional_child(loc, INTERN("variance"), variance_range); - rbs_loc_add_optional_child(loc, INTERN("unchecked"), unchecked_range); - rbs_loc_add_optional_child(loc, INTERN("upper_bound"), upper_bound_range); - rbs_loc_add_optional_child(loc, INTERN("default"), default_type_range); - - VALUE param = rbs_ast_type_param(name, variance, upper_bound, default_type, unchecked, location); - - melt_array(¶ms); - rb_ary_push(params, param); - - if (state->next_token.type == pCOMMA) { - parser_advance(state); - } - - if (state->next_token.type == pRBRACKET) { - break; - } - } - - parser_advance_assert(state, pRBRACKET); - rg->end = state->current_token.range.end; - } else { - *rg = NULL_RANGE; - } - - rb_funcall( - RBS_AST_TypeParam, - rb_intern("resolve_variables"), - 1, - params - ); - - return params; -} - -/* - method_type ::= {} type_params - */ -VALUE parse_method_type(parserstate *state) { - parser_push_typevar_table(state, false); - - range rg; - rg.start = state->next_token.range.start; - - range params_range = NULL_RANGE; - VALUE type_params = parse_type_params(state, ¶ms_range, false); - - range type_range; - type_range.start = state->next_token.range.start; - - VALUE function = Qnil; - VALUE block = Qnil; - parse_function(state, &function, &block, NULL); - - rg.end = state->current_token.range.end; - type_range.end = rg.end; - - 
parser_pop_typevar_table(state); - - VALUE location = rbs_new_location(state->buffer, rg); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 2); - rbs_loc_add_required_child(loc, INTERN("type"), type_range); - rbs_loc_add_optional_child(loc, INTERN("type_params"), params_range); - - return rbs_method_type( - type_params, - function, - block, - location - ); -} - -/* - global_decl ::= {tGIDENT} `:` -*/ -static VALUE parse_global_decl(parserstate *state, VALUE annotations) { - range decl_range; - decl_range.start = state->current_token.range.start; - - VALUE comment = get_comment(state, decl_range.start.line); - range name_range = state->current_token.range; - VALUE typename = ID2SYM(INTERN_TOKEN(state, state->current_token)); - - parser_advance_assert(state, pCOLON); - range colon_range = state->current_token.range; - - VALUE type = parse_type(state); - decl_range.end = state->current_token.range.end; - - VALUE location = rbs_new_location(state->buffer, decl_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 2); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_required_child(loc, INTERN("colon"), colon_range); - - return rbs_ast_decl_global(typename, type, location, comment, annotations); -} - -/* - const_decl ::= {const_name} `:` -*/ -static VALUE parse_const_decl(parserstate *state, VALUE annotations) { - range decl_range; - - decl_range.start = state->current_token.range.start; - VALUE comment = get_comment(state, decl_range.start.line); - - range name_range; - VALUE typename = parse_type_name(state, CLASS_NAME, &name_range); - - parser_advance_assert(state, pCOLON); - range colon_range = state->current_token.range; - - VALUE type = parse_type(state); - decl_range.end = state->current_token.range.end; - - VALUE location = rbs_new_location(state->buffer, decl_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 2); - 
rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_required_child(loc, INTERN("colon"), colon_range); - - return rbs_ast_decl_constant(typename, type, location, comment, annotations); -} - -/* - type_decl ::= {kTYPE} alias_name `=` -*/ -static VALUE parse_type_decl(parserstate *state, position comment_pos, VALUE annotations) { - parser_push_typevar_table(state, true); - - range decl_range; - decl_range.start = state->current_token.range.start; - comment_pos = nonnull_pos_or(comment_pos, decl_range.start); - - range keyword_range = state->current_token.range; - - parser_advance(state); - - range name_range; - VALUE typename = parse_type_name(state, ALIAS_NAME, &name_range); - - range params_range; - VALUE type_params = parse_type_params(state, ¶ms_range, true); - - parser_advance_assert(state, pEQ); - range eq_range = state->current_token.range; - - VALUE type = parse_type(state); - decl_range.end = state->current_token.range.end; - - VALUE location = rbs_new_location(state->buffer, decl_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 4); - rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_optional_child(loc, INTERN("type_params"), params_range); - rbs_loc_add_required_child(loc, INTERN("eq"), eq_range); - - parser_pop_typevar_table(state); - - return rbs_ast_decl_type_alias( - typename, - type_params, - type, - annotations, - location, - get_comment(state, comment_pos.line) - ); -} - -/* - annotation ::= {} -*/ -static VALUE parse_annotation(parserstate *state) { - VALUE content = rb_funcall(state->buffer, rb_intern("content"), 0); - rb_encoding *enc = rb_enc_get(content); - - range rg = state->current_token.range; - - int offset_bytes = rb_enc_codelen('%', enc) + rb_enc_codelen('a', enc); - - unsigned int open_char = rb_enc_mbc_to_codepoint( - RSTRING_PTR(state->lexstate->string) + rg.start.byte_pos + 
offset_bytes, - RSTRING_END(state->lexstate->string), - enc - ); - - unsigned int close_char; - - switch (open_char) { - case '{': - close_char = '}'; - break; - case '(': - close_char = ')'; - break; - case '[': - close_char = ']'; - break; - case '<': - close_char = '>'; - break; - case '|': - close_char = '|'; - break; - default: - rbs_abort(); - } - - int open_bytes = rb_enc_codelen(open_char, enc); - int close_bytes = rb_enc_codelen(close_char, enc); - - char *buffer = RSTRING_PTR(state->lexstate->string) + rg.start.byte_pos + offset_bytes + open_bytes; - VALUE string = rb_enc_str_new( - buffer, - rg.end.byte_pos - rg.start.byte_pos - offset_bytes - open_bytes - close_bytes, - enc - ); - rb_funcall(string, rb_intern("strip!"), 0); - - VALUE location = rbs_location_current_token(state); - - return rbs_ast_annotation(string, location); -} - -/* - annotations ::= {} annotation ... - | {<>} -*/ -static void parse_annotations(parserstate *state, VALUE *annotations, position *annot_pos) { - *annot_pos = NullPosition; - - while (true) { - if (state->next_token.type == tANNOTATION) { - parser_advance(state); - - if (null_position_p((*annot_pos))) { - *annot_pos = state->current_token.range.start; - } - - melt_array(annotations); - rb_ary_push(*annotations, parse_annotation(state)); - } else { - break; - } - } -} - -/* - method_name ::= {} - | {} (IDENT | keyword)~<`?`> -*/ -static VALUE parse_method_name(parserstate *state, range *range) { - parser_advance(state); - - switch (state->current_token.type) - { - case tUIDENT: - case tLIDENT: - case tULIDENT: - case tULLIDENT: - KEYWORD_CASES - if (state->next_token.type == pQUESTION && state->current_token.range.end.byte_pos == state->next_token.range.start.byte_pos) { - range->start = state->current_token.range.start; - range->end = state->next_token.range.end; - parser_advance(state); - - ID id = rb_intern3( - RSTRING_PTR(state->lexstate->string) + range->start.byte_pos, - range->end.byte_pos - range->start.byte_pos, - 
rb_enc_get(state->lexstate->string) - ); - - return ID2SYM(id); - } else { - *range = state->current_token.range; - return ID2SYM(INTERN_TOKEN(state, state->current_token)); - } - - case tBANGIDENT: - case tEQIDENT: - *range = state->current_token.range; - return ID2SYM(INTERN_TOKEN(state, state->current_token)); - - case tQIDENT: { - return rb_to_symbol(rbs_unquote_string(state, state->current_token.range, 0)); - } - - case pBAR: - case pHAT: - case pAMP: - case pSTAR: - case pSTAR2: - case pLT: - case pAREF_OPR: - case tOPERATOR: - *range = state->current_token.range; - return ID2SYM(INTERN_TOKEN(state, state->current_token)); - - default: - raise_syntax_error( - state, - state->current_token, - "unexpected token for method name" - ); - } -} - -typedef enum { - INSTANCE_KIND, - SINGLETON_KIND, - INSTANCE_SINGLETON_KIND -} InstanceSingletonKind; - -/* - instance_singleton_kind ::= {<>} - | {} kSELF <`.`> - | {} kSELF~`?` <`.`> - - @param allow_selfq `true` to accept `self?` kind. -*/ -static InstanceSingletonKind parse_instance_singleton_kind(parserstate *state, bool allow_selfq, range *rg) { - InstanceSingletonKind kind = INSTANCE_KIND; - - if (state->next_token.type == kSELF) { - range self_range = state->next_token.range; - - if (state->next_token2.type == pDOT) { - parser_advance(state); - parser_advance(state); - kind = SINGLETON_KIND; - } else if ( - state->next_token2.type == pQUESTION - && state->next_token.range.end.char_pos == state->next_token2.range.start.char_pos - && state->next_token3.type == pDOT - && allow_selfq) { - parser_advance(state); - parser_advance(state); - parser_advance(state); - kind = INSTANCE_SINGLETON_KIND; - } - - *rg = (range) { - .start = self_range.start, - .end = state->current_token.range.end, - }; - } else { - *rg = NULL_RANGE; - } - - return kind; -} - -/** - * def_member ::= {kDEF} method_name `:` - * | {kPRIVATE} kDEF method_name `:` - * | {kPUBLIC} kDEF method_name `:` - * - * method_types ::= {} - * | {} <`...`> - * | {} 
method_type `|` - * - * @param instance_only `true` to reject singleton method definition. - * @param accept_overload `true` to accept overloading (...) definition. - * */ -static VALUE parse_member_def(parserstate *state, bool instance_only, bool accept_overload, position comment_pos, VALUE annotations) { - range member_range; - member_range.start = state->current_token.range.start; - comment_pos = nonnull_pos_or(comment_pos, member_range.start); - - VALUE comment = get_comment(state, comment_pos.line); - - range visibility_range; - VALUE visibility; - switch (state->current_token.type) - { - case kPRIVATE: { - visibility_range = state->current_token.range; - visibility = ID2SYM(rb_intern("private")); - member_range.start = visibility_range.start; - parser_advance(state); - break; - } - case kPUBLIC: { - visibility_range = state->current_token.range; - visibility = ID2SYM(rb_intern("public")); - member_range.start = visibility_range.start; - parser_advance(state); - break; - } - default: - visibility_range = NULL_RANGE; - visibility = Qnil; - break; - } - - range keyword_range = state->current_token.range; - - range kind_range; - InstanceSingletonKind kind; - if (instance_only) { - kind_range = NULL_RANGE; - kind = INSTANCE_KIND; - } else { - kind = parse_instance_singleton_kind(state, NIL_P(visibility), &kind_range); - } - - range name_range; - VALUE name = parse_method_name(state, &name_range); - VALUE overloads = rb_ary_new(); - VALUE overloading = Qfalse; - - if (state->next_token.type == pDOT && RB_SYM2ID(name) == rb_intern("self?")) { - raise_syntax_error( - state, - state->next_token, - "`self?` method cannot have visibility" - ); - } else { - parser_advance_assert(state, pCOLON); - } - - parser_push_typevar_table(state, kind != INSTANCE_KIND); - - range overloading_range = NULL_RANGE; - bool loop = true; - while (loop) { - VALUE annotations = EMPTY_ARRAY; - position overload_annot_pos = NullPosition; - - if (state->next_token.type == tANNOTATION) { - 
parse_annotations(state, &annotations, &overload_annot_pos); - } - - switch (state->next_token.type) { - case pLPAREN: - case pARROW: - case pLBRACE: - case pLBRACKET: - case pQUESTION: - { - VALUE method_type = parse_method_type(state); - rb_ary_push(overloads, rbs_ast_members_method_definition_overload(annotations, method_type)); - member_range.end = state->current_token.range.end; - break; - } - - case pDOT3: - if (accept_overload) { - overloading = Qtrue; - parser_advance(state); - loop = false; - overloading_range = state->current_token.range; - member_range.end = overloading_range.end; - break; - } else { - raise_syntax_error( - state, - state->next_token, - "unexpected overloading method definition" - ); - } - - default: - raise_syntax_error( - state, - state->next_token, - "unexpected token for method type" - ); - } - - if (state->next_token.type == pBAR) { - parser_advance(state); - } else { - loop = false; - } - } - - parser_pop_typevar_table(state); - - VALUE k; - switch (kind) { - case INSTANCE_KIND: - k = ID2SYM(rb_intern("instance")); - break; - case SINGLETON_KIND: - k = ID2SYM(rb_intern("singleton")); - break; - case INSTANCE_SINGLETON_KIND: - k = ID2SYM(rb_intern("singleton_instance")); - break; - default: - rbs_abort(); - } - - VALUE location = rbs_new_location(state->buffer, member_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 5); - rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_optional_child(loc, INTERN("kind"), kind_range); - rbs_loc_add_optional_child(loc, INTERN("overloading"), overloading_range); - rbs_loc_add_optional_child(loc, INTERN("visibility"), visibility_range); - - return rbs_ast_members_method_definition( - name, - k, - overloads, - annotations, - location, - comment, - overloading, - visibility - ); -} - -/** - * class_instance_name ::= {} - * | {} class_name `[` type args <`]`> - * - * @param 
kind - * */ -void class_instance_name(parserstate *state, TypeNameKind kind, VALUE *name, VALUE *args, range *name_range, range *args_range) { - parser_advance(state); - - *name = parse_type_name(state, kind, name_range); - - if (state->next_token.type == pLBRACKET) { - parser_advance(state); - args_range->start = state->current_token.range.start; - parse_type_list(state, pRBRACKET, args); - parser_advance_assert(state, pRBRACKET); - args_range->end = state->current_token.range.end; - } else { - *args_range = NULL_RANGE; - } -} - -/** - * mixin_member ::= {kINCLUDE} - * | {kPREPEND} - * | {kEXTEND} - * - * @param from_interface `true` when the member is in an interface. - * */ -static VALUE parse_mixin_member(parserstate *state, bool from_interface, position comment_pos, VALUE annotations) { - range member_range; - member_range.start = state->current_token.range.start; - comment_pos = nonnull_pos_or(comment_pos, member_range.start); - - enum TokenType type = state->current_token.type; - range keyword_range = state->current_token.range; - - bool reset_typevar_scope; - switch (type) - { - case kINCLUDE: - reset_typevar_scope = false; - break; - case kEXTEND: - reset_typevar_scope = true; - break; - case kPREPEND: - reset_typevar_scope = false; - break; - default: - rbs_abort(); - } - - if (from_interface) { - if (state->current_token.type != kINCLUDE) { - raise_syntax_error( - state, - state->current_token, - "unexpected mixin in interface declaration" - ); - } - } - - parser_push_typevar_table(state, reset_typevar_scope); - - VALUE name; - VALUE args = EMPTY_ARRAY; - range name_range; - range args_range = NULL_RANGE; - class_instance_name( - state, - from_interface ? 
INTERFACE_NAME : (INTERFACE_NAME | CLASS_NAME), - &name, &args, &name_range, &args_range - ); - - parser_pop_typevar_table(state); - - member_range.end = state->current_token.range.end; - - VALUE location = rbs_new_location(state->buffer, member_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 3); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); - rbs_loc_add_optional_child(loc, INTERN("args"), args_range); - - VALUE comment = get_comment(state, comment_pos.line); - switch (type) - { - case kINCLUDE: - return rbs_ast_members_include(name, args, annotations, location, comment); - case kEXTEND: - return rbs_ast_members_extend(name, args, annotations, location, comment); - case kPREPEND: - return rbs_ast_members_prepend(name, args, annotations, location, comment); - default: - rbs_abort(); - } -} - -/** - * @code - * alias_member ::= {kALIAS} method_name - * | {kALIAS} kSELF `.` method_name kSELF `.` - * @endcode - * - * @param[in] instance_only `true` to reject `self.` alias. 
- * */ -static VALUE parse_alias_member(parserstate *state, bool instance_only, position comment_pos, VALUE annotations) { - range member_range; - member_range.start = state->current_token.range.start; - range keyword_range = state->current_token.range; - - comment_pos = nonnull_pos_or(comment_pos, member_range.start); - VALUE comment = get_comment(state, comment_pos.line); - - VALUE kind, new_name, old_name; - range new_kind_range, old_kind_range, new_name_range, old_name_range; - if (!instance_only && state->next_token.type == kSELF) { - kind = ID2SYM(rb_intern("singleton")); - - new_kind_range.start = state->next_token.range.start; - new_kind_range.end = state->next_token2.range.end; - parser_advance_assert(state, kSELF); - parser_advance_assert(state, pDOT); - new_name = parse_method_name(state, &new_name_range); - - old_kind_range.start = state->next_token.range.start; - old_kind_range.end = state->next_token2.range.end; - parser_advance_assert(state, kSELF); - parser_advance_assert(state, pDOT); - old_name = parse_method_name(state, &old_name_range); - } else { - kind = ID2SYM(rb_intern("instance")); - new_name = parse_method_name(state, &new_name_range); - old_name = parse_method_name(state, &old_name_range); - - new_kind_range = NULL_RANGE; - old_kind_range = NULL_RANGE; - } - - member_range.end = state->current_token.range.end; - VALUE location = rbs_new_location(state->buffer, member_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 5); - rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); - rbs_loc_add_required_child(loc, INTERN("new_name"), new_name_range); - rbs_loc_add_required_child(loc, INTERN("old_name"), old_name_range); - rbs_loc_add_optional_child(loc, INTERN("new_kind"), new_kind_range); - rbs_loc_add_optional_child(loc, INTERN("old_kind"), old_kind_range); - - return rbs_ast_members_alias( - new_name, - old_name, - kind, - annotations, - location, - comment - ); -} - -/* - variable_member ::= 
{tAIDENT} `:` - | {kSELF} `.` tAIDENT `:` - | {tA2IDENT} `:` -*/ -static VALUE parse_variable_member(parserstate *state, position comment_pos, VALUE annotations) { - if (rb_array_len(annotations) > 0) { - raise_syntax_error( - state, - state->current_token, - "annotation cannot be given to variable members" - ); - } - - range member_range; - member_range.start = state->current_token.range.start; - comment_pos = nonnull_pos_or(comment_pos, member_range.start); - VALUE comment = get_comment(state, comment_pos.line); - - switch (state->current_token.type) - { - case tAIDENT: { - range name_range = state->current_token.range; - VALUE name = ID2SYM(INTERN_TOKEN(state, state->current_token)); - - parser_advance_assert(state, pCOLON); - range colon_range = state->current_token.range; - - VALUE type = parse_type(state); - member_range.end = state->current_token.range.end; - - VALUE location = rbs_new_location(state->buffer, member_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 3); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_required_child(loc, INTERN("colon"), colon_range); - rbs_loc_add_optional_child(loc, INTERN("kind"), NULL_RANGE); - - return rbs_ast_members_instance_variable(name, type, location, comment); - } - case tA2IDENT: { - range name_range = state->current_token.range; - VALUE name = ID2SYM(INTERN_TOKEN(state, state->current_token)); - - parser_advance_assert(state, pCOLON); - range colon_range = state->current_token.range; - - parser_push_typevar_table(state, true); - VALUE type = parse_type(state); - parser_pop_typevar_table(state); - member_range.end = state->current_token.range.end; - - VALUE location = rbs_new_location(state->buffer, member_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 3); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_required_child(loc, INTERN("colon"), colon_range); - 
rbs_loc_add_optional_child(loc, INTERN("kind"), NULL_RANGE); - - return rbs_ast_members_class_variable(name, type, location, comment); - } - case kSELF: { - range kind_range = { - .start = state->current_token.range.start, - .end = state->next_token.range.end - }; - - parser_advance_assert(state, pDOT); - parser_advance_assert(state, tAIDENT); - - range name_range = state->current_token.range; - VALUE name = ID2SYM(INTERN_TOKEN(state, state->current_token)); - - parser_advance_assert(state, pCOLON); - range colon_range = state->current_token.range; - - parser_push_typevar_table(state, true); - VALUE type = parse_type(state); - parser_pop_typevar_table(state); - member_range.end = state->current_token.range.end; - - VALUE location = rbs_new_location(state->buffer, member_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 3); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_required_child(loc, INTERN("colon"), colon_range); - rbs_loc_add_optional_child(loc, INTERN("kind"), kind_range); - - return rbs_ast_members_class_instance_variable(name, type, location, comment); - } - default: - rbs_abort(); - } -} - -/* - visibility_member ::= {<`public`>} - | {<`private`>} -*/ -static VALUE parse_visibility_member(parserstate *state, VALUE annotations) { - if (rb_array_len(annotations) > 0) { - raise_syntax_error( - state, - state->current_token, - "annotation cannot be given to visibility members" - ); - } - - VALUE location = rbs_new_location(state->buffer, state->current_token.range); - - switch (state->current_token.type) - { - case kPUBLIC: - return rbs_ast_members_public(location); - case kPRIVATE: - return rbs_ast_members_private(location); - default: - rbs_abort(); - } -} - -/* - attribute_member ::= {attr_keyword} attr_name attr_var `:` - | {visibility} attr_keyword attr_name attr_var `:` - | {attr_keyword} `self` `.` attr_name attr_var `:` - | {visibility} attr_keyword `self` `.` attr_name attr_var `:` - - 
attr_keyword ::= `attr_reader` | `attr_writer` | `attr_accessor` - - visibility ::= `public` | `private` - - attr_var ::= # empty - | `(` tAIDENT `)` # Ivar name - | `(` `)` # No variable -*/ -static VALUE parse_attribute_member(parserstate *state, position comment_pos, VALUE annotations) { - range member_range; - member_range.start = state->current_token.range.start; - comment_pos = nonnull_pos_or(comment_pos, member_range.start); - VALUE comment = get_comment(state, comment_pos.line); - - VALUE visibility; - range visibility_range; - switch (state->current_token.type) - { - case kPRIVATE: - visibility = ID2SYM(rb_intern("private")); - visibility_range = state->current_token.range; - parser_advance(state); - break; - case kPUBLIC: - visibility = ID2SYM(rb_intern("public")); - visibility_range = state->current_token.range; - parser_advance(state); - break; - default: - visibility = Qnil; - visibility_range = NULL_RANGE; - break; - } - - enum TokenType attr_type = state->current_token.type; - range keyword_range = state->current_token.range; - - range kind_range; - InstanceSingletonKind is_kind = parse_instance_singleton_kind(state, false, &kind_range); - VALUE kind = ID2SYM(rb_intern((is_kind == INSTANCE_KIND) ? 
"instance" : "singleton")); - - range name_range; - VALUE attr_name = parse_method_name(state, &name_range); - - VALUE ivar_name; - range ivar_range, ivar_name_range; - if (state->next_token.type == pLPAREN) { - parser_advance_assert(state, pLPAREN); - ivar_range.start = state->current_token.range.start; - - if (parser_advance_if(state, tAIDENT)) { - ivar_name = ID2SYM(INTERN_TOKEN(state, state->current_token)); - ivar_name_range = state->current_token.range; - } else { - ivar_name = Qfalse; - ivar_name_range = NULL_RANGE; - } - - parser_advance_assert(state, pRPAREN); - ivar_range.end = state->current_token.range.end; - } else { - ivar_range = NULL_RANGE; - ivar_name = Qnil; - ivar_name_range = NULL_RANGE; - } - - parser_advance_assert(state, pCOLON); - range colon_range = state->current_token.range; - - parser_push_typevar_table(state, is_kind == SINGLETON_KIND); - VALUE type = parse_type(state); - parser_pop_typevar_table(state); - member_range.end = state->current_token.range.end; - - VALUE location = rbs_new_location(state->buffer, member_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 7); - rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_required_child(loc, INTERN("colon"), colon_range); - rbs_loc_add_optional_child(loc, INTERN("kind"), kind_range); - rbs_loc_add_optional_child(loc, INTERN("ivar"), ivar_range); - rbs_loc_add_optional_child(loc, INTERN("ivar_name"), ivar_name_range); - rbs_loc_add_optional_child(loc, INTERN("visibility"), visibility_range); - - switch (attr_type) - { - case kATTRREADER: - return rbs_ast_members_attr_reader(attr_name, type, ivar_name, kind, annotations, location, comment, visibility); - case kATTRWRITER: - return rbs_ast_members_attr_writer(attr_name, type, ivar_name, kind, annotations, location, comment, visibility); - case kATTRACCESSOR: - return rbs_ast_members_attr_accessor(attr_name, type, 
ivar_name, kind, annotations, location, comment, visibility); - default: - rbs_abort(); - } -} - -/* - interface_members ::= {} ... kEND - - interface_member ::= def_member (instance method only && no overloading) - | mixin_member (interface only) - | alias_member (instance only) -*/ -static VALUE parse_interface_members(parserstate *state) { - VALUE members = EMPTY_ARRAY; - - while (state->next_token.type != kEND) { - VALUE annotations = EMPTY_ARRAY; - position annot_pos = NullPosition; - - parse_annotations(state, &annotations, &annot_pos); - - parser_advance(state); - - VALUE member; - switch (state->current_token.type) { - case kDEF: { - member = parse_member_def(state, true, true, annot_pos, annotations); - break; - } - - case kINCLUDE: - case kEXTEND: - case kPREPEND: { - member = parse_mixin_member(state, true, annot_pos, annotations); - break; - } - - case kALIAS: { - member = parse_alias_member(state, true, annot_pos, annotations); - break; - } - - default: - raise_syntax_error( - state, - state->current_token, - "unexpected token for interface declaration member" - ); - } - - melt_array(&members); - rb_ary_push(members, member); - } - - return members; -} - -/* - interface_decl ::= {`interface`} interface_name module_type_params interface_members -*/ -static VALUE parse_interface_decl(parserstate *state, position comment_pos, VALUE annotations) { - parser_push_typevar_table(state, true); - - range member_range; - member_range.start = state->current_token.range.start; - comment_pos = nonnull_pos_or(comment_pos, member_range.start); - - range keyword_range = state->current_token.range; - - parser_advance(state); - - range name_range; - VALUE name = parse_type_name(state, INTERFACE_NAME, &name_range); - range type_params_range; - VALUE params = parse_type_params(state, &type_params_range, true); - VALUE members = parse_interface_members(state); - - parser_advance_assert(state, kEND); - range end_range = state->current_token.range; - member_range.end = 
end_range.end; - - parser_pop_typevar_table(state); - - VALUE location = rbs_new_location(state->buffer, member_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 4); - rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_required_child(loc, INTERN("end"), end_range); - rbs_loc_add_optional_child(loc, INTERN("type_params"), type_params_range); - - return rbs_ast_decl_interface( - name, - params, - members, - annotations, - location, - get_comment(state, comment_pos.line) - ); -} - -/* - module_self_types ::= {`:`} module_self_type `,` ... `,` - - module_self_type ::= - | module_name `[` type_list <`]`> -*/ -static void parse_module_self_types(parserstate *state, VALUE *array) { - while (true) { - parser_advance(state); - - range self_range; - self_range.start = state->current_token.range.start; - range name_range; - VALUE module_name = parse_type_name(state, CLASS_NAME | INTERFACE_NAME, &name_range); - self_range.end = name_range.end; - - VALUE args = EMPTY_ARRAY; - range args_range = NULL_RANGE; - if (state->next_token.type == pLBRACKET) { - parser_advance(state); - args_range.start = state->current_token.range.start; - parse_type_list(state, pRBRACKET, &args); - parser_advance(state); - self_range.end = args_range.end = state->current_token.range.end; - } - - VALUE location = rbs_new_location(state->buffer, self_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 2); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_optional_child(loc, INTERN("args"), args_range); - - VALUE self_type = rbs_ast_decl_module_self(module_name, args, location); - melt_array(array); - rb_ary_push(*array, self_type); - - if (state->next_token.type == pCOMMA) { - parser_advance(state); - } else { - break; - } - } -} - -static VALUE parse_nested_decl(parserstate *state, const char *nested_in, position 
annot_pos, VALUE annotations); - -/* - module_members ::= {} ... kEND - - module_member ::= def_member - | variable_member - | mixin_member - | alias_member - | attribute_member - | `public` - | `private` -*/ -static VALUE parse_module_members(parserstate *state) { - VALUE members = EMPTY_ARRAY; - - while (state->next_token.type != kEND) { - VALUE annotations = EMPTY_ARRAY; - position annot_pos = NullPosition; - parse_annotations(state, &annotations, &annot_pos); - - parser_advance(state); - - VALUE member; - switch (state->current_token.type) - { - case kDEF: { - member = parse_member_def(state, false, true, annot_pos, annotations); - break; - } - - case kINCLUDE: - case kEXTEND: - case kPREPEND: { - member = parse_mixin_member(state, false, annot_pos, annotations); - break; - } - - case kALIAS: { - member = parse_alias_member(state, false, annot_pos, annotations); - break; - } - - case tAIDENT: - case tA2IDENT: - case kSELF: { - member = parse_variable_member(state, annot_pos, annotations); - break; - } - - case kATTRREADER: - case kATTRWRITER: - case kATTRACCESSOR: { - member = parse_attribute_member(state, annot_pos, annotations); - break; - } - - case kPUBLIC: - case kPRIVATE: - if (state->next_token.range.start.line == state->current_token.range.start.line) { - switch (state->next_token.type) - { - case kDEF: { - member = parse_member_def(state, false, true, annot_pos, annotations); - break; - } - case kATTRREADER: - case kATTRWRITER: - case kATTRACCESSOR: { - member = parse_attribute_member(state, annot_pos, annotations); - break; - } - default: - raise_syntax_error(state, state->next_token, "method or attribute definition is expected after visibility modifier"); - } - } else { - member = parse_visibility_member(state, annotations); - } - break; - - default: - member = parse_nested_decl(state, "module", annot_pos, annotations); - break; - } - - melt_array(&members); - rb_ary_push(members, member); - } - - return members; -} - -/* - module_decl ::= 
{module_name} module_type_params module_members - | {module_name} module_name module_type_params `:` module_self_types module_members -*/ -static VALUE parse_module_decl0(parserstate *state, range keyword_range, VALUE module_name, range name_range, VALUE comment, VALUE annotations) { - parser_push_typevar_table(state, true); - - range decl_range; - decl_range.start = keyword_range.start; - range type_params_range; - VALUE type_params = parse_type_params(state, &type_params_range, true); - - VALUE self_types = EMPTY_ARRAY; - range colon_range; - range self_types_range; - if (state->next_token.type == pCOLON) { - parser_advance(state); - colon_range = state->current_token.range; - self_types_range.start = state->next_token.range.start; - parse_module_self_types(state, &self_types); - self_types_range.end = state->current_token.range.end; - } else { - colon_range = NULL_RANGE; - self_types_range = NULL_RANGE; - } - - VALUE members = parse_module_members(state); - - parser_advance_assert(state, kEND); - range end_range = state->current_token.range; - decl_range.end = state->current_token.range.end; - - VALUE location = rbs_new_location(state->buffer, decl_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 6); - rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_required_child(loc, INTERN("end"), end_range); - rbs_loc_add_optional_child(loc, INTERN("type_params"), type_params_range); - rbs_loc_add_optional_child(loc, INTERN("colon"), colon_range); - rbs_loc_add_optional_child(loc, INTERN("self_types"), self_types_range); - - parser_pop_typevar_table(state); - - return rbs_ast_decl_module( - module_name, - type_params, - self_types, - members, - annotations, - location, - comment - ); -} - -/* - module_decl ::= {`module`} module_name `=` old_module_name - | {`module`} module_name module_decl0 - -*/ -static VALUE parse_module_decl(parserstate 
*state, position comment_pos, VALUE annotations) { - range keyword_range = state->current_token.range; - - comment_pos = nonnull_pos_or(comment_pos, state->current_token.range.start); - VALUE comment = get_comment(state, comment_pos.line); - - parser_advance(state); - range module_name_range; - VALUE module_name = parse_type_name(state, CLASS_NAME, &module_name_range); - - if (state->next_token.type == pEQ) { - range eq_range = state->next_token.range; - parser_advance(state); - parser_advance(state); - - range old_name_range; - VALUE old_name = parse_type_name(state, CLASS_NAME, &old_name_range); - - range decl_range = { - .start = keyword_range.start, - .end = old_name_range.end - }; - - VALUE location = rbs_new_location(state->buffer, decl_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 4); - rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); - rbs_loc_add_required_child(loc, INTERN("new_name"), module_name_range); - rbs_loc_add_required_child(loc, INTERN("eq"), eq_range); - rbs_loc_add_optional_child(loc, INTERN("old_name"), old_name_range); - - return rbs_ast_decl_module_alias(module_name, old_name, location, comment, annotations); - } else { - return parse_module_decl0(state, keyword_range, module_name, module_name_range, comment, annotations); - } -} - -/* - class_decl_super ::= {} `<` - | {<>} -*/ -static VALUE parse_class_decl_super(parserstate *state, range *lt_range) { - if (parser_advance_if(state, pLT)) { - *lt_range = state->current_token.range; - - range super_range; - super_range.start = state->next_token.range.start; - - VALUE name; - VALUE args = EMPTY_ARRAY; - range name_range, args_range; - class_instance_name(state, CLASS_NAME, &name, &args, &name_range, &args_range); - - super_range.end = state->current_token.range.end; - - VALUE location = rbs_new_location(state->buffer, super_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 2); - 
rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_optional_child(loc, INTERN("args"), args_range); - - return rbs_ast_decl_class_super(name, args, location); - } else { - *lt_range = NULL_RANGE; - return Qnil; - } -} - -/* - class_decl ::= {class_name} type_params class_decl_super class_members <`end`> -*/ -static VALUE parse_class_decl0(parserstate *state, range keyword_range, VALUE name, range name_range, VALUE comment, VALUE annotations) { - parser_push_typevar_table(state, true); - - range decl_range; - decl_range.start = keyword_range.start; - - range type_params_range; - VALUE type_params = parse_type_params(state, &type_params_range, true); - - range lt_range; - VALUE super = parse_class_decl_super(state, <_range); - - VALUE members = parse_module_members(state); - - parser_advance_assert(state, kEND); - - range end_range = state->current_token.range; - - decl_range.end = end_range.end; - - parser_pop_typevar_table(state); - - VALUE location = rbs_new_location(state->buffer, decl_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 5); - rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); - rbs_loc_add_required_child(loc, INTERN("name"), name_range); - rbs_loc_add_required_child(loc, INTERN("end"), end_range); - rbs_loc_add_optional_child(loc, INTERN("type_params"), type_params_range); - rbs_loc_add_optional_child(loc, INTERN("lt"), lt_range); - - return rbs_ast_decl_class( - name, - type_params, - super, - members, - annotations, - location, - comment - ); -} - -/* - class_decl ::= {`class`} class_name `=` - | {`class`} class_name -*/ -static VALUE parse_class_decl(parserstate *state, position comment_pos, VALUE annotations) { - range keyword_range = state->current_token.range; - - comment_pos = nonnull_pos_or(comment_pos, state->current_token.range.start); - VALUE comment = get_comment(state, comment_pos.line); - - parser_advance(state); - range class_name_range; - VALUE class_name = 
parse_type_name(state, CLASS_NAME, &class_name_range); - - if (state->next_token.type == pEQ) { - range eq_range = state->next_token.range; - parser_advance(state); - parser_advance(state); - - range old_name_range; - VALUE old_name = parse_type_name(state, CLASS_NAME, &old_name_range); - - range decl_range = { - .start = keyword_range.start, - .end = old_name_range.end, - }; - - VALUE location = rbs_new_location(state->buffer, decl_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 4); - rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); - rbs_loc_add_required_child(loc, INTERN("new_name"), class_name_range); - rbs_loc_add_required_child(loc, INTERN("eq"), eq_range); - rbs_loc_add_optional_child(loc, INTERN("old_name"), old_name_range); - - return rbs_ast_decl_class_alias(class_name, old_name, location, comment, annotations); - } else { - return parse_class_decl0(state, keyword_range, class_name, class_name_range, comment, annotations); - } -} - -/* - nested_decl ::= {} - | {} - | {} - | {} - | {} -*/ -static VALUE parse_nested_decl(parserstate *state, const char *nested_in, position annot_pos, VALUE annotations) { - parser_push_typevar_table(state, true); - - VALUE decl; - switch (state->current_token.type) { - case tUIDENT: - case pCOLON2: { - decl = parse_const_decl(state, annotations); - break; - } - case tGIDENT: { - decl = parse_global_decl(state, annotations); - break; - } - case kTYPE: { - decl = parse_type_decl(state, annot_pos, annotations); - break; - } - case kINTERFACE: { - decl = parse_interface_decl(state, annot_pos, annotations); - break; - } - case kMODULE: { - decl = parse_module_decl(state, annot_pos, annotations); - break; - } - case kCLASS: { - decl = parse_class_decl(state, annot_pos, annotations); - break; - } - default: - raise_syntax_error( - state, - state->current_token, - "unexpected token for class/module declaration member" - ); - } - - parser_pop_typevar_table(state); - - return decl; 
-} - -static VALUE parse_decl(parserstate *state) { - VALUE annotations = EMPTY_ARRAY; - position annot_pos = NullPosition; - - parse_annotations(state, &annotations, &annot_pos); - - parser_advance(state); - switch (state->current_token.type) { - case tUIDENT: - case pCOLON2: { - return parse_const_decl(state, annotations); - } - case tGIDENT: { - return parse_global_decl(state, annotations); - } - case kTYPE: { - return parse_type_decl(state, annot_pos, annotations); - } - case kINTERFACE: { - return parse_interface_decl(state, annot_pos, annotations); - } - case kMODULE: { - return parse_module_decl(state, annot_pos, annotations); - } - case kCLASS: { - return parse_class_decl(state, annot_pos, annotations); - } - default: - raise_syntax_error( - state, - state->current_token, - "cannot start a declaration" - ); - } -} - -/* - namespace ::= {} (`::`)? (`tUIDENT` `::`)* `tUIDENT` <`::`> - | {} <> (empty -- returns empty namespace) -*/ -static VALUE parse_namespace(parserstate *state, range *rg) { - bool is_absolute = false; - - if (state->next_token.type == pCOLON2) { - *rg = (range) { - .start = state->next_token.range.start, - .end = state->next_token.range.end, - }; - is_absolute = true; - - parser_advance(state); - } - - VALUE path = EMPTY_ARRAY; - - while (true) { - if (state->next_token.type == tUIDENT && state->next_token2.type == pCOLON2) { - melt_array(&path); - rb_ary_push(path, ID2SYM(INTERN_TOKEN(state, state->next_token))); - if (null_position_p(rg->start)) { - rg->start = state->next_token.range.start; - } - rg->end = state->next_token2.range.end; - parser_advance(state); - parser_advance(state); - } else { - break; - } - } - - return rbs_namespace(path, is_absolute ? Qtrue : Qfalse); -} - -/* - use_clauses ::= {} use_clause `,` ... 
`,` - - use_clause ::= {} namespace - | {} namespace tUIDENT `as` - | {} namespace -*/ -static void parse_use_clauses(parserstate *state, VALUE clauses) { - while (true) { - range namespace_range = NULL_RANGE; - VALUE namespace = parse_namespace(state, &namespace_range); - - switch (state->next_token.type) - { - case tLIDENT: - case tULIDENT: - case tUIDENT: { - parser_advance(state); - - enum TokenType ident_type = state->current_token.type; - - range type_name_range = null_range_p(namespace_range) - ? state->current_token.range - : (range) { .start = namespace_range.start, .end = state->current_token.range.end }; - - VALUE type_name = rbs_type_name(namespace, ID2SYM(INTERN_TOKEN(state, state->current_token))); - - range keyword_range = NULL_RANGE; - range new_name_range = NULL_RANGE; - - VALUE new_name = Qnil; - range clause_range = type_name_range; - if (state->next_token.type == kAS) { - parser_advance(state); - keyword_range = state->current_token.range; - - if (ident_type == tUIDENT) parser_advance_assert(state, tUIDENT); - if (ident_type == tLIDENT) parser_advance_assert(state, tLIDENT); - if (ident_type == tULIDENT) parser_advance_assert(state, tULIDENT); - - new_name = ID2SYM(INTERN_TOKEN(state, state->current_token)); - new_name_range = state->current_token.range; - clause_range.end = new_name_range.end; - } - - VALUE location = rbs_new_location(state->buffer, clause_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 3); - rbs_loc_add_required_child(loc, INTERN("type_name"), type_name_range); - rbs_loc_add_optional_child(loc, INTERN("keyword"), keyword_range); - rbs_loc_add_optional_child(loc, INTERN("new_name"), new_name_range); - - rb_ary_push(clauses, rbs_ast_directives_use_single_clause(type_name, new_name, location)); - - break; - } - case pSTAR: - { - range clause_range = namespace_range; - parser_advance(state); - - range star_range = state->current_token.range; - clause_range.end = star_range.end; - - VALUE 
location = rbs_new_location(state->buffer, clause_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 2); - rbs_loc_add_required_child(loc, INTERN("namespace"), namespace_range); - rbs_loc_add_required_child(loc, INTERN("star"), star_range); - - rb_ary_push(clauses, rbs_ast_directives_use_wildcard_clause(namespace, location)); - - break; - } - default: - raise_syntax_error( - state, - state->next_token, - "use clause is expected" - ); - } - - if (state->next_token.type == pCOMMA) { - parser_advance(state); - } else { - break; - } - } - - return; -} - -/* - use_directive ::= {} `use` - */ -static VALUE parse_use_directive(parserstate *state) { - if (state->next_token.type == kUSE) { - parser_advance(state); - - range keyword_range = state->current_token.range; - - VALUE clauses = rb_ary_new(); - parse_use_clauses(state, clauses); - - range directive_range = keyword_range; - directive_range.end = state->current_token.range.end; - - VALUE location = rbs_new_location(state->buffer, directive_range); - rbs_loc *loc = rbs_check_location(location); - rbs_loc_alloc_children(loc, 1); - rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); - - return rbs_ast_directives_use(clauses, location); - } else { - return Qnil; - } -} - -VALUE parse_signature(parserstate *state) { - VALUE dirs = EMPTY_ARRAY; - VALUE decls = EMPTY_ARRAY; - - while (state->next_token.type == kUSE) { - melt_array(&dirs); - rb_ary_push(dirs, parse_use_directive(state)); - } - - while (state->next_token.type != pEOF) { - melt_array(&decls); - rb_ary_push(decls, parse_decl(state)); - } - - VALUE ret = rb_ary_new(); - rb_ary_push(ret, dirs); - rb_ary_push(ret, decls); - return ret; -} - -struct parse_type_arg { - parserstate *parser; - VALUE require_eof; -}; - -static VALUE -ensure_free_parser(VALUE parser) { - free_parser((parserstate *)parser); - return Qnil; -} - -static VALUE -parse_type_try(VALUE a) { - struct parse_type_arg *arg = (struct parse_type_arg 
*)a; - - if (arg->parser->next_token.type == pEOF) { - return Qnil; - } - - VALUE type = parse_type(arg->parser); - - if (RB_TEST(arg->require_eof)) { - parser_advance_assert(arg->parser, pEOF); - } - - return type; -} - -static VALUE -rbsparser_parse_type(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos, VALUE variables, VALUE require_eof) -{ - VALUE string = rb_funcall(buffer, rb_intern("content"), 0); - StringValue(string); - lexstate *lexer = alloc_lexer(string, FIX2INT(start_pos), FIX2INT(end_pos)); - parserstate *parser = alloc_parser(buffer, lexer, FIX2INT(start_pos), FIX2INT(end_pos), variables); - struct parse_type_arg arg = { - parser, - require_eof - }; - return rb_ensure(parse_type_try, (VALUE)&arg, ensure_free_parser, (VALUE)parser); -} - -static VALUE -parse_method_type_try(VALUE a) { - struct parse_type_arg *arg = (struct parse_type_arg *)a; - - if (arg->parser->next_token.type == pEOF) { - return Qnil; - } - - VALUE method_type = parse_method_type(arg->parser); - - if (RB_TEST(arg->require_eof)) { - parser_advance_assert(arg->parser, pEOF); - } - - return method_type; -} - -static VALUE -rbsparser_parse_method_type(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos, VALUE variables, VALUE require_eof) -{ - VALUE string = rb_funcall(buffer, rb_intern("content"), 0); - StringValue(string); - lexstate *lexer = alloc_lexer(string, FIX2INT(start_pos), FIX2INT(end_pos)); - parserstate *parser = alloc_parser(buffer, lexer, FIX2INT(start_pos), FIX2INT(end_pos), variables); - struct parse_type_arg arg = { - parser, - require_eof - }; - return rb_ensure(parse_method_type_try, (VALUE)&arg, ensure_free_parser, (VALUE)parser); -} - -static VALUE -parse_signature_try(VALUE a) { - parserstate *parser = (parserstate *)a; - return parse_signature(parser); -} - -static VALUE -rbsparser_parse_signature(VALUE self, VALUE buffer, VALUE start_pos, VALUE end_pos) -{ - VALUE string = rb_funcall(buffer, rb_intern("content"), 0); - StringValue(string); - 
lexstate *lexer = alloc_lexer(string, FIX2INT(start_pos), FIX2INT(end_pos)); - parserstate *parser = alloc_parser(buffer, lexer, FIX2INT(start_pos), FIX2INT(end_pos), Qnil); - return rb_ensure(parse_signature_try, (VALUE)parser, ensure_free_parser, (VALUE)parser); -} - -static VALUE -rbsparser_lex(VALUE self, VALUE buffer, VALUE end_pos) { - VALUE string = rb_funcall(buffer, rb_intern("content"), 0); - StringValue(string); - lexstate *lexer = alloc_lexer(string, 0, FIX2INT(end_pos)); - VALUE results = rb_ary_new(); - - token token = NullToken; - while (token.type != pEOF) { - token = rbsparser_next_token(lexer); - VALUE type = ID2SYM(rb_intern(token_type_str(token.type))); - VALUE location = rbs_new_location(buffer, token.range); - VALUE pair = rb_ary_new3(2, type, location); - rb_ary_push(results, pair); - } - - free(lexer); - - return results; -} - -void rbs__init_parser(void) { - RBS_Parser = rb_define_class_under(RBS, "Parser", rb_cObject); - rb_gc_register_mark_object(RBS_Parser); - - VALUE empty_array = rb_obj_freeze(rb_ary_new()); - rb_gc_register_mark_object(empty_array); - EMPTY_ARRAY = empty_array; - - VALUE empty_hash = rb_obj_freeze(rb_hash_new()); - rb_gc_register_mark_object(empty_hash); - EMPTY_HASH = empty_hash; - - rb_define_singleton_method(RBS_Parser, "_parse_type", rbsparser_parse_type, 5); - rb_define_singleton_method(RBS_Parser, "_parse_method_type", rbsparser_parse_method_type, 5); - rb_define_singleton_method(RBS_Parser, "_parse_signature", rbsparser_parse_signature, 3); - rb_define_singleton_method(RBS_Parser, "_lex", rbsparser_lex, 2); -} diff --git a/ext/rbs_extension/parser.h b/ext/rbs_extension/parser.h deleted file mode 100644 index ea496d7ae..000000000 --- a/ext/rbs_extension/parser.h +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef RBS__PARSER_H -#define RBS__PARSER_H - -#include "ruby.h" -#include "parserstate.h" - -/** - * RBS::Parser class - * */ -extern VALUE RBS_Parser; - -VALUE parse_type(parserstate *state); -VALUE 
parse_method_type(parserstate *state); -VALUE parse_signature(parserstate *state); - -void rbs__init_parser(); - -#endif diff --git a/ext/rbs_extension/parserstate.c b/ext/rbs_extension/parserstate.c deleted file mode 100644 index d252eb54b..000000000 --- a/ext/rbs_extension/parserstate.c +++ /dev/null @@ -1,411 +0,0 @@ -#include "rbs_extension.h" -#include "rbs/util/rbs_constant_pool.h" - -#define RESET_TABLE_P(table) (table->size == 0) - -id_table *alloc_empty_table(void) { - id_table *table = malloc(sizeof(id_table)); - - *table = (id_table) { - .size = 10, - .count = 0, - .ids = calloc(10, sizeof(rbs_constant_id_t)), - .next = NULL, - }; - - return table; -} - -id_table *alloc_reset_table(void) { - id_table *table = malloc(sizeof(id_table)); - - *table = (id_table) { - .size = 0, - .count = 0, - .ids = NULL, - .next = NULL, - }; - - return table; -} - -id_table *parser_push_typevar_table(parserstate *state, bool reset) { - if (reset) { - id_table *table = alloc_reset_table(); - table->next = state->vars; - state->vars = table; - } - - id_table *table = alloc_empty_table(); - table->next = state->vars; - state->vars = table; - - return table; -} - -void parser_pop_typevar_table(parserstate *state) { - id_table *table; - - if (state->vars) { - table = state->vars; - state->vars = table->next; - free(table->ids); - free(table); - } else { - rb_raise(rb_eRuntimeError, "Cannot pop empty table"); - } - - if (state->vars && RESET_TABLE_P(state->vars)) { - table = state->vars; - state->vars = table->next; - free(table); - } -} - -void parser_insert_typevar(parserstate *state, rbs_constant_id_t id) { - id_table *table = state->vars; - - if (RESET_TABLE_P(table)) { - rb_raise(rb_eRuntimeError, "Cannot insert to reset table"); - } - - if (table->size == table->count) { - // expand - rbs_constant_id_t *ptr = table->ids; - table->size += 10; - table->ids = calloc(table->size, sizeof(rbs_constant_id_t)); - memcpy(table->ids, ptr, sizeof(rbs_constant_id_t) * table->count); - 
free(ptr); - } - - table->ids[table->count++] = id; -} - -bool parser_typevar_member(parserstate *state, rbs_constant_id_t id) { - id_table *table = state->vars; - - while (table && !RESET_TABLE_P(table)) { - for (size_t i = 0; i < table->count; i++) { - if (table->ids[i] == id) { - return true; - } - } - - table = table->next; - } - - return false; -} - -void print_parser(parserstate *state) { - printf(" current_token = %s (%d...%d)\n", token_type_str(state->current_token.type), state->current_token.range.start.char_pos, state->current_token.range.end.char_pos); - printf(" next_token = %s (%d...%d)\n", token_type_str(state->next_token.type), state->next_token.range.start.char_pos, state->next_token.range.end.char_pos); - printf(" next_token2 = %s (%d...%d)\n", token_type_str(state->next_token2.type), state->next_token2.range.start.char_pos, state->next_token2.range.end.char_pos); - printf(" next_token3 = %s (%d...%d)\n", token_type_str(state->next_token3.type), state->next_token3.range.start.char_pos, state->next_token3.range.end.char_pos); -} - -void parser_advance(parserstate *state) { - state->current_token = state->next_token; - state->next_token = state->next_token2; - state->next_token2 = state->next_token3; - - while (true) { - if (state->next_token3.type == pEOF) { - break; - } - - state->next_token3 = rbsparser_next_token(state->lexstate); - - if (state->next_token3.type == tCOMMENT) { - // skip - } else if (state->next_token3.type == tLINECOMMENT) { - insert_comment_line(state, state->next_token3); - } else if (state->next_token3.type == tTRIVIA) { - //skip - } else { - break; - } - } -} - -/** - * Advance token if _next_ token is `type`. - * Ensures one token advance and `state->current_token.type == type`, or current token not changed. - * - * @returns true if token advances, false otherwise. 
- **/ -bool parser_advance_if(parserstate *state, enum TokenType type) { - if (state->next_token.type == type) { - parser_advance(state); - return true; - } else { - return false; - } -} - -void parser_assert(parserstate *state, enum TokenType type) { - if (state->current_token.type != type) { - raise_syntax_error( - state, - state->current_token, - "expected a token `%s`", - token_type_str(type) - ); - } -} - -void parser_advance_assert(parserstate *state, enum TokenType type) { - parser_advance(state); - parser_assert(state, type); -} - -void print_token(token tok) { - printf( - "%s char=%d...%d\n", - token_type_str(tok.type), - tok.range.start.char_pos, - tok.range.end.char_pos - ); -} - -void insert_comment_line(parserstate *state, token tok) { - int prev_line = tok.range.start.line - 1; - - comment *com = comment_get_comment(state->last_comment, prev_line); - - if (com) { - comment_insert_new_line(com, tok); - } else { - state->last_comment = alloc_comment(tok, state->last_comment); - } -} - -VALUE get_comment(parserstate *state, int subject_line) { - int comment_line = subject_line - 1; - - comment *com = comment_get_comment(state->last_comment, comment_line); - - if (com) { - return comment_to_ruby(com, state->buffer); - } else { - return Qnil; - } -} - -comment *alloc_comment(token comment_token, comment *last_comment) { - comment *new_comment = malloc(sizeof(comment)); - - *new_comment = (comment) { - .start = comment_token.range.start, - .end = comment_token.range.end, - - .line_size = 0, - .line_count = 0, - .tokens = NULL, - - .next_comment = last_comment, - }; - - comment_insert_new_line(new_comment, comment_token); - - return new_comment; -} - -void free_comment(comment *com) { - if (com->next_comment) { - free_comment(com->next_comment); - } - - free(com->tokens); - free(com); -} - -void comment_insert_new_line(comment *com, token comment_token) { - if (com->line_count == 0) { - com->start = comment_token.range.start; - } - - if (com->line_count == 
com->line_size) { - com->line_size += 10; - - if (com->tokens) { - token *p = com->tokens; - com->tokens = calloc(com->line_size, sizeof(token)); - memcpy(com->tokens, p, sizeof(token) * com->line_count); - free(p); - } else { - com->tokens = calloc(com->line_size, sizeof(token)); - } - } - - com->tokens[com->line_count++] = comment_token; - com->end = comment_token.range.end; -} - -comment *comment_get_comment(comment *com, int line) { - if (com == NULL) { - return NULL; - } - - if (com->end.line < line) { - return NULL; - } - - if (com->end.line == line) { - return com; - } - - return comment_get_comment(com->next_comment, line); -} - -VALUE comment_to_ruby(comment *com, VALUE buffer) { - VALUE content = rb_funcall(buffer, rb_intern("content"), 0); - rb_encoding *enc = rb_enc_get(content); - VALUE string = rb_enc_str_new_cstr("", enc); - - int hash_bytes = rb_enc_codelen('#', enc); - int space_bytes = rb_enc_codelen(' ', enc); - - for (size_t i = 0; i < com->line_count; i++) { - token tok = com->tokens[i]; - - char *comment_start = RSTRING_PTR(content) + tok.range.start.byte_pos + hash_bytes; - int comment_bytes = RANGE_BYTES(tok.range) - hash_bytes; - unsigned char c = rb_enc_mbc_to_codepoint(comment_start, RSTRING_END(content), enc); - - if (c == ' ') { - comment_start += space_bytes; - comment_bytes -= space_bytes; - } - - rb_str_cat(string, comment_start, comment_bytes); - rb_str_cat_cstr(string, "\n"); - } - - return rbs_ast_comment( - string, - rbs_location_pp(buffer, &com->start, &com->end) - ); -} - -lexstate *alloc_lexer(VALUE string, int start_pos, int end_pos) { - if (start_pos < 0 || end_pos < 0) { - rb_raise(rb_eArgError, "negative position range: %d...%d", start_pos, end_pos); - } - - lexstate *lexer = malloc(sizeof(lexstate)); - - position start_position = (position) { - .byte_pos = 0, - .char_pos = 0, - .line = 1, - .column = 0, - }; - - *lexer = (lexstate) { - .string = string, - .start_pos = start_pos, - .end_pos = end_pos, - .current = 
start_position, - .start = { 0 }, - .first_token_of_line = false, - .last_char = 0, - }; - - skipn(lexer, start_pos); - lexer->start = lexer->current; - lexer->first_token_of_line = lexer->current.column == 0; - - return lexer; -} - -parserstate *alloc_parser(VALUE buffer, lexstate *lexer, int start_pos, int end_pos, VALUE variables) { - parserstate *parser = malloc(sizeof(parserstate)); - - *parser = (parserstate) { - .lexstate = lexer, - - .current_token = NullToken, - .next_token = NullToken, - .next_token2 = NullToken, - .next_token3 = NullToken, - .buffer = buffer, - - .vars = NULL, - .last_comment = NULL, - - .constant_pool = { 0 }, - }; - - // The parser's constant pool is mainly used for storing the names of type variables, which usually aren't many. - // Below are some statistics gathered from the current test suite. We can see that 56% of parsers never add to their - // constant pool at all. The initial capacity needs to be a power of 2. Picking 2 means that we won't need to realloc - // in 85% of cases. - // - // TODO: recalculate these statistics based on a real world codebase, rather than the test suite. 
- // - // | Size | Count | Cumulative | % Coverage | - // |------|-------|------------|------------| - // | 0 | 7,862 | 7,862 | 56% | - // | 1 | 3,196 | 11,058 | 79% | - // | 2 | 778 | 12,719 | 85% | - // | 3 | 883 | 11,941 | 91% | - // | 4 | 478 | 13,197 | 95% | - // | 5 | 316 | 13,513 | 97% | - // | 6 | 288 | 13,801 | 99% | - // | 7 | 144 | 13,945 | 100% | - const size_t initial_pool_capacity = 2; - rbs_constant_pool_init(&parser->constant_pool, initial_pool_capacity); - - parser_advance(parser); - parser_advance(parser); - parser_advance(parser); - - if (!NIL_P(variables)) { - if (!RB_TYPE_P(variables, T_ARRAY)) { - free_parser(parser); - rb_raise(rb_eTypeError, - "wrong argument type %"PRIsVALUE" (must be array or nil)", - rb_obj_class(variables)); - } - - parser_push_typevar_table(parser, true); - - for (long i = 0; i < rb_array_len(variables); i++) { - VALUE symbol = rb_ary_entry(variables, i); - VALUE name = rb_sym2str(symbol); - - rbs_constant_id_t id = rbs_constant_pool_insert_shared( - &parser->constant_pool, - (const uint8_t *) RSTRING_PTR(name), - RSTRING_LEN(name) - ); - - parser_insert_typevar(parser, id); - } - } - - return parser; -} - -void free_typevar_tables(id_table *table) { - while (table != NULL) { - id_table *next = table->next; - if (table->ids != NULL) { - free(table->ids); - } - free(table); - table = next; - } -} - -void free_parser(parserstate *parser) { - free(parser->lexstate); - if (parser->last_comment) { - free_comment(parser->last_comment); - } - - free_typevar_tables(parser->vars); - rbs_constant_pool_free(&parser->constant_pool); - free(parser); -} diff --git a/ext/rbs_extension/parserstate.h b/ext/rbs_extension/parserstate.h deleted file mode 100644 index d4c5d17a5..000000000 --- a/ext/rbs_extension/parserstate.h +++ /dev/null @@ -1,163 +0,0 @@ -#ifndef RBS__PARSERSTATE_H -#define RBS__PARSERSTATE_H - -#include - -#include "lexer.h" -#include "location.h" - -/** - * id_table represents a set of RBS constant IDs. 
- * This is used to manage the set of bound variables. - * */ -typedef struct id_table { - size_t size; - size_t count; - rbs_constant_id_t *ids; - struct id_table *next; -} id_table; - -/** - * comment represents a sequence of comment lines. - * - * # Comment for the method. - * # - * # ```rb - * # object.foo() # Do something - * # ``` - * # - * def foo: () -> void - * - * A comment object represents the six lines of comments. - * */ -typedef struct comment { - position start; - position end; - - size_t line_size; - size_t line_count; - token *tokens; - - struct comment *next_comment; -} comment; - -/** - * An RBS parser is a LL(3) parser. - * */ -typedef struct { - lexstate *lexstate; - - token current_token; - token next_token; /* The first lookahead token */ - token next_token2; /* The second lookahead token */ - token next_token3; /* The third lookahead token */ - VALUE buffer; - - id_table *vars; /* Known type variables */ - comment *last_comment; /* Last read comment */ - - rbs_constant_pool_t constant_pool; -} parserstate; - -comment *alloc_comment(token comment_token, comment *last_comment); -void free_comment(comment *com); -void comment_insert_new_line(comment *com, token comment_token); -comment *comment_get_comment(comment *com, int line); -VALUE comment_to_ruby(comment *com, VALUE buffer); - -/** - * Insert new table entry. - * Setting `reset` inserts a _reset_ entry, which stops searching. - * - * ``` - * class Foo[A] - * ^^^ <= push new table with reset - * def foo: [B] () -> [A, B] - * ^^^ <= push new table without reset - * - * class Baz[C] - * ^^^ <= push new table with reset - * end - * end - * ``` - * */ -id_table *parser_push_typevar_table(parserstate *state, bool reset); -void parser_pop_typevar_table(parserstate *state); -/** - * Insert new type variable into the latest table. - * */ -void parser_insert_typevar(parserstate *state, rbs_constant_id_t id); - -/** - * Returns true if given type variable is recorded in the table. 
- * If not found, it goes one table up, if it's not a reset table. - * Or returns false, if it's a reset table. - * */ -bool parser_typevar_member(parserstate *state, rbs_constant_id_t id); - -/** - * Allocate new lexstate object. - * - * ``` - * VALUE string = rb_funcall(buffer, rb_intern("content"), 0); - * alloc_lexer(string, 0, 31) // New lexstate with buffer content - * ``` - * */ -lexstate *alloc_lexer(VALUE string, int start_pos, int end_pos); - -/** - * Allocate new parserstate object. - * - * ``` - * alloc_parser(buffer, lexer, 0, 1, variables) // New parserstate with variables - * alloc_parser(buffer, lexer, 3, 5, Qnil) // New parserstate without variables - * ``` - * */ -parserstate *alloc_parser(VALUE buffer, lexstate *lexer, int start_pos, int end_pos, VALUE variables); -void free_parser(parserstate *parser); -/** - * Advance one token. - * */ -void parser_advance(parserstate *state); - -/** - * @brief Raises an exception if `current_token->type != type`. - * - * @param state - * @param type - */ -void parser_assert(parserstate *state, enum TokenType type); - -/** - * Advance one token, and assert the current token type. - * Raises an exception if `current_token->type != type`. - * */ -void parser_advance_assert(parserstate *state, enum TokenType type); - -/** - * Advance one token if the next_token is a token of the type. - * */ -bool parser_advance_if(parserstate *state, enum TokenType type); -void print_parser(parserstate *state); - -/** - * Insert new comment line token. - * */ -void insert_comment_line(parserstate *state, token token); - -/** - * Returns a RBS::Comment object associated with an subject at `subject_line`. 
- * - * ```rbs - * # Comment1 - * class Foo # This is the subject line for Comment1 - * - * # Comment2 - * %a{annotation} # This is the subject line for Comment2 - * def foo: () -> void - * end - * ``` - * */ -VALUE get_comment(parserstate *state, int subject_line); - -#endif diff --git a/ext/rbs_extension/rbs_extension.h b/ext/rbs_extension/rbs_extension.h index aeebf3f58..966cb47b3 100644 --- a/ext/rbs_extension/rbs_extension.h +++ b/ext/rbs_extension/rbs_extension.h @@ -1,31 +1,16 @@ #include +#include "compat.h" +SUPPRESS_RUBY_HEADER_DIAGNOSTICS_BEGIN #include "ruby.h" #include "ruby/re.h" #include "ruby/encoding.h" +SUPPRESS_RUBY_HEADER_DIAGNOSTICS_END +#include "class_constants.h" #include "rbs.h" -#include "lexer.h" -#include "parser.h" /** - * Receives `parserstate` and `range`, which represents a string token or symbol token, and returns a string VALUE. - * - * Input token | Output string - * ------------+------------- - * "foo\\n" | foo\n - * 'foo' | foo - * `bar` | bar - * :"baz\\t" | baz\t - * :'baz' | baz + * RBS::Parser class * */ -VALUE rbs_unquote_string(parserstate *state, range rg, int offset_bytes); - -/** - * Raises RBS::ParsingError on `tok` with message constructed with given `fmt`. 
- * - * ``` - * foo.rbs:11:21...11:25: Syntax error: {message}, token=`{tok source}` ({tok type}) - * ``` - * */ -PRINTF_ARGS(NORETURN(void) raise_syntax_error(parserstate *state, token tok, const char *fmt, ...), 3, 4); +extern VALUE RBS_Parser; diff --git a/ext/rbs_extension/rbs_string_bridging.c b/ext/rbs_extension/rbs_string_bridging.c new file mode 100644 index 000000000..7770f7907 --- /dev/null +++ b/ext/rbs_extension/rbs_string_bridging.c @@ -0,0 +1,9 @@ +#include "rbs_string_bridging.h" + +rbs_string_t rbs_string_from_ruby_string(VALUE ruby_string) { + return rbs_string_new(StringValueCStr(ruby_string), RSTRING_END(ruby_string)); +} + +VALUE rbs_string_to_ruby_string(rbs_string_t *self, rb_encoding *encoding) { + return rb_enc_str_new(self->start, rbs_string_len(*self), encoding); +} diff --git a/ext/rbs_extension/rbs_string_bridging.h b/ext/rbs_extension/rbs_string_bridging.h new file mode 100644 index 000000000..cd5b7f85f --- /dev/null +++ b/ext/rbs_extension/rbs_string_bridging.h @@ -0,0 +1,24 @@ +#ifndef RBS__RBS_STRING_BRIDGING_H +#define RBS__RBS_STRING_BRIDGING_H + +#include "compat.h" + +SUPPRESS_RUBY_HEADER_DIAGNOSTICS_BEGIN +#include "ruby.h" +#include "ruby/encoding.h" +SUPPRESS_RUBY_HEADER_DIAGNOSTICS_END + +#include "rbs/string.h" + +/** + * @returns A new shared rbs_string_t from the given Ruby string, which points into the given Ruby String's memory, + * and does not need to be `free()`ed. However, the Ruby String needs to be kept alive for the duration of the rbs_string_t. + */ +rbs_string_t rbs_string_from_ruby_string(VALUE ruby_string); + +/** + * Returns a new Ruby string from the given rbs_string_t. 
+ */ +VALUE rbs_string_to_ruby_string(rbs_string_t *self, rb_encoding *encoding); + +#endif diff --git a/ext/rbs_extension/unescape.c b/ext/rbs_extension/unescape.c deleted file mode 100644 index f78661e4d..000000000 --- a/ext/rbs_extension/unescape.c +++ /dev/null @@ -1,32 +0,0 @@ -#include "rbs_extension.h" - -VALUE rbs_unquote_string(parserstate *state, range rg, int offset_bytes) { - VALUE string = state->lexstate->string; - rb_encoding *enc = rb_enc_get(string); - - unsigned int first_char = rb_enc_mbc_to_codepoint( - RSTRING_PTR(string) + rg.start.byte_pos + offset_bytes, - RSTRING_END(string), - enc - ); - - int byte_length = rg.end.byte_pos - rg.start.byte_pos - offset_bytes; - - if (first_char == '"' || first_char == '\'' || first_char == '`') { - int bs = rb_enc_codelen(first_char, enc); - offset_bytes += bs; - byte_length -= 2 * bs; - } - - char *buffer = RSTRING_PTR(state->lexstate->string) + rg.start.byte_pos + offset_bytes; - VALUE str = rb_enc_str_new(buffer, byte_length, enc); - - return rb_funcall( - RBS_Types_Literal, - rb_intern("unescape_string"), - 2, - str, - first_char == '\"' ? Qtrue : Qfalse - ); -} - diff --git a/include/rbs.h b/include/rbs.h index da76e3535..8de185498 100644 --- a/include/rbs.h +++ b/include/rbs.h @@ -1,7 +1,6 @@ #ifndef RBS_H #define RBS_H -#include "rbs/constants.h" -#include "rbs/ruby_objs.h" +#include "rbs/parser.h" #endif diff --git a/include/rbs/ast.h b/include/rbs/ast.h new file mode 100644 index 000000000..a56898684 --- /dev/null +++ b/include/rbs/ast.h @@ -0,0 +1,687 @@ +/*----------------------------------------------------------------------------*/ +/* This file is generated by the templates/template.rb script and should not */ +/* be modified manually. 
*/ +/* To change the template see */ +/* templates/include/rbs/ast.h.erb */ +/*----------------------------------------------------------------------------*/ + +#ifndef RBS__AST_H +#define RBS__AST_H + +#include "rbs/util/rbs_allocator.h" +#include "rbs/util/rbs_constant_pool.h" +#include "string.h" +#include "location.h" + +enum rbs_node_type { + RBS_AST_ANNOTATION = 1, + RBS_AST_BOOL = 2, + RBS_AST_COMMENT = 3, + RBS_AST_DECLARATIONS_CLASS = 4, + RBS_AST_DECLARATIONS_CLASS_SUPER = 5, + RBS_AST_DECLARATIONS_CLASS_ALIAS = 6, + RBS_AST_DECLARATIONS_CONSTANT = 7, + RBS_AST_DECLARATIONS_GLOBAL = 8, + RBS_AST_DECLARATIONS_INTERFACE = 9, + RBS_AST_DECLARATIONS_MODULE = 10, + RBS_AST_DECLARATIONS_MODULE_SELF = 11, + RBS_AST_DECLARATIONS_MODULE_ALIAS = 12, + RBS_AST_DECLARATIONS_TYPE_ALIAS = 13, + RBS_AST_DIRECTIVES_USE = 14, + RBS_AST_DIRECTIVES_USE_SINGLE_CLAUSE = 15, + RBS_AST_DIRECTIVES_USE_WILDCARD_CLAUSE = 16, + RBS_AST_INTEGER = 17, + RBS_AST_MEMBERS_ALIAS = 18, + RBS_AST_MEMBERS_ATTR_ACCESSOR = 19, + RBS_AST_MEMBERS_ATTR_READER = 20, + RBS_AST_MEMBERS_ATTR_WRITER = 21, + RBS_AST_MEMBERS_CLASS_INSTANCE_VARIABLE = 22, + RBS_AST_MEMBERS_CLASS_VARIABLE = 23, + RBS_AST_MEMBERS_EXTEND = 24, + RBS_AST_MEMBERS_INCLUDE = 25, + RBS_AST_MEMBERS_INSTANCE_VARIABLE = 26, + RBS_AST_MEMBERS_METHOD_DEFINITION = 27, + RBS_AST_MEMBERS_METHOD_DEFINITION_OVERLOAD = 28, + RBS_AST_MEMBERS_PREPEND = 29, + RBS_AST_MEMBERS_PRIVATE = 30, + RBS_AST_MEMBERS_PUBLIC = 31, + RBS_AST_STRING = 32, + RBS_AST_TYPE_PARAM = 33, + RBS_METHOD_TYPE = 34, + RBS_NAMESPACE = 35, + RBS_SIGNATURE = 36, + RBS_TYPE_NAME = 37, + RBS_TYPES_ALIAS = 38, + RBS_TYPES_BASES_ANY = 39, + RBS_TYPES_BASES_BOOL = 40, + RBS_TYPES_BASES_BOTTOM = 41, + RBS_TYPES_BASES_CLASS = 42, + RBS_TYPES_BASES_INSTANCE = 43, + RBS_TYPES_BASES_NIL = 44, + RBS_TYPES_BASES_SELF = 45, + RBS_TYPES_BASES_TOP = 46, + RBS_TYPES_BASES_VOID = 47, + RBS_TYPES_BLOCK = 48, + RBS_TYPES_CLASS_INSTANCE = 49, + RBS_TYPES_CLASS_SINGLETON = 50, + 
RBS_TYPES_FUNCTION = 51, + RBS_TYPES_FUNCTION_PARAM = 52, + RBS_TYPES_INTERFACE = 53, + RBS_TYPES_INTERSECTION = 54, + RBS_TYPES_LITERAL = 55, + RBS_TYPES_OPTIONAL = 56, + RBS_TYPES_PROC = 57, + RBS_TYPES_RECORD = 58, + RBS_TYPES_RECORD_FIELD_TYPE = 59, + RBS_TYPES_TUPLE = 60, + RBS_TYPES_UNION = 61, + RBS_TYPES_UNTYPED_FUNCTION = 62, + RBS_TYPES_VARIABLE = 63, + RBS_KEYWORD, + RBS_AST_SYMBOL, +}; + +typedef struct rbs_node { + enum rbs_node_type type; + rbs_location_t *location; +} rbs_node_t; + +const char *rbs_node_type_name(rbs_node_t *node); + +/* rbs_node_list_node */ + +typedef struct rbs_node_list_node { + rbs_node_t *node; + struct rbs_node_list_node *next; +} rbs_node_list_node_t; + +typedef struct rbs_node_list { + rbs_allocator_t *allocator; + rbs_node_list_node_t *head; + rbs_node_list_node_t *tail; + size_t length; +} rbs_node_list_t; + +rbs_node_list_t *rbs_node_list_new(rbs_allocator_t *); + +void rbs_node_list_append(rbs_node_list_t *list, rbs_node_t *node); + +/* rbs_hash */ + +typedef struct rbs_hash_node { + rbs_node_t *key; + rbs_node_t *value; + struct rbs_hash_node *next; +} rbs_hash_node_t; + +typedef struct rbs_hash { + rbs_allocator_t *allocator; + rbs_hash_node_t *head; + rbs_hash_node_t *tail; + size_t length; +} rbs_hash_t; + +rbs_hash_t *rbs_hash_new(rbs_allocator_t *); + +void rbs_hash_set(rbs_hash_t *hash, rbs_node_t *key, rbs_node_t *value); + +rbs_hash_node_t *rbs_hash_find(rbs_hash_t *hash, rbs_node_t *key); + +rbs_node_t *rbs_hash_get(rbs_hash_t *hash, rbs_node_t *key); + +/* rbs_ast_node */ + +typedef struct rbs_ast_annotation { + rbs_node_t base; + + rbs_string_t string; +} rbs_ast_annotation_t; + +typedef struct rbs_ast_bool { + rbs_node_t base; + + bool value; +} rbs_ast_bool_t; + +typedef struct rbs_ast_comment { + rbs_node_t base; + + rbs_string_t string; +} rbs_ast_comment_t; + +typedef struct rbs_ast_declarations_class { + rbs_node_t base; + + struct rbs_type_name *name; + struct rbs_node_list *type_params; + struct 
rbs_ast_declarations_class_super *super_class; + struct rbs_node_list *members; + struct rbs_node_list *annotations; + struct rbs_ast_comment *comment; +} rbs_ast_declarations_class_t; + +typedef struct rbs_ast_declarations_class_super { + rbs_node_t base; + + struct rbs_type_name *name; + struct rbs_node_list *args; +} rbs_ast_declarations_class_super_t; + +typedef struct rbs_ast_declarations_class_alias { + rbs_node_t base; + + struct rbs_type_name *new_name; + struct rbs_type_name *old_name; + struct rbs_ast_comment *comment; + struct rbs_node_list *annotations; +} rbs_ast_declarations_class_alias_t; + +typedef struct rbs_ast_declarations_constant { + rbs_node_t base; + + struct rbs_type_name *name; + struct rbs_node *type; + struct rbs_ast_comment *comment; + struct rbs_node_list *annotations; +} rbs_ast_declarations_constant_t; + +typedef struct rbs_ast_declarations_global { + rbs_node_t base; + + struct rbs_ast_symbol *name; + struct rbs_node *type; + struct rbs_ast_comment *comment; + struct rbs_node_list *annotations; +} rbs_ast_declarations_global_t; + +typedef struct rbs_ast_declarations_interface { + rbs_node_t base; + + struct rbs_type_name *name; + struct rbs_node_list *type_params; + struct rbs_node_list *members; + struct rbs_node_list *annotations; + struct rbs_ast_comment *comment; +} rbs_ast_declarations_interface_t; + +typedef struct rbs_ast_declarations_module { + rbs_node_t base; + + struct rbs_type_name *name; + struct rbs_node_list *type_params; + struct rbs_node_list *self_types; + struct rbs_node_list *members; + struct rbs_node_list *annotations; + struct rbs_ast_comment *comment; +} rbs_ast_declarations_module_t; + +typedef struct rbs_ast_declarations_module_self { + rbs_node_t base; + + struct rbs_type_name *name; + struct rbs_node_list *args; +} rbs_ast_declarations_module_self_t; + +typedef struct rbs_ast_declarations_module_alias { + rbs_node_t base; + + struct rbs_type_name *new_name; + struct rbs_type_name *old_name; + struct 
rbs_ast_comment *comment; + struct rbs_node_list *annotations; +} rbs_ast_declarations_module_alias_t; + +typedef struct rbs_ast_declarations_type_alias { + rbs_node_t base; + + struct rbs_type_name *name; + struct rbs_node_list *type_params; + struct rbs_node *type; + struct rbs_node_list *annotations; + struct rbs_ast_comment *comment; +} rbs_ast_declarations_type_alias_t; + +typedef struct rbs_ast_directives_use { + rbs_node_t base; + + struct rbs_node_list *clauses; +} rbs_ast_directives_use_t; + +typedef struct rbs_ast_directives_use_single_clause { + rbs_node_t base; + + struct rbs_type_name *type_name; + struct rbs_ast_symbol *new_name; +} rbs_ast_directives_use_single_clause_t; + +typedef struct rbs_ast_directives_use_wildcard_clause { + rbs_node_t base; + + struct rbs_namespace *rbs_namespace; +} rbs_ast_directives_use_wildcard_clause_t; + +typedef struct rbs_ast_integer { + rbs_node_t base; + + rbs_string_t string_representation; +} rbs_ast_integer_t; + +typedef struct rbs_ast_members_alias { + rbs_node_t base; + + struct rbs_ast_symbol *new_name; + struct rbs_ast_symbol *old_name; + struct rbs_keyword *kind; + struct rbs_node_list *annotations; + struct rbs_ast_comment *comment; +} rbs_ast_members_alias_t; + +typedef struct rbs_ast_members_attr_accessor { + rbs_node_t base; + + struct rbs_ast_symbol *name; + struct rbs_node *type; + struct rbs_node *ivar_name; + struct rbs_keyword *kind; + struct rbs_node_list *annotations; + struct rbs_ast_comment *comment; + struct rbs_keyword *visibility; +} rbs_ast_members_attr_accessor_t; + +typedef struct rbs_ast_members_attr_reader { + rbs_node_t base; + + struct rbs_ast_symbol *name; + struct rbs_node *type; + struct rbs_node *ivar_name; + struct rbs_keyword *kind; + struct rbs_node_list *annotations; + struct rbs_ast_comment *comment; + struct rbs_keyword *visibility; +} rbs_ast_members_attr_reader_t; + +typedef struct rbs_ast_members_attr_writer { + rbs_node_t base; + + struct rbs_ast_symbol *name; + struct 
rbs_node *type; + struct rbs_node *ivar_name; + struct rbs_keyword *kind; + struct rbs_node_list *annotations; + struct rbs_ast_comment *comment; + struct rbs_keyword *visibility; +} rbs_ast_members_attr_writer_t; + +typedef struct rbs_ast_members_class_instance_variable { + rbs_node_t base; + + struct rbs_ast_symbol *name; + struct rbs_node *type; + struct rbs_ast_comment *comment; +} rbs_ast_members_class_instance_variable_t; + +typedef struct rbs_ast_members_class_variable { + rbs_node_t base; + + struct rbs_ast_symbol *name; + struct rbs_node *type; + struct rbs_ast_comment *comment; +} rbs_ast_members_class_variable_t; + +typedef struct rbs_ast_members_extend { + rbs_node_t base; + + struct rbs_type_name *name; + struct rbs_node_list *args; + struct rbs_node_list *annotations; + struct rbs_ast_comment *comment; +} rbs_ast_members_extend_t; + +typedef struct rbs_ast_members_include { + rbs_node_t base; + + struct rbs_type_name *name; + struct rbs_node_list *args; + struct rbs_node_list *annotations; + struct rbs_ast_comment *comment; +} rbs_ast_members_include_t; + +typedef struct rbs_ast_members_instance_variable { + rbs_node_t base; + + struct rbs_ast_symbol *name; + struct rbs_node *type; + struct rbs_ast_comment *comment; +} rbs_ast_members_instance_variable_t; + +typedef struct rbs_ast_members_method_definition { + rbs_node_t base; + + struct rbs_ast_symbol *name; + struct rbs_keyword *kind; + struct rbs_node_list *overloads; + struct rbs_node_list *annotations; + struct rbs_ast_comment *comment; + bool overloading; + struct rbs_keyword *visibility; +} rbs_ast_members_method_definition_t; + +typedef struct rbs_ast_members_method_definition_overload { + rbs_node_t base; + + struct rbs_node_list *annotations; + struct rbs_node *method_type; +} rbs_ast_members_method_definition_overload_t; + +typedef struct rbs_ast_members_prepend { + rbs_node_t base; + + struct rbs_type_name *name; + struct rbs_node_list *args; + struct rbs_node_list *annotations; + struct 
rbs_ast_comment *comment; +} rbs_ast_members_prepend_t; + +typedef struct rbs_ast_members_private { + rbs_node_t base; + +} rbs_ast_members_private_t; + +typedef struct rbs_ast_members_public { + rbs_node_t base; + +} rbs_ast_members_public_t; + +typedef struct rbs_ast_string { + rbs_node_t base; + + rbs_string_t string; +} rbs_ast_string_t; + +typedef struct rbs_ast_type_param { + rbs_node_t base; + + struct rbs_ast_symbol *name; + struct rbs_keyword *variance; + struct rbs_node *upper_bound; + struct rbs_node *default_type; + bool unchecked; +} rbs_ast_type_param_t; + +typedef struct rbs_method_type { + rbs_node_t base; + + struct rbs_node_list *type_params; + struct rbs_node *type; + struct rbs_types_block *block; +} rbs_method_type_t; + +typedef struct rbs_namespace { + rbs_node_t base; + + struct rbs_node_list *path; + bool absolute; +} rbs_namespace_t; + +typedef struct rbs_signature { + rbs_node_t base; + + struct rbs_node_list *directives; + struct rbs_node_list *declarations; +} rbs_signature_t; + +typedef struct rbs_type_name { + rbs_node_t base; + + struct rbs_namespace *rbs_namespace; + struct rbs_ast_symbol *name; +} rbs_type_name_t; + +typedef struct rbs_types_alias { + rbs_node_t base; + + struct rbs_type_name *name; + struct rbs_node_list *args; +} rbs_types_alias_t; + +typedef struct rbs_types_bases_any { + rbs_node_t base; + + bool todo; +} rbs_types_bases_any_t; + +typedef struct rbs_types_bases_bool { + rbs_node_t base; + +} rbs_types_bases_bool_t; + +typedef struct rbs_types_bases_bottom { + rbs_node_t base; + +} rbs_types_bases_bottom_t; + +typedef struct rbs_types_bases_class { + rbs_node_t base; + +} rbs_types_bases_class_t; + +typedef struct rbs_types_bases_instance { + rbs_node_t base; + +} rbs_types_bases_instance_t; + +typedef struct rbs_types_bases_nil { + rbs_node_t base; + +} rbs_types_bases_nil_t; + +typedef struct rbs_types_bases_self { + rbs_node_t base; + +} rbs_types_bases_self_t; + +typedef struct rbs_types_bases_top { + 
rbs_node_t base; + +} rbs_types_bases_top_t; + +typedef struct rbs_types_bases_void { + rbs_node_t base; + +} rbs_types_bases_void_t; + +typedef struct rbs_types_block { + rbs_node_t base; + + struct rbs_node *type; + bool required; + struct rbs_node *self_type; +} rbs_types_block_t; + +typedef struct rbs_types_class_instance { + rbs_node_t base; + + struct rbs_type_name *name; + struct rbs_node_list *args; +} rbs_types_class_instance_t; + +typedef struct rbs_types_class_singleton { + rbs_node_t base; + + struct rbs_type_name *name; +} rbs_types_class_singleton_t; + +typedef struct rbs_types_function { + rbs_node_t base; + + struct rbs_node_list *required_positionals; + struct rbs_node_list *optional_positionals; + struct rbs_node *rest_positionals; + struct rbs_node_list *trailing_positionals; + struct rbs_hash *required_keywords; + struct rbs_hash *optional_keywords; + struct rbs_node *rest_keywords; + struct rbs_node *return_type; +} rbs_types_function_t; + +typedef struct rbs_types_function_param { + rbs_node_t base; + + struct rbs_node *type; + struct rbs_ast_symbol *name; +} rbs_types_function_param_t; + +typedef struct rbs_types_interface { + rbs_node_t base; + + struct rbs_type_name *name; + struct rbs_node_list *args; +} rbs_types_interface_t; + +typedef struct rbs_types_intersection { + rbs_node_t base; + + struct rbs_node_list *types; +} rbs_types_intersection_t; + +typedef struct rbs_types_literal { + rbs_node_t base; + + struct rbs_node *literal; +} rbs_types_literal_t; + +typedef struct rbs_types_optional { + rbs_node_t base; + + struct rbs_node *type; +} rbs_types_optional_t; + +typedef struct rbs_types_proc { + rbs_node_t base; + + struct rbs_node *type; + struct rbs_types_block *block; + struct rbs_node *self_type; +} rbs_types_proc_t; + +typedef struct rbs_types_record { + rbs_node_t base; + + struct rbs_hash *all_fields; +} rbs_types_record_t; + +typedef struct rbs_types_record_field_type { + rbs_node_t base; + + struct rbs_node *type; + bool 
required; +} rbs_types_record_field_type_t; + +typedef struct rbs_types_tuple { + rbs_node_t base; + + struct rbs_node_list *types; +} rbs_types_tuple_t; + +typedef struct rbs_types_union { + rbs_node_t base; + + struct rbs_node_list *types; +} rbs_types_union_t; + +typedef struct rbs_types_untyped_function { + rbs_node_t base; + + struct rbs_node *return_type; +} rbs_types_untyped_function_t; + +typedef struct rbs_types_variable { + rbs_node_t base; + + struct rbs_ast_symbol *name; +} rbs_types_variable_t; + +/// `rbs_keyword_t` models RBS keywords like "private", "instance", "covariant", etc. +/// These are stored in the global constant pool, and get surfaced to Ruby as `Symbol`s, +/// just like `rbs_ast_symbol_t`s. +typedef struct rbs_keyword { + rbs_node_t base; + rbs_constant_id_t constant_id; +} rbs_keyword_t; + +rbs_keyword_t *rbs_keyword_new(rbs_allocator_t *, rbs_location_t *, rbs_constant_id_t); + +/// `rbs_ast_symbol_t` models user-defined identifiers like class names, method names, etc. +/// These get stored in the parser's own constant pool, and get surfaced to Ruby as `Symbol`s. 
+typedef struct rbs_ast_symbol { + rbs_node_t base; + rbs_constant_id_t constant_id; +} rbs_ast_symbol_t; + +rbs_ast_symbol_t *rbs_ast_symbol_new(rbs_allocator_t *, rbs_location_t *, rbs_constant_pool_t *, rbs_constant_id_t); + +rbs_ast_annotation_t *rbs_ast_annotation_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_string_t string); +rbs_ast_bool_t *rbs_ast_bool_new(rbs_allocator_t *allocator, rbs_location_t *location, bool value); +rbs_ast_comment_t *rbs_ast_comment_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_string_t string); +rbs_ast_declarations_class_t *rbs_ast_declarations_class_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *type_params, rbs_ast_declarations_class_super_t *super_class, rbs_node_list_t *members, rbs_node_list_t *annotations, rbs_ast_comment_t *comment); +rbs_ast_declarations_class_super_t *rbs_ast_declarations_class_super_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args); +rbs_ast_declarations_class_alias_t *rbs_ast_declarations_class_alias_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *new_name, rbs_type_name_t *old_name, rbs_ast_comment_t *comment, rbs_node_list_t *annotations); +rbs_ast_declarations_constant_t *rbs_ast_declarations_constant_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_t *type, rbs_ast_comment_t *comment, rbs_node_list_t *annotations); +rbs_ast_declarations_global_t *rbs_ast_declarations_global_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_node_t *type, rbs_ast_comment_t *comment, rbs_node_list_t *annotations); +rbs_ast_declarations_interface_t *rbs_ast_declarations_interface_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *type_params, rbs_node_list_t *members, rbs_node_list_t *annotations, rbs_ast_comment_t *comment); 
+rbs_ast_declarations_module_t *rbs_ast_declarations_module_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *type_params, rbs_node_list_t *self_types, rbs_node_list_t *members, rbs_node_list_t *annotations, rbs_ast_comment_t *comment); +rbs_ast_declarations_module_self_t *rbs_ast_declarations_module_self_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args); +rbs_ast_declarations_module_alias_t *rbs_ast_declarations_module_alias_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *new_name, rbs_type_name_t *old_name, rbs_ast_comment_t *comment, rbs_node_list_t *annotations); +rbs_ast_declarations_type_alias_t *rbs_ast_declarations_type_alias_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *type_params, rbs_node_t *type, rbs_node_list_t *annotations, rbs_ast_comment_t *comment); +rbs_ast_directives_use_t *rbs_ast_directives_use_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *clauses); +rbs_ast_directives_use_single_clause_t *rbs_ast_directives_use_single_clause_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *type_name, rbs_ast_symbol_t *new_name); +rbs_ast_directives_use_wildcard_clause_t *rbs_ast_directives_use_wildcard_clause_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_namespace_t *rbs_namespace); +rbs_ast_integer_t *rbs_ast_integer_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_string_t string_representation); +rbs_ast_members_alias_t *rbs_ast_members_alias_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *new_name, rbs_ast_symbol_t *old_name, rbs_keyword_t *kind, rbs_node_list_t *annotations, rbs_ast_comment_t *comment); +rbs_ast_members_attr_accessor_t *rbs_ast_members_attr_accessor_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_node_t *type, 
rbs_node_t *ivar_name, rbs_keyword_t *kind, rbs_node_list_t *annotations, rbs_ast_comment_t *comment, rbs_keyword_t *visibility); +rbs_ast_members_attr_reader_t *rbs_ast_members_attr_reader_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_node_t *type, rbs_node_t *ivar_name, rbs_keyword_t *kind, rbs_node_list_t *annotations, rbs_ast_comment_t *comment, rbs_keyword_t *visibility); +rbs_ast_members_attr_writer_t *rbs_ast_members_attr_writer_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_node_t *type, rbs_node_t *ivar_name, rbs_keyword_t *kind, rbs_node_list_t *annotations, rbs_ast_comment_t *comment, rbs_keyword_t *visibility); +rbs_ast_members_class_instance_variable_t *rbs_ast_members_class_instance_variable_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_node_t *type, rbs_ast_comment_t *comment); +rbs_ast_members_class_variable_t *rbs_ast_members_class_variable_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_node_t *type, rbs_ast_comment_t *comment); +rbs_ast_members_extend_t *rbs_ast_members_extend_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args, rbs_node_list_t *annotations, rbs_ast_comment_t *comment); +rbs_ast_members_include_t *rbs_ast_members_include_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args, rbs_node_list_t *annotations, rbs_ast_comment_t *comment); +rbs_ast_members_instance_variable_t *rbs_ast_members_instance_variable_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_node_t *type, rbs_ast_comment_t *comment); +rbs_ast_members_method_definition_t *rbs_ast_members_method_definition_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_keyword_t *kind, rbs_node_list_t *overloads, rbs_node_list_t *annotations, rbs_ast_comment_t 
*comment, bool overloading, rbs_keyword_t *visibility); +rbs_ast_members_method_definition_overload_t *rbs_ast_members_method_definition_overload_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *annotations, rbs_node_t *method_type); +rbs_ast_members_prepend_t *rbs_ast_members_prepend_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args, rbs_node_list_t *annotations, rbs_ast_comment_t *comment); +rbs_ast_members_private_t *rbs_ast_members_private_new(rbs_allocator_t *allocator, rbs_location_t *location); +rbs_ast_members_public_t *rbs_ast_members_public_new(rbs_allocator_t *allocator, rbs_location_t *location); +rbs_ast_string_t *rbs_ast_string_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_string_t string); +rbs_ast_type_param_t *rbs_ast_type_param_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_keyword_t *variance, rbs_node_t *upper_bound, rbs_node_t *default_type, bool unchecked); +rbs_method_type_t *rbs_method_type_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *type_params, rbs_node_t *type, rbs_types_block_t *block); +rbs_namespace_t *rbs_namespace_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *path, bool absolute); +rbs_signature_t *rbs_signature_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *directives, rbs_node_list_t *declarations); +rbs_type_name_t *rbs_type_name_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_namespace_t *rbs_namespace, rbs_ast_symbol_t *name); +rbs_types_alias_t *rbs_types_alias_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args); +rbs_types_bases_any_t *rbs_types_bases_any_new(rbs_allocator_t *allocator, rbs_location_t *location, bool todo); +rbs_types_bases_bool_t *rbs_types_bases_bool_new(rbs_allocator_t *allocator, rbs_location_t *location); 
+rbs_types_bases_bottom_t *rbs_types_bases_bottom_new(rbs_allocator_t *allocator, rbs_location_t *location); +rbs_types_bases_class_t *rbs_types_bases_class_new(rbs_allocator_t *allocator, rbs_location_t *location); +rbs_types_bases_instance_t *rbs_types_bases_instance_new(rbs_allocator_t *allocator, rbs_location_t *location); +rbs_types_bases_nil_t *rbs_types_bases_nil_new(rbs_allocator_t *allocator, rbs_location_t *location); +rbs_types_bases_self_t *rbs_types_bases_self_new(rbs_allocator_t *allocator, rbs_location_t *location); +rbs_types_bases_top_t *rbs_types_bases_top_new(rbs_allocator_t *allocator, rbs_location_t *location); +rbs_types_bases_void_t *rbs_types_bases_void_new(rbs_allocator_t *allocator, rbs_location_t *location); +rbs_types_block_t *rbs_types_block_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_t *type, bool required, rbs_node_t *self_type); +rbs_types_class_instance_t *rbs_types_class_instance_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args); +rbs_types_class_singleton_t *rbs_types_class_singleton_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name); +rbs_types_function_t *rbs_types_function_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *required_positionals, rbs_node_list_t *optional_positionals, rbs_node_t *rest_positionals, rbs_node_list_t *trailing_positionals, rbs_hash_t *required_keywords, rbs_hash_t *optional_keywords, rbs_node_t *rest_keywords, rbs_node_t *return_type); +rbs_types_function_param_t *rbs_types_function_param_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_t *type, rbs_ast_symbol_t *name); +rbs_types_interface_t *rbs_types_interface_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args); +rbs_types_intersection_t *rbs_types_intersection_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *types); 
+rbs_types_literal_t *rbs_types_literal_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_t *literal); +rbs_types_optional_t *rbs_types_optional_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_t *type); +rbs_types_proc_t *rbs_types_proc_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_t *type, rbs_types_block_t *block, rbs_node_t *self_type); +rbs_types_record_t *rbs_types_record_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_hash_t *all_fields); +rbs_types_record_field_type_t *rbs_types_record_field_type_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_t *type, bool required); +rbs_types_tuple_t *rbs_types_tuple_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *types); +rbs_types_union_t *rbs_types_union_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *types); +rbs_types_untyped_function_t *rbs_types_untyped_function_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_t *return_type); +rbs_types_variable_t *rbs_types_variable_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name); + +#endif diff --git a/include/rbs/defines.h b/include/rbs/defines.h new file mode 100644 index 000000000..f193ab5f4 --- /dev/null +++ b/include/rbs/defines.h @@ -0,0 +1,77 @@ +/** + * @file defines.h + * + * Macro definitions used throughout the rbs library. + * + * This file should be included first by any *.h or *.c in rbs for consistency + * and to ensure that the macros are defined before they are used. 
+ */ + +#ifndef RBS_DEFINES_H +#define RBS_DEFINES_H + +/*********************************************************************************************************************** + * Copied+modified subset of Prism's `include/prism/defines.h` * + **********************************************************************************************************************/ + +/** + * Certain compilers support specifying that a function accepts variadic + * parameters that look like printf format strings to provide a better developer + * experience when someone is using the function. This macro does that in a + * compiler-agnostic way. + */ +#if defined(__GNUC__) +#if defined(__MINGW_PRINTF_FORMAT) +#define RBS_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(__MINGW_PRINTF_FORMAT, string_index, argument_index))) +#else +#define RBS_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((format(printf, string_index, argument_index))) +#endif +#elif defined(__clang__) +#define RBS_ATTRIBUTE_FORMAT(string_index, argument_index) __attribute__((__format__(__printf__, string_index, argument_index))) +#else +#define RBS_ATTRIBUTE_FORMAT(string_index, argument_index) +#endif + +/** + * Support RBS_LIKELY and RBS_UNLIKELY to help the compiler optimize its + * branch prediction. + */ +#if defined(__GNUC__) || defined(__clang__) +/** The compiler should predict that this branch will be taken. */ +#define RBS_LIKELY(x) __builtin_expect(!!(x), 1) + +/** The compiler should predict that this branch will not be taken. */ +#define RBS_UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +/** Void because this platform does not support branch prediction hints. */ +#define RBS_LIKELY(x) (x) + +/** Void because this platform does not support branch prediction hints. */ +#define RBS_UNLIKELY(x) (x) +#endif + +/** + * We use -Wimplicit-fallthrough to guard potentially unintended fall-through between cases of a switch. 
+ * Use RBS_FALLTHROUGH to explicitly annotate cases where the fallthrough is intentional. + */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202311L // C23 or later +#define RBS_FALLTHROUGH [[fallthrough]]; +#elif defined(__GNUC__) || defined(__clang__) +#define RBS_FALLTHROUGH __attribute__((fallthrough)); +#elif defined(_MSC_VER) +#define RBS_FALLTHROUGH __fallthrough; +#else +#define RBS_FALLTHROUGH +#endif + +/*********************************************************************************************************************** + * Custom defines for RBS * + **********************************************************************************************************************/ + +#if defined(_MSC_VER) +#define NODISCARD _Check_return_ +#else +#define NODISCARD __attribute__((warn_unused_result)) +#endif + +#endif diff --git a/include/rbs/lexer.h b/include/rbs/lexer.h new file mode 100644 index 000000000..ed7bfa2d6 --- /dev/null +++ b/include/rbs/lexer.h @@ -0,0 +1,199 @@ +#ifndef RBS__LEXER_H +#define RBS__LEXER_H + +#include "string.h" +#include "util/rbs_encoding.h" + +enum RBSTokenType { + NullType, /* (Nothing) */ + pEOF, /* EOF */ + ErrorToken, /* Error */ + + pLPAREN, /* ( */ + pRPAREN, /* ) */ + pCOLON, /* : */ + pCOLON2, /* :: */ + pLBRACKET, /* [ */ + pRBRACKET, /* ] */ + pLBRACE, /* { */ + pRBRACE, /* } */ + pHAT, /* ^ */ + pARROW, /* -> */ + pFATARROW, /* => */ + pCOMMA, /* , */ + pBAR, /* | */ + pAMP, /* & */ + pSTAR, /* * */ + pSTAR2, /* ** */ + pDOT, /* . */ + pDOT3, /* ... */ + pBANG, /* ! */ + pQUESTION, /* ? 
*/ + pLT, /* < */ + pEQ, /* = */ + + kALIAS, /* alias */ + kATTRACCESSOR, /* attr_accessor */ + kATTRREADER, /* attr_reader */ + kATTRWRITER, /* attr_writer */ + kBOOL, /* bool */ + kBOT, /* bot */ + kCLASS, /* class */ + kDEF, /* def */ + kEND, /* end */ + kEXTEND, /* extend */ + kFALSE, /* false */ + kIN, /* in */ + kINCLUDE, /* include */ + kINSTANCE, /* instance */ + kINTERFACE, /* interface */ + kMODULE, /* module */ + kNIL, /* nil */ + kOUT, /* out */ + kPREPEND, /* prepend */ + kPRIVATE, /* private */ + kPUBLIC, /* public */ + kSELF, /* self */ + kSINGLETON, /* singleton */ + kTOP, /* top */ + kTRUE, /* true */ + kTYPE, /* type */ + kUNCHECKED, /* unchecked */ + kUNTYPED, /* untyped */ + kVOID, /* void */ + kUSE, /* use */ + kAS, /* as */ + k__TODO__, /* __todo__ */ + + tLIDENT, /* Identifiers starting with lower case */ + tUIDENT, /* Identifiers starting with upper case */ + tULIDENT, /* Identifiers starting with `_` followed by upper case */ + tULLIDENT, /* Identifiers starting with `_` followed by lower case */ + tGIDENT, /* Identifiers starting with `$` */ + tAIDENT, /* Identifiers starting with `@` */ + tA2IDENT, /* Identifiers starting with `@@` */ + tBANGIDENT, /* Identifiers ending with `!` */ + tEQIDENT, /* Identifiers ending with `=` */ + tQIDENT, /* Quoted identifier */ + pAREF_OPR, /* [] */ + tOPERATOR, /* Operator identifier */ + + tCOMMENT, /* Comment */ + tLINECOMMENT, /* Comment of all line */ + + tTRIVIA, /* Trivia tokens -- space and new line */ + + tDQSTRING, /* Double quoted string */ + tSQSTRING, /* Single quoted string */ + tINTEGER, /* Integer */ + tSYMBOL, /* Symbol */ + tDQSYMBOL, /* Double quoted symbol */ + tSQSYMBOL, /* Single quoted symbol */ + tANNOTATION, /* Annotation */ +}; + +/** + * The `byte_pos` (or `char_pos`) is the primary data. + * The rest are cache. + * + * They can be computed from `byte_pos` (or `char_pos`), but it needs full scan from the beginning of the string (depending on the encoding). 
+ * */ +typedef struct { + int byte_pos; + int char_pos; + int line; + int column; +} rbs_position_t; + +typedef struct { + rbs_position_t start; + rbs_position_t end; +} rbs_range_t; + +typedef struct { + enum RBSTokenType type; + rbs_range_t range; +} rbs_token_t; + +/** + * The lexer state is the current token. + * + * ``` + #. 0.1.2.3.4.5.6.7.8.9.0.1.2.3.4.5.6 + * ... " a s t r i n g t o k e n " + * ^ start position (0) + * ^ current position (6) + * ^ current character ('i', bytes = 1) + * ~~~~~~~~~~~ Token => "a str + * ``` + * */ +typedef struct { + rbs_string_t string; + int start_pos; /* The character position that defines the start of the input */ + int end_pos; /* The character position that defines the end of the input */ + rbs_position_t current; /* The current position: just before the current_character */ + rbs_position_t start; /* The start position of the current token */ + + unsigned int current_code_point; /* Current character code point */ + size_t current_character_bytes; /* Current character byte length (0 or 1~4) */ + + bool first_token_of_line; /* This flag is used for tLINECOMMENT */ + + const rbs_encoding_t *encoding; +} rbs_lexer_t; + +extern const rbs_token_t NullToken; +extern const rbs_position_t NullPosition; +extern const rbs_range_t NULL_RANGE; + +char *rbs_peek_token(rbs_lexer_t *lexer, rbs_token_t tok); +int rbs_token_chars(rbs_token_t tok); +int rbs_token_bytes(rbs_token_t tok); + +#define rbs_null_position_p(pos) (pos.byte_pos == -1) +#define rbs_null_range_p(range) (range.start.byte_pos == -1) +#define rbs_nonnull_pos_or(pos1, pos2) (rbs_null_position_p(pos1) ? pos2 : pos1) +#define RBS_RANGE_BYTES(range) (range.end.byte_pos - range.start.byte_pos) + +const char *rbs_token_type_str(enum RBSTokenType type); + +/** + * Returns the next character. + * */ +unsigned int rbs_peek(rbs_lexer_t *lexer); + +/** + * Advances the current position by one character. 
+ * */ +void rbs_skip(rbs_lexer_t *lexer); + +/** + * Read next character and store the codepoint and byte length to the given pointers. + * + * This doesn't update the lexer state. + * Returns `true` if succeeded, or `false` if reached to EOF. + * */ +bool rbs_next_char(rbs_lexer_t *lexer, unsigned int *codepoint, size_t *bytes); + +/** + * Skip n characters. + * */ +void rbs_skipn(rbs_lexer_t *lexer, size_t size); + +/** + * Return new rbs_token_t with given type. + * */ +rbs_token_t rbs_next_token(rbs_lexer_t *lexer, enum RBSTokenType type); + +/** + * Return new rbs_token_t with EOF type. + * */ +rbs_token_t rbs_next_eof_token(rbs_lexer_t *lexer); + +rbs_token_t rbs_lexer_next_token(rbs_lexer_t *lexer); + +void rbs_print_token(rbs_token_t tok); + +void rbs_print_lexer(rbs_lexer_t *lexer); + +#endif diff --git a/include/rbs/location.h b/include/rbs/location.h new file mode 100644 index 000000000..3ca26c017 --- /dev/null +++ b/include/rbs/location.h @@ -0,0 +1,59 @@ +#ifndef RBS__RBS_LOCATION_H +#define RBS__RBS_LOCATION_H + +#include "lexer.h" + +#include "rbs/util/rbs_constant_pool.h" +#include "rbs/util/rbs_allocator.h" + +typedef struct { + int start; + int end; +} rbs_loc_range; + +typedef struct { + rbs_constant_id_t name; + rbs_loc_range rg; +} rbs_loc_entry; + +typedef unsigned int rbs_loc_entry_bitmap; + +// The flexible array always allocates, but it's okay. +// This struct is not allocated when the `rbs_loc` doesn't have children. 
+typedef struct { + unsigned short len; + unsigned short cap; + rbs_loc_entry_bitmap required_p; + rbs_loc_entry entries[1]; +} rbs_loc_children; + +typedef struct rbs_location { + rbs_range_t rg; + rbs_loc_children *children; +} rbs_location_t; + +typedef struct rbs_location_list_node { + rbs_location_t *loc; + struct rbs_location_list_node *next; +} rbs_location_list_node_t; + +typedef struct rbs_location_list { + rbs_allocator_t *allocator; + rbs_location_list_node_t *head; + rbs_location_list_node_t *tail; + size_t length; +} rbs_location_list_t; + +void rbs_loc_alloc_children(rbs_allocator_t *, rbs_location_t *loc, size_t capacity); +void rbs_loc_add_required_child(rbs_location_t *loc, rbs_constant_id_t name, rbs_range_t r); +void rbs_loc_add_optional_child(rbs_location_t *loc, rbs_constant_id_t name, rbs_range_t r); + +/** + * Allocate new rbs_location_t object through the given allocator. + * */ +rbs_location_t *rbs_location_new(rbs_allocator_t *, rbs_range_t rg); + +rbs_location_list_t *rbs_location_list_new(rbs_allocator_t *allocator); +void rbs_location_list_append(rbs_location_list_t *list, rbs_location_t *loc); + +#endif diff --git a/include/rbs/parser.h b/include/rbs/parser.h new file mode 100644 index 000000000..91f7510e9 --- /dev/null +++ b/include/rbs/parser.h @@ -0,0 +1,133 @@ +#ifndef RBS__PARSER_H +#define RBS__PARSER_H + +#include "rbs/defines.h" +#include "rbs/util/rbs_allocator.h" +#include "rbs/util/rbs_constant_pool.h" +#include "rbs/lexer.h" +#include "rbs/ast.h" + +#include +#include + +/** + * comment represents a sequence of comment lines. + * + * # Comment for the method. + * # + * # ```rb + * # object.foo() # Do something + * # ``` + * # + * def foo: () -> void + * + * A comment object represents the six lines of comments. 
+ * */ +typedef struct rbs_comment_t { + rbs_position_t start; + rbs_position_t end; + + size_t line_tokens_capacity; + size_t line_tokens_count; + rbs_token_t *line_tokens; + + struct rbs_comment_t *next_comment; +} rbs_comment_t; + +typedef struct rbs_error_t { + char *message; + rbs_token_t token; + bool syntax_error; +} rbs_error_t; + +/** + * An RBS parser is an LL(3) parser. + * */ +typedef struct { + rbs_lexer_t *rbs_lexer_t; + + rbs_token_t current_token; + rbs_token_t next_token; /* The first lookahead token */ + rbs_token_t next_token2; /* The second lookahead token */ + rbs_token_t next_token3; /* The third lookahead token */ + + struct id_table *vars; /* Known type variables */ + rbs_comment_t *last_comment; /* Last read comment */ + + rbs_constant_pool_t constant_pool; + rbs_allocator_t *allocator; + rbs_error_t *error; +} rbs_parser_t; + +/** + * Insert new table entry. + * Setting `reset` inserts a _reset_ entry, which stops searching. + * + * ``` + * class Foo[A] + * ^^^ <= push new table with reset + * def foo: [B] () -> [A, B] + * ^^^ <= push new table without reset + * + * class Baz[C] + * ^^^ <= push new table with reset + * end + * end + * ``` + * */ +void rbs_parser_push_typevar_table(rbs_parser_t *parser, bool reset); + +/** + * Insert new type variable into the latest table. + * */ +NODISCARD bool rbs_parser_insert_typevar(rbs_parser_t *parser, rbs_constant_id_t id); + +/** + * Allocate new rbs_lexer_t object. + * + * ``` + * rbs_string_t string = rbs_string_new(start, end); + * rbs_lexer_new(allocator, string, encoding, 0, 31) // New rbs_lexer_t over the string content + * ``` + * */ +rbs_lexer_t *rbs_lexer_new(rbs_allocator_t *, rbs_string_t string, const rbs_encoding_t *encoding, int start_pos, int end_pos); + +/** + * Allocate new rbs_parser_t object. 
+ * + * ``` + * rbs_parser_new(string, encoding, 0, 1); + * ``` + * */ +rbs_parser_t *rbs_parser_new(rbs_string_t string, const rbs_encoding_t *encoding, int start_pos, int end_pos); +void rbs_parser_free(rbs_parser_t *parser); + +/** + * Advance one token. + * */ +void rbs_parser_advance(rbs_parser_t *parser); + +void rbs_parser_print(rbs_parser_t *parser); + +/** + * Returns the comment node (`rbs_ast_comment_t`) associated with a subject at `subject_line`. + * + * ```rbs + * # Comment1 + * class Foo # This is the subject line for Comment1 + * + * # Comment2 + * %a{annotation} # This is the subject line for Comment2 + * def foo: () -> void + * end + * ``` + * */ +rbs_ast_comment_t *rbs_parser_get_comment(rbs_parser_t *parser, int subject_line); + +void rbs_parser_set_error(rbs_parser_t *parser, rbs_token_t tok, bool syntax_error, const char *fmt, ...) RBS_ATTRIBUTE_FORMAT(4, 5); + +bool rbs_parse_type(rbs_parser_t *parser, rbs_node_t **type); +bool rbs_parse_method_type(rbs_parser_t *parser, rbs_method_type_t **method_type); +bool rbs_parse_signature(rbs_parser_t *parser, rbs_signature_t **signature); + +#endif diff --git a/include/rbs/ruby_objs.h b/include/rbs/ruby_objs.h deleted file mode 100644 index 102e57d40..000000000 --- a/include/rbs/ruby_objs.h +++ /dev/null @@ -1,72 +0,0 @@ -/*----------------------------------------------------------------------------*/ -/* This file is generated by the templates/template.rb script and should not */ -/* be modified manually. 
*/ -/* To change the template see */ -/* templates/include/rbs/ruby_objs.h.erb */ -/*----------------------------------------------------------------------------*/ - -#ifndef RBS__RUBY_OBJS_H -#define RBS__RUBY_OBJS_H - -#include "ruby.h" - -VALUE rbs_ast_annotation(VALUE string, VALUE location); -VALUE rbs_ast_comment(VALUE string, VALUE location); -VALUE rbs_ast_decl_class(VALUE name, VALUE type_params, VALUE super_class, VALUE members, VALUE annotations, VALUE location, VALUE comment); -VALUE rbs_ast_decl_class_super(VALUE name, VALUE args, VALUE location); -VALUE rbs_ast_decl_class_alias(VALUE new_name, VALUE old_name, VALUE location, VALUE comment, VALUE annotations); -VALUE rbs_ast_decl_constant(VALUE name, VALUE type, VALUE location, VALUE comment, VALUE annotations); -VALUE rbs_ast_decl_global(VALUE name, VALUE type, VALUE location, VALUE comment, VALUE annotations); -VALUE rbs_ast_decl_interface(VALUE name, VALUE type_params, VALUE members, VALUE annotations, VALUE location, VALUE comment); -VALUE rbs_ast_decl_module(VALUE name, VALUE type_params, VALUE self_types, VALUE members, VALUE annotations, VALUE location, VALUE comment); -VALUE rbs_ast_decl_module_self(VALUE name, VALUE args, VALUE location); -VALUE rbs_ast_decl_module_alias(VALUE new_name, VALUE old_name, VALUE location, VALUE comment, VALUE annotations); -VALUE rbs_ast_decl_type_alias(VALUE name, VALUE type_params, VALUE type, VALUE annotations, VALUE location, VALUE comment); -VALUE rbs_ast_directives_use(VALUE clauses, VALUE location); -VALUE rbs_ast_directives_use_single_clause(VALUE type_name, VALUE new_name, VALUE location); -VALUE rbs_ast_directives_use_wildcard_clause(VALUE namespace, VALUE location); -VALUE rbs_ast_members_alias(VALUE new_name, VALUE old_name, VALUE kind, VALUE annotations, VALUE location, VALUE comment); -VALUE rbs_ast_members_attr_accessor(VALUE name, VALUE type, VALUE ivar_name, VALUE kind, VALUE annotations, VALUE location, VALUE comment, VALUE visibility); -VALUE 
rbs_ast_members_attr_reader(VALUE name, VALUE type, VALUE ivar_name, VALUE kind, VALUE annotations, VALUE location, VALUE comment, VALUE visibility); -VALUE rbs_ast_members_attr_writer(VALUE name, VALUE type, VALUE ivar_name, VALUE kind, VALUE annotations, VALUE location, VALUE comment, VALUE visibility); -VALUE rbs_ast_members_class_instance_variable(VALUE name, VALUE type, VALUE location, VALUE comment); -VALUE rbs_ast_members_class_variable(VALUE name, VALUE type, VALUE location, VALUE comment); -VALUE rbs_ast_members_extend(VALUE name, VALUE args, VALUE annotations, VALUE location, VALUE comment); -VALUE rbs_ast_members_include(VALUE name, VALUE args, VALUE annotations, VALUE location, VALUE comment); -VALUE rbs_ast_members_instance_variable(VALUE name, VALUE type, VALUE location, VALUE comment); -VALUE rbs_ast_members_method_definition(VALUE name, VALUE kind, VALUE overloads, VALUE annotations, VALUE location, VALUE comment, VALUE overloading, VALUE visibility); -VALUE rbs_ast_members_method_definition_overload(VALUE annotations, VALUE method_type); -VALUE rbs_ast_members_prepend(VALUE name, VALUE args, VALUE annotations, VALUE location, VALUE comment); -VALUE rbs_ast_members_private(VALUE location); -VALUE rbs_ast_members_public(VALUE location); -VALUE rbs_ast_type_param(VALUE name, VALUE variance, VALUE upper_bound, VALUE default_type, VALUE unchecked, VALUE location); -VALUE rbs_method_type(VALUE type_params, VALUE type, VALUE block, VALUE location); -VALUE rbs_namespace(VALUE path, VALUE absolute); -VALUE rbs_type_name(VALUE namespace, VALUE name); -VALUE rbs_alias(VALUE name, VALUE args, VALUE location); -VALUE rbs_bases_any(VALUE todo, VALUE location); -VALUE rbs_bases_bool(VALUE location); -VALUE rbs_bases_bottom(VALUE location); -VALUE rbs_bases_class(VALUE location); -VALUE rbs_bases_instance(VALUE location); -VALUE rbs_bases_nil(VALUE location); -VALUE rbs_bases_self(VALUE location); -VALUE rbs_bases_top(VALUE location); -VALUE rbs_bases_void(VALUE 
location); -VALUE rbs_block(VALUE type, VALUE required, VALUE self_type); -VALUE rbs_class_instance(VALUE name, VALUE args, VALUE location); -VALUE rbs_class_singleton(VALUE name, VALUE location); -VALUE rbs_function(VALUE required_positionals, VALUE optional_positionals, VALUE rest_positionals, VALUE trailing_positionals, VALUE required_keywords, VALUE optional_keywords, VALUE rest_keywords, VALUE return_type); -VALUE rbs_function_param(VALUE type, VALUE name, VALUE location); -VALUE rbs_interface(VALUE name, VALUE args, VALUE location); -VALUE rbs_intersection(VALUE types, VALUE location); -VALUE rbs_literal(VALUE literal, VALUE location); -VALUE rbs_optional(VALUE type, VALUE location); -VALUE rbs_proc(VALUE type, VALUE block, VALUE location, VALUE self_type); -VALUE rbs_record(VALUE all_fields, VALUE location); -VALUE rbs_tuple(VALUE types, VALUE location); -VALUE rbs_union(VALUE types, VALUE location); -VALUE rbs_untyped_function(VALUE return_type); -VALUE rbs_variable(VALUE name, VALUE location); - -#endif diff --git a/include/rbs/string.h b/include/rbs/string.h new file mode 100644 index 000000000..d07e1d7cb --- /dev/null +++ b/include/rbs/string.h @@ -0,0 +1,49 @@ +#ifndef RBS__RBS_STRING_H +#define RBS__RBS_STRING_H + +#include +#include +#include "rbs/util/rbs_allocator.h" + +typedef struct { + const char *start; + const char *end; +} rbs_string_t; + +#define RBS_STRING_NULL ((rbs_string_t) { \ + .start = NULL, \ + .end = NULL, \ +}) + +/** + * Returns a new `rbs_string_t` struct + */ +rbs_string_t rbs_string_new(const char *start, const char *end); + +/** + * Copies a portion of the input string into a new owned string. + * @param start_inset Number of characters to exclude from the start + * @param length Number of characters to include + * @return A new owned string that will be freed when the allocator is freed. 
+ */ +rbs_string_t rbs_string_copy_slice(rbs_allocator_t *, rbs_string_t *self, size_t start_inset, size_t length); + +/** + * Drops the leading and trailing whitespace from the given string, in-place. + * @returns A new string that provides a view into the original string `self`. + */ +rbs_string_t rbs_string_strip_whitespace(rbs_string_t *self); + +/** + * Returns the length of the string. + */ +size_t rbs_string_len(const rbs_string_t self); + +/** + * Compares two strings for equality. + */ +bool rbs_string_equal(const rbs_string_t lhs, const rbs_string_t rhs); + +unsigned int rbs_utf8_string_to_codepoint(const rbs_string_t string); + +#endif diff --git a/include/rbs/util/rbs_allocator.h b/include/rbs/util/rbs_allocator.h new file mode 100644 index 000000000..729699e0e --- /dev/null +++ b/include/rbs/util/rbs_allocator.h @@ -0,0 +1,59 @@ +#ifndef RBS_ALLOCATOR_H +#define RBS_ALLOCATOR_H + +#include + +/* Include stdalign.h for C11 and later for alignof support */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +#include +#endif + +/* + * Define a portable alignment macro that works across all supported environments + */ +#if defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L +/* C11 or later - use _Alignof directly (always available in C11+) */ +#define rbs_alignof(type) _Alignof(type) +#elif defined(__cplusplus) && __cplusplus >= 201103L +/* C++11 or later has alignof keyword */ +#define rbs_alignof(type) alignof(type) +#elif defined(__GNUC__) || defined(__clang__) +/* GCC and Clang provide __alignof__ */ +#define rbs_alignof(type) __alignof__(type) +#elif defined(_MSC_VER) +/* MSVC provides __alignof */ +#define rbs_alignof(type) __alignof(type) +#else +/* Fallback using offset trick for other compilers */ +#define rbs_alignof(type) offsetof( \ + struct { char c; type member; }, \ + member \ +) +#endif + +typedef struct rbs_allocator { + // The head of a linked list of pages, starting with the most recently allocated page. 
+ struct rbs_allocator_page *page; + + size_t default_page_payload_size; +} rbs_allocator_t; + +rbs_allocator_t *rbs_allocator_init(void); +void rbs_allocator_free(rbs_allocator_t *); +void *rbs_allocator_malloc_impl(rbs_allocator_t *, /* 1 */ size_t size, size_t alignment); +void *rbs_allocator_malloc_many_impl(rbs_allocator_t *, size_t count, size_t size, size_t alignment); +void *rbs_allocator_calloc_impl(rbs_allocator_t *, size_t count, size_t size, size_t alignment); + +void *rbs_allocator_realloc_impl(rbs_allocator_t *, void *ptr, size_t old_size, size_t new_size, size_t alignment); + +// Use this when allocating memory for a single instance of a type. +#define rbs_allocator_alloc(allocator, type) ((type *) rbs_allocator_malloc_impl((allocator), sizeof(type), rbs_alignof(type))) +// Use this when allocating memory that will be immediately written to in full. +// Such as allocating strings +#define rbs_allocator_alloc_many(allocator, count, type) ((type *) rbs_allocator_malloc_many_impl((allocator), (count), sizeof(type), rbs_alignof(type))) +// Use this when allocating memory that will NOT be immediately written to in full. +// Such as allocating buffers +#define rbs_allocator_calloc(allocator, count, type) ((type *) rbs_allocator_calloc_impl((allocator), (count), sizeof(type), rbs_alignof(type))) +#define rbs_allocator_realloc(allocator, ptr, old_size, new_size, type) ((type *) rbs_allocator_realloc_impl((allocator), (ptr), (old_size), (new_size), rbs_alignof(type))) + +#endif diff --git a/include/rbs/util/rbs_assert.h b/include/rbs/util/rbs_assert.h new file mode 100644 index 000000000..6a6201f92 --- /dev/null +++ b/include/rbs/util/rbs_assert.h @@ -0,0 +1,9 @@ +#ifndef RBS_ASSERT_H +#define RBS_ASSERT_H + +#include "rbs/defines.h" +#include + +void rbs_assert(bool condition, const char *fmt, ...) 
RBS_ATTRIBUTE_FORMAT(2, 3); + +#endif diff --git a/include/rbs/util/rbs_buffer.h b/include/rbs/util/rbs_buffer.h new file mode 100644 index 000000000..7cc001ecc --- /dev/null +++ b/include/rbs/util/rbs_buffer.h @@ -0,0 +1,83 @@ +#ifndef RBS__RBS_BUFFER_H +#define RBS__RBS_BUFFER_H + +#include "rbs/util/rbs_allocator.h" +#include "rbs/string.h" + +#include +#include +#include + +/** + * The default capacity of a rbs_buffer_t. + * If the buffer needs to grow beyond this capacity, it will be doubled. + */ +#define RBS_BUFFER_DEFAULT_CAPACITY 128 + +/** + * A rbs_buffer_t is a simple memory buffer that stores data in a contiguous block of memory. + */ +typedef struct { + /** The length of the buffer in bytes. */ + size_t length; + + /** The capacity of the buffer in bytes that has been allocated. */ + size_t capacity; + + /** A pointer to the start of the buffer. */ + char *value; +} rbs_buffer_t; + +/** + * Initialize a rbs_buffer_t with its default values. + * + * @param allocator The allocator to use. + * @param buffer The buffer to initialize. + * @returns True if the buffer was initialized successfully, false otherwise. + */ +bool rbs_buffer_init(rbs_allocator_t *, rbs_buffer_t *buffer); + +/** + * Return the value of the buffer. + * + * @param buffer The buffer to get the value of. + * @returns The value of the buffer. + */ +char *rbs_buffer_value(const rbs_buffer_t *buffer); + +/** + * Return the length of the buffer. + * + * @param buffer The buffer to get the length of. + * @returns The length of the buffer. + */ +size_t rbs_buffer_length(const rbs_buffer_t *buffer); + +/** + * Append a C string to the buffer. + * + * @param allocator The allocator to use. + * @param buffer The buffer to append to. + * @param value The C string to append. + */ +void rbs_buffer_append_cstr(rbs_allocator_t *, rbs_buffer_t *buffer, const char *value); + +/** + * Append a string to the buffer. + * + * @param allocator The allocator to use. 
+ * @param buffer The buffer to append to. + * @param value The string to append. + * @param length The length of the string to append. + */ +void rbs_buffer_append_string(rbs_allocator_t *, rbs_buffer_t *buffer, const char *value, size_t length); + +/** + * Convert the buffer to a rbs_string_t. + * + * @param buffer The buffer to convert. + * @returns The converted rbs_string_t. + */ +rbs_string_t rbs_buffer_to_string(rbs_buffer_t *buffer); + +#endif diff --git a/include/rbs/util/rbs_constant_pool.h b/include/rbs/util/rbs_constant_pool.h index c63b1dea9..30d569f61 100644 --- a/include/rbs/util/rbs_constant_pool.h +++ b/include/rbs/util/rbs_constant_pool.h @@ -10,6 +10,8 @@ #ifndef RBS_CONSTANT_POOL_H #define RBS_CONSTANT_POOL_H +#include "rbs/util/rbs_encoding.h" + #include #include #include @@ -28,70 +30,6 @@ */ typedef uint32_t rbs_constant_id_t; -/** - * A list of constant IDs. Usually used to represent a set of locals. - */ -typedef struct { - /** The number of constant ids in the list. */ - size_t size; - - /** The number of constant ids that have been allocated in the list. */ - size_t capacity; - - /** The constant ids in the list. */ - rbs_constant_id_t *ids; -} rbs_constant_id_list_t; - -/** - * Initialize a list of constant ids. - * - * @param list The list to initialize. - */ -void rbs_constant_id_list_init(rbs_constant_id_list_t *list); - -/** - * Initialize a list of constant ids with a given capacity. - * - * @param list The list to initialize. - * @param capacity The initial capacity of the list. - */ -void rbs_constant_id_list_init_capacity(rbs_constant_id_list_t *list, size_t capacity); - -/** - * Append a constant id to a list of constant ids. Returns false if any - * potential reallocations fail. - * - * @param list The list to append to. - * @param id The id to append. - * @return Whether the append succeeded. 
- */ -bool rbs_constant_id_list_append(rbs_constant_id_list_t *list, rbs_constant_id_t id); - -/** - * Insert a constant id into a list of constant ids at the specified index. - * - * @param list The list to insert into. - * @param index The index at which to insert. - * @param id The id to insert. - */ -void rbs_constant_id_list_insert(rbs_constant_id_list_t *list, size_t index, rbs_constant_id_t id); - -/** - * Checks if the current constant id list includes the given constant id. - * - * @param list The list to check. - * @param id The id to check for. - * @return Whether the list includes the given id. - */ -bool rbs_constant_id_list_includes(rbs_constant_id_list_t *list, rbs_constant_id_t id); - -/** - * Free the memory associated with a list of constant ids. - * - * @param list The list to free. - */ -void rbs_constant_id_list_free(rbs_constant_id_list_t *list); - /** * The type of bucket in the constant pool hash map. This determines how the * bucket should be freed. @@ -110,10 +48,10 @@ static const rbs_constant_pool_bucket_type_t RBS_CONSTANT_POOL_BUCKET_CONSTANT = /** A bucket in the hash map. */ typedef struct { /** The incremental ID used for indexing back into the pool. */ - unsigned int id: 30; + unsigned int id : 30; /** The type of the bucket, which determines how to free it. */ - rbs_constant_pool_bucket_type_t type: 2; + rbs_constant_pool_bucket_type_t type : 2; /** The hash of the bucket. */ uint32_t hash; @@ -162,7 +100,7 @@ bool rbs_constant_pool_init(rbs_constant_pool_t *pool, uint32_t capacity); * @param constant_id The id of the constant to get. * @return A pointer to the constant. */ -rbs_constant_t * rbs_constant_pool_id_to_constant(const rbs_constant_pool_t *pool, rbs_constant_id_t constant_id); +rbs_constant_t *rbs_constant_pool_id_to_constant(const rbs_constant_pool_t *pool, rbs_constant_id_t constant_id); /** * Find a constant in a constant pool. 
Returns the id of the constant, or 0 if @@ -185,6 +123,7 @@ rbs_constant_id_t rbs_constant_pool_find(const rbs_constant_pool_t *pool, const * @return The id of the constant. */ rbs_constant_id_t rbs_constant_pool_insert_shared(rbs_constant_pool_t *pool, const uint8_t *start, size_t length); +rbs_constant_id_t rbs_constant_pool_insert_shared_with_encoding(rbs_constant_pool_t *pool, const uint8_t *start, size_t length, const rbs_encoding_t *encoding); /** * Insert a constant into a constant pool from memory that is now owned by the diff --git a/include/rbs/util/rbs_encoding.h b/include/rbs/util/rbs_encoding.h new file mode 100644 index 000000000..48f89f75a --- /dev/null +++ b/include/rbs/util/rbs_encoding.h @@ -0,0 +1,280 @@ +/** + * @file encoding.h + * + * The encoding interface and implementations used by the parser. + */ +#ifndef RBS_RBS_ENCODING_H +#define RBS_RBS_ENCODING_H + +#include +#include +#include +#include + +/** + * This struct defines the functions necessary to implement the encoding + * interface so we can determine how many bytes the subsequent character takes. + * Each callback should return the number of bytes, or 0 if the next bytes are + * invalid for the encoding and type. + */ +typedef struct { + /** + * Return the number of bytes that the next character takes if it is valid + * in the encoding. Does not read more than n bytes. It is assumed that n is + * at least 1. + */ + size_t (*char_width)(const uint8_t *b, ptrdiff_t n); + + /** + * Return the number of bytes that the next character takes if it is valid + * in the encoding and is alphabetical. Does not read more than n bytes. It + * is assumed that n is at least 1. + */ + size_t (*alpha_char)(const uint8_t *b, ptrdiff_t n); + + /** + * Return the number of bytes that the next character takes if it is valid + * in the encoding and is alphanumeric. Does not read more than n bytes. It + * is assumed that n is at least 1. 
+     */
+    size_t (*alnum_char)(const uint8_t *b, ptrdiff_t n);
+
+    /**
+     * Return true if the next character is valid in the encoding and is an
+     * uppercase character. Does not read more than n bytes. It is assumed that
+     * n is at least 1.
+     */
+    bool (*isupper_char)(const uint8_t *b, ptrdiff_t n);
+
+    /**
+     * The name of the encoding. This should correspond to a value that can be
+     * passed to Encoding.find in Ruby.
+     */
+    const char *name;
+
+    /**
+     * True if the encoding is a multibyte encoding.
+     */
+    bool multibyte;
+} rbs_encoding_t;
+
+/**
+ * All of the lookup tables use the first bit of each embedded byte to indicate
+ * whether the codepoint is alphabetical.
+ *
+ * The expansion is parenthesized so that the macro behaves as a single value
+ * when it is combined with other operators (for example `+`, `*` or `==`).
+ */
+#define RBS_ENCODING_ALPHABETIC_BIT (1 << 0)
+
+/**
+ * All of the lookup tables use the second bit of each embedded byte to indicate
+ * whether the codepoint is alphanumeric.
+ *
+ * Parenthesized for the same reason as RBS_ENCODING_ALPHABETIC_BIT.
+ */
+#define RBS_ENCODING_ALPHANUMERIC_BIT (1 << 1)
+
+/**
+ * All of the lookup tables use the third bit of each embedded byte to indicate
+ * whether the codepoint is uppercase.
+ *
+ * Parenthesized for the same reason as RBS_ENCODING_ALPHABETIC_BIT.
+ */
+#define RBS_ENCODING_UPPERCASE_BIT (1 << 2)
+
+/**
+ * Return the size of the next character in the UTF-8 encoding.
+ *
+ * @param b The bytes to read.
+ * @param n The number of bytes that can be read.
+ * @returns The number of bytes that the next character takes if it is valid in
+ * the encoding, or 0 if it is not.
+ */
+size_t rbs_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n);
+
+/**
+ * Return the size of the next character in the UTF-8 encoding if it is an
+ * alphabetical character.
+ *
+ * @param b The bytes to read.
+ * @param n The number of bytes that can be read.
+ * @returns The number of bytes that the next character takes if it is valid in
+ * the encoding, or 0 if it is not.
+ */
+size_t rbs_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n);
+
+/**
+ * Return the size of the next character in the UTF-8 encoding if it is an
+ * alphanumeric character.
+ *
+ * @param b The bytes to read.
+ * @param n The number of bytes that can be read.
+ * @returns The number of bytes that the next character takes if it is valid in
+ * the encoding, or 0 if it is not.
+ */
+size_t rbs_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n);
+
+/**
+ * Return true if the next character in the UTF-8 encoding is an uppercase
+ * character.
+ *
+ * @param b The bytes to read.
+ * @param n The number of bytes that can be read.
+ * @returns True if the next character is valid in the encoding and is an
+ * uppercase character, or false if it is not.
+ */
+bool rbs_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n);
+
+/**
+ * This lookup table is referenced in both the UTF-8 encoding file and the
+ * parser directly in order to speed up the default encoding processing. It is
+ * used to indicate whether a character is alphabetical, alphanumeric, or
+ * uppercase in unicode mappings.
+ */
+extern const uint8_t rbs_encoding_unicode_table[256];
+
+/**
+ * These are all of the supported encodings (the same set that prism supports).
+ */
+typedef enum {
+    RBS_ENCODING_UTF_8 = 0,
+    RBS_ENCODING_US_ASCII,
+    RBS_ENCODING_ASCII_8BIT,
+    RBS_ENCODING_EUC_JP,
+    RBS_ENCODING_WINDOWS_31J,
+
+// We optionally support excluding the full set of encodings to only support the
+// minimum necessary to process Ruby code without encoding comments.
+#ifndef RBS_ENCODING_EXCLUDE_FULL + RBS_ENCODING_BIG5, + RBS_ENCODING_BIG5_HKSCS, + RBS_ENCODING_BIG5_UAO, + RBS_ENCODING_CESU_8, + RBS_ENCODING_CP51932, + RBS_ENCODING_CP850, + RBS_ENCODING_CP852, + RBS_ENCODING_CP855, + RBS_ENCODING_CP949, + RBS_ENCODING_CP950, + RBS_ENCODING_CP951, + RBS_ENCODING_EMACS_MULE, + RBS_ENCODING_EUC_JP_MS, + RBS_ENCODING_EUC_JIS_2004, + RBS_ENCODING_EUC_KR, + RBS_ENCODING_EUC_TW, + RBS_ENCODING_GB12345, + RBS_ENCODING_GB18030, + RBS_ENCODING_GB1988, + RBS_ENCODING_GB2312, + RBS_ENCODING_GBK, + RBS_ENCODING_IBM437, + RBS_ENCODING_IBM720, + RBS_ENCODING_IBM737, + RBS_ENCODING_IBM775, + RBS_ENCODING_IBM852, + RBS_ENCODING_IBM855, + RBS_ENCODING_IBM857, + RBS_ENCODING_IBM860, + RBS_ENCODING_IBM861, + RBS_ENCODING_IBM862, + RBS_ENCODING_IBM863, + RBS_ENCODING_IBM864, + RBS_ENCODING_IBM865, + RBS_ENCODING_IBM866, + RBS_ENCODING_IBM869, + RBS_ENCODING_ISO_8859_1, + RBS_ENCODING_ISO_8859_2, + RBS_ENCODING_ISO_8859_3, + RBS_ENCODING_ISO_8859_4, + RBS_ENCODING_ISO_8859_5, + RBS_ENCODING_ISO_8859_6, + RBS_ENCODING_ISO_8859_7, + RBS_ENCODING_ISO_8859_8, + RBS_ENCODING_ISO_8859_9, + RBS_ENCODING_ISO_8859_10, + RBS_ENCODING_ISO_8859_11, + RBS_ENCODING_ISO_8859_13, + RBS_ENCODING_ISO_8859_14, + RBS_ENCODING_ISO_8859_15, + RBS_ENCODING_ISO_8859_16, + RBS_ENCODING_KOI8_R, + RBS_ENCODING_KOI8_U, + RBS_ENCODING_MAC_CENT_EURO, + RBS_ENCODING_MAC_CROATIAN, + RBS_ENCODING_MAC_CYRILLIC, + RBS_ENCODING_MAC_GREEK, + RBS_ENCODING_MAC_ICELAND, + RBS_ENCODING_MAC_JAPANESE, + RBS_ENCODING_MAC_ROMAN, + RBS_ENCODING_MAC_ROMANIA, + RBS_ENCODING_MAC_THAI, + RBS_ENCODING_MAC_TURKISH, + RBS_ENCODING_MAC_UKRAINE, + RBS_ENCODING_SHIFT_JIS, + RBS_ENCODING_SJIS_DOCOMO, + RBS_ENCODING_SJIS_KDDI, + RBS_ENCODING_SJIS_SOFTBANK, + RBS_ENCODING_STATELESS_ISO_2022_JP, + RBS_ENCODING_STATELESS_ISO_2022_JP_KDDI, + RBS_ENCODING_TIS_620, + RBS_ENCODING_UTF8_MAC, + RBS_ENCODING_UTF8_DOCOMO, + RBS_ENCODING_UTF8_KDDI, + RBS_ENCODING_UTF8_SOFTBANK, + RBS_ENCODING_WINDOWS_1250, + 
RBS_ENCODING_WINDOWS_1251, + RBS_ENCODING_WINDOWS_1252, + RBS_ENCODING_WINDOWS_1253, + RBS_ENCODING_WINDOWS_1254, + RBS_ENCODING_WINDOWS_1255, + RBS_ENCODING_WINDOWS_1256, + RBS_ENCODING_WINDOWS_1257, + RBS_ENCODING_WINDOWS_1258, + RBS_ENCODING_WINDOWS_874, +#endif + + RBS_ENCODING_MAXIMUM +} rbs_encoding_type_t; + +/** + * This is the table of all of the encodings that prism supports. + */ +extern const rbs_encoding_t rbs_encodings[RBS_ENCODING_MAXIMUM]; + +/** + * This is the default UTF-8 encoding. We need a reference to it to quickly + * create parsers. + */ +#define RBS_ENCODING_UTF_8_ENTRY (&rbs_encodings[RBS_ENCODING_UTF_8]) + +/** + * This is the US-ASCII encoding. We need a reference to it to be able to + * compare against it when a string is being created because it could possibly + * need to fall back to ASCII-8BIT. + */ +#define RBS_ENCODING_US_ASCII_ENTRY (&rbs_encodings[RBS_ENCODING_US_ASCII]) + +/** + * This is the ASCII-8BIT encoding. We need a reference to it so that rbs_strpbrk + * can compare against it because invalid multibyte characters are not a thing + * in this encoding. It is also needed for handling Regexp encoding flags. + */ +#define RBS_ENCODING_ASCII_8BIT_ENTRY (&rbs_encodings[RBS_ENCODING_ASCII_8BIT]) + +/** + * This is the EUC-JP encoding. We need a reference to it to quickly process + * regular expression modifiers. + */ +#define RBS_ENCODING_EUC_JP_ENTRY (&rbs_encodings[RBS_ENCODING_EUC_JP]) + +/** + * This is the Windows-31J encoding. We need a reference to it to quickly + * process regular expression modifiers. + */ +#define RBS_ENCODING_WINDOWS_31J_ENTRY (&rbs_encodings[RBS_ENCODING_WINDOWS_31J]) + +/** + * Parse the given name of an encoding and return a pointer to the corresponding + * encoding struct if one can be found, otherwise return NULL. + * + * @param start A pointer to the first byte of the name. + * @param end A pointer to the last byte of the name. 
+ * @returns A pointer to the encoding struct if one is found, otherwise NULL.
+ */
+const rbs_encoding_t *rbs_encoding_find(const uint8_t *start, const uint8_t *end);
+
+#endif
diff --git a/include/rbs/util/rbs_unescape.h b/include/rbs/util/rbs_unescape.h
new file mode 100644
index 000000000..cc551cbdd
--- /dev/null
+++ b/include/rbs/util/rbs_unescape.h
@@ -0,0 +1,23 @@
+#ifndef RBS_RBS_UNESCAPE_H
+#define RBS_RBS_UNESCAPE_H
+
+#include
+#include "rbs/util/rbs_allocator.h"
+#include "rbs/string.h"
+
+/**
+ * Receives `input`, the source text of a string token or symbol token, and returns its unquoted string value.
+ *
+ * Input token | Output string
+ * ------------+-------------
+ * "foo\\n"    | foo\n
+ * 'foo'       | foo
+ * `bar`       | bar
+ * :"baz\\t"   | baz\t
+ * :'baz'      | baz
+ *
+ * @returns A new owned string that will be freed when the allocator is freed.
+ * */
+rbs_string_t rbs_unquote_string(rbs_allocator_t *, const rbs_string_t input);
+
+#endif // RBS_RBS_UNESCAPE_H
diff --git a/src/ast.c b/src/ast.c
new file mode 100644
index 000000000..ee63fbd80
--- /dev/null
+++ b/src/ast.c
@@ -0,0 +1,1256 @@
+/*----------------------------------------------------------------------------*/
+/* This file is generated by the templates/template.rb script and should not */
+/* be modified manually.
*/ +/* To change the template see */ +/* templates/src/ast.c.erb */ +/*----------------------------------------------------------------------------*/ + +#line 2 "prism/templates/src/ast.c.erb" +#include "rbs/ast.h" + +#include +#include + +const char *rbs_node_type_name(rbs_node_t *node) { + switch (node->type) { + case RBS_AST_ANNOTATION: + return "RBS::AST::Annotation"; + case RBS_AST_BOOL: + return "RBS::AST::Bool"; + case RBS_AST_COMMENT: + return "RBS::AST::Comment"; + case RBS_AST_DECLARATIONS_CLASS: + return "RBS::AST::Declarations::Class"; + case RBS_AST_DECLARATIONS_CLASS_SUPER: + return "RBS::AST::Declarations::Class::Super"; + case RBS_AST_DECLARATIONS_CLASS_ALIAS: + return "RBS::AST::Declarations::ClassAlias"; + case RBS_AST_DECLARATIONS_CONSTANT: + return "RBS::AST::Declarations::Constant"; + case RBS_AST_DECLARATIONS_GLOBAL: + return "RBS::AST::Declarations::Global"; + case RBS_AST_DECLARATIONS_INTERFACE: + return "RBS::AST::Declarations::Interface"; + case RBS_AST_DECLARATIONS_MODULE: + return "RBS::AST::Declarations::Module"; + case RBS_AST_DECLARATIONS_MODULE_SELF: + return "RBS::AST::Declarations::Module::Self"; + case RBS_AST_DECLARATIONS_MODULE_ALIAS: + return "RBS::AST::Declarations::ModuleAlias"; + case RBS_AST_DECLARATIONS_TYPE_ALIAS: + return "RBS::AST::Declarations::TypeAlias"; + case RBS_AST_DIRECTIVES_USE: + return "RBS::AST::Directives::Use"; + case RBS_AST_DIRECTIVES_USE_SINGLE_CLAUSE: + return "RBS::AST::Directives::Use::SingleClause"; + case RBS_AST_DIRECTIVES_USE_WILDCARD_CLAUSE: + return "RBS::AST::Directives::Use::WildcardClause"; + case RBS_AST_INTEGER: + return "RBS::AST::Integer"; + case RBS_AST_MEMBERS_ALIAS: + return "RBS::AST::Members::Alias"; + case RBS_AST_MEMBERS_ATTR_ACCESSOR: + return "RBS::AST::Members::AttrAccessor"; + case RBS_AST_MEMBERS_ATTR_READER: + return "RBS::AST::Members::AttrReader"; + case RBS_AST_MEMBERS_ATTR_WRITER: + return "RBS::AST::Members::AttrWriter"; + case RBS_AST_MEMBERS_CLASS_INSTANCE_VARIABLE: + 
return "RBS::AST::Members::ClassInstanceVariable"; + case RBS_AST_MEMBERS_CLASS_VARIABLE: + return "RBS::AST::Members::ClassVariable"; + case RBS_AST_MEMBERS_EXTEND: + return "RBS::AST::Members::Extend"; + case RBS_AST_MEMBERS_INCLUDE: + return "RBS::AST::Members::Include"; + case RBS_AST_MEMBERS_INSTANCE_VARIABLE: + return "RBS::AST::Members::InstanceVariable"; + case RBS_AST_MEMBERS_METHOD_DEFINITION: + return "RBS::AST::Members::MethodDefinition"; + case RBS_AST_MEMBERS_METHOD_DEFINITION_OVERLOAD: + return "RBS::AST::Members::MethodDefinition::Overload"; + case RBS_AST_MEMBERS_PREPEND: + return "RBS::AST::Members::Prepend"; + case RBS_AST_MEMBERS_PRIVATE: + return "RBS::AST::Members::Private"; + case RBS_AST_MEMBERS_PUBLIC: + return "RBS::AST::Members::Public"; + case RBS_AST_STRING: + return "RBS::AST::String"; + case RBS_AST_TYPE_PARAM: + return "RBS::AST::TypeParam"; + case RBS_METHOD_TYPE: + return "RBS::MethodType"; + case RBS_NAMESPACE: + return "RBS::Namespace"; + case RBS_SIGNATURE: + return "RBS::Signature"; + case RBS_TYPE_NAME: + return "RBS::TypeName"; + case RBS_TYPES_ALIAS: + return "RBS::Types::Alias"; + case RBS_TYPES_BASES_ANY: + return "RBS::Types::Bases::Any"; + case RBS_TYPES_BASES_BOOL: + return "RBS::Types::Bases::Bool"; + case RBS_TYPES_BASES_BOTTOM: + return "RBS::Types::Bases::Bottom"; + case RBS_TYPES_BASES_CLASS: + return "RBS::Types::Bases::Class"; + case RBS_TYPES_BASES_INSTANCE: + return "RBS::Types::Bases::Instance"; + case RBS_TYPES_BASES_NIL: + return "RBS::Types::Bases::Nil"; + case RBS_TYPES_BASES_SELF: + return "RBS::Types::Bases::Self"; + case RBS_TYPES_BASES_TOP: + return "RBS::Types::Bases::Top"; + case RBS_TYPES_BASES_VOID: + return "RBS::Types::Bases::Void"; + case RBS_TYPES_BLOCK: + return "RBS::Types::Block"; + case RBS_TYPES_CLASS_INSTANCE: + return "RBS::Types::ClassInstance"; + case RBS_TYPES_CLASS_SINGLETON: + return "RBS::Types::ClassSingleton"; + case RBS_TYPES_FUNCTION: + return "RBS::Types::Function"; + case 
RBS_TYPES_FUNCTION_PARAM: + return "RBS::Types::Function::Param"; + case RBS_TYPES_INTERFACE: + return "RBS::Types::Interface"; + case RBS_TYPES_INTERSECTION: + return "RBS::Types::Intersection"; + case RBS_TYPES_LITERAL: + return "RBS::Types::Literal"; + case RBS_TYPES_OPTIONAL: + return "RBS::Types::Optional"; + case RBS_TYPES_PROC: + return "RBS::Types::Proc"; + case RBS_TYPES_RECORD: + return "RBS::Types::Record"; + case RBS_TYPES_RECORD_FIELD_TYPE: + return "RBS::Types::Record::FieldType"; + case RBS_TYPES_TUPLE: + return "RBS::Types::Tuple"; + case RBS_TYPES_UNION: + return "RBS::Types::Union"; + case RBS_TYPES_UNTYPED_FUNCTION: + return "RBS::Types::UntypedFunction"; + case RBS_TYPES_VARIABLE: + return "RBS::Types::Variable"; + case RBS_AST_SYMBOL: + return "Symbol"; + default: + return "Unknown"; + } +} + +/* rbs_node_list */ + +rbs_node_list_t *rbs_node_list_new(rbs_allocator_t *allocator) { + rbs_node_list_t *list = rbs_allocator_alloc(allocator, rbs_node_list_t); + *list = (rbs_node_list_t) { + .allocator = allocator, + .head = NULL, + .tail = NULL, + .length = 0, + }; + + return list; +} + +void rbs_node_list_append(rbs_node_list_t *list, rbs_node_t *node) { + rbs_node_list_node_t *new_node = rbs_allocator_alloc(list->allocator, rbs_node_list_node_t); + *new_node = (rbs_node_list_node_t) { + .node = node, + .next = NULL, + }; + + if (list->tail == NULL) { + list->head = new_node; + list->tail = new_node; + } else { + list->tail->next = new_node; + list->tail = new_node; + } + + list->length++; +} + +/* rbs_hash */ + +rbs_hash_t *rbs_hash_new(rbs_allocator_t *allocator) { + rbs_hash_t *hash = rbs_allocator_alloc(allocator, rbs_hash_t); + *hash = (rbs_hash_t) { + .allocator = allocator, + .head = NULL, + .tail = NULL, + .length = 0, + }; + + return hash; +} + +bool rbs_node_equal(rbs_node_t *lhs, rbs_node_t *rhs) { + if (lhs == rhs) return true; + if (lhs->type != rhs->type) return false; + + switch (lhs->type) { + case RBS_AST_SYMBOL: + return 
((rbs_ast_symbol_t *) lhs)->constant_id == ((rbs_ast_symbol_t *) rhs)->constant_id;
+    case RBS_KEYWORD:
+        return ((rbs_keyword_t *) lhs)->constant_id == ((rbs_keyword_t *) rhs)->constant_id;
+    case RBS_AST_BOOL:
+        return ((rbs_ast_bool_t *) lhs)->value == ((rbs_ast_bool_t *) rhs)->value;
+    case RBS_AST_INTEGER:
+        return rbs_string_equal(((rbs_ast_integer_t *) lhs)->string_representation, ((rbs_ast_integer_t *) rhs)->string_representation);
+    case RBS_AST_STRING:
+        return rbs_string_equal(((rbs_ast_string_t *) lhs)->string, ((rbs_ast_string_t *) rhs)->string);
+    default:
+        printf("Unhandled node type: %d\n", lhs->type);
+        return false;
+    }
+}
+
+/**
+ * Find the entry for a key in the hash.
+ *
+ * Walks the entries from `hash->head` in insertion order and compares each
+ * entry's key against `key` with rbs_node_equal.
+ *
+ * @param hash The hash to search.
+ * @param key The key to look for.
+ * @returns The first entry whose key is equal to `key`, or NULL if there is none.
+ */
+rbs_hash_node_t *rbs_hash_find(rbs_hash_t *hash, rbs_node_t *key) {
+    rbs_hash_node_t *current = hash->head;
+
+    while (current != NULL) {
+        if (rbs_node_equal(key, current->key)) {
+            return current;
+        }
+        current = current->next;
+    }
+
+    return NULL;
+}
+
+/**
+ * Associate a value with a key in the hash.
+ *
+ * If an entry with an equal key already exists, its value is overwritten in
+ * place and the number of entries does not change. Otherwise a new entry is
+ * allocated from `hash->allocator`, appended after the current tail, and
+ * `hash->length` is incremented.
+ *
+ * @param hash The hash to update.
+ * @param key The key to set.
+ * @param value The value to store for the key.
+ */
+void rbs_hash_set(rbs_hash_t *hash, rbs_node_t *key, rbs_node_t *value) {
+    rbs_hash_node_t *existing_node = rbs_hash_find(hash, key);
+    if (existing_node != NULL) {
+        existing_node->value = value;
+        return;
+    }
+
+    rbs_hash_node_t *new_node = rbs_allocator_alloc(hash->allocator, rbs_hash_node_t);
+    new_node->key = key;
+    new_node->value = value;
+    new_node->next = NULL;
+
+    if (hash->tail == NULL) {
+        hash->head = new_node;
+        hash->tail = new_node;
+    } else {
+        hash->tail->next = new_node;
+        hash->tail = new_node;
+    }
+
+    // Keep the entry count in sync with the list, as rbs_node_list_append does.
+    hash->length++;
+}
+
+rbs_node_t *rbs_hash_get(rbs_hash_t *hash, rbs_node_t *key) {
+    rbs_hash_node_t *node = rbs_hash_find(hash, key);
+    return node ?
node->value : NULL; +} + +rbs_keyword_t *rbs_keyword_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_constant_id_t constant_id) { + rbs_keyword_t *instance = rbs_allocator_alloc(allocator, rbs_keyword_t); + + *instance = (rbs_keyword_t) { + .base = (rbs_node_t) { + .type = RBS_KEYWORD, + .location = location, + }, + .constant_id = constant_id, + }; + + return instance; +} + +rbs_ast_symbol_t *rbs_ast_symbol_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_constant_pool_t *constant_pool, rbs_constant_id_t constant_id) { + rbs_ast_symbol_t *instance = rbs_allocator_alloc(allocator, rbs_ast_symbol_t); + + *instance = (rbs_ast_symbol_t) { + .base = (rbs_node_t) { + .type = RBS_AST_SYMBOL, + .location = location, + }, + .constant_id = constant_id, + }; + + return instance; +} + +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_annotation_t *rbs_ast_annotation_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_string_t string) { + rbs_ast_annotation_t *instance = rbs_allocator_alloc(allocator, rbs_ast_annotation_t); + + *instance = (rbs_ast_annotation_t) { + .base = (rbs_node_t) { + .type = RBS_AST_ANNOTATION, + .location = location, + }, + .string = string, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_bool_t *rbs_ast_bool_new(rbs_allocator_t *allocator, rbs_location_t *location, bool value) { + rbs_ast_bool_t *instance = rbs_allocator_alloc(allocator, rbs_ast_bool_t); + + *instance = (rbs_ast_bool_t) { + .base = (rbs_node_t) { + .type = RBS_AST_BOOL, + .location = location, + }, + .value = value, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_comment_t *rbs_ast_comment_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_string_t string) { + rbs_ast_comment_t *instance = rbs_allocator_alloc(allocator, rbs_ast_comment_t); + + *instance = (rbs_ast_comment_t) { + .base = (rbs_node_t) { + .type = RBS_AST_COMMENT, + .location = location, + }, + .string = 
string, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_declarations_class_t *rbs_ast_declarations_class_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *type_params, rbs_ast_declarations_class_super_t *super_class, rbs_node_list_t *members, rbs_node_list_t *annotations, rbs_ast_comment_t *comment) { + rbs_ast_declarations_class_t *instance = rbs_allocator_alloc(allocator, rbs_ast_declarations_class_t); + + *instance = (rbs_ast_declarations_class_t) { + .base = (rbs_node_t) { + .type = RBS_AST_DECLARATIONS_CLASS, + .location = location, + }, + .name = name, + .type_params = type_params, + .super_class = super_class, + .members = members, + .annotations = annotations, + .comment = comment, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_declarations_class_super_t *rbs_ast_declarations_class_super_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args) { + rbs_ast_declarations_class_super_t *instance = rbs_allocator_alloc(allocator, rbs_ast_declarations_class_super_t); + + *instance = (rbs_ast_declarations_class_super_t) { + .base = (rbs_node_t) { + .type = RBS_AST_DECLARATIONS_CLASS_SUPER, + .location = location, + }, + .name = name, + .args = args, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_declarations_class_alias_t *rbs_ast_declarations_class_alias_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *new_name, rbs_type_name_t *old_name, rbs_ast_comment_t *comment, rbs_node_list_t *annotations) { + rbs_ast_declarations_class_alias_t *instance = rbs_allocator_alloc(allocator, rbs_ast_declarations_class_alias_t); + + *instance = (rbs_ast_declarations_class_alias_t) { + .base = (rbs_node_t) { + .type = RBS_AST_DECLARATIONS_CLASS_ALIAS, + .location = location, + }, + .new_name = new_name, + .old_name = old_name, + .comment = comment, + .annotations 
= annotations, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_declarations_constant_t *rbs_ast_declarations_constant_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_t *type, rbs_ast_comment_t *comment, rbs_node_list_t *annotations) { + rbs_ast_declarations_constant_t *instance = rbs_allocator_alloc(allocator, rbs_ast_declarations_constant_t); + + *instance = (rbs_ast_declarations_constant_t) { + .base = (rbs_node_t) { + .type = RBS_AST_DECLARATIONS_CONSTANT, + .location = location, + }, + .name = name, + .type = type, + .comment = comment, + .annotations = annotations, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_declarations_global_t *rbs_ast_declarations_global_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_node_t *type, rbs_ast_comment_t *comment, rbs_node_list_t *annotations) { + rbs_ast_declarations_global_t *instance = rbs_allocator_alloc(allocator, rbs_ast_declarations_global_t); + + *instance = (rbs_ast_declarations_global_t) { + .base = (rbs_node_t) { + .type = RBS_AST_DECLARATIONS_GLOBAL, + .location = location, + }, + .name = name, + .type = type, + .comment = comment, + .annotations = annotations, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_declarations_interface_t *rbs_ast_declarations_interface_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *type_params, rbs_node_list_t *members, rbs_node_list_t *annotations, rbs_ast_comment_t *comment) { + rbs_ast_declarations_interface_t *instance = rbs_allocator_alloc(allocator, rbs_ast_declarations_interface_t); + + *instance = (rbs_ast_declarations_interface_t) { + .base = (rbs_node_t) { + .type = RBS_AST_DECLARATIONS_INTERFACE, + .location = location, + }, + .name = name, + .type_params = type_params, + .members = members, + .annotations = annotations, + .comment = comment, + }; 
+ + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_declarations_module_t *rbs_ast_declarations_module_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *type_params, rbs_node_list_t *self_types, rbs_node_list_t *members, rbs_node_list_t *annotations, rbs_ast_comment_t *comment) { + rbs_ast_declarations_module_t *instance = rbs_allocator_alloc(allocator, rbs_ast_declarations_module_t); + + *instance = (rbs_ast_declarations_module_t) { + .base = (rbs_node_t) { + .type = RBS_AST_DECLARATIONS_MODULE, + .location = location, + }, + .name = name, + .type_params = type_params, + .self_types = self_types, + .members = members, + .annotations = annotations, + .comment = comment, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_declarations_module_self_t *rbs_ast_declarations_module_self_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args) { + rbs_ast_declarations_module_self_t *instance = rbs_allocator_alloc(allocator, rbs_ast_declarations_module_self_t); + + *instance = (rbs_ast_declarations_module_self_t) { + .base = (rbs_node_t) { + .type = RBS_AST_DECLARATIONS_MODULE_SELF, + .location = location, + }, + .name = name, + .args = args, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_declarations_module_alias_t *rbs_ast_declarations_module_alias_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *new_name, rbs_type_name_t *old_name, rbs_ast_comment_t *comment, rbs_node_list_t *annotations) { + rbs_ast_declarations_module_alias_t *instance = rbs_allocator_alloc(allocator, rbs_ast_declarations_module_alias_t); + + *instance = (rbs_ast_declarations_module_alias_t) { + .base = (rbs_node_t) { + .type = RBS_AST_DECLARATIONS_MODULE_ALIAS, + .location = location, + }, + .new_name = new_name, + .old_name = old_name, + .comment = comment, + .annotations = annotations, + }; + + 
return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_declarations_type_alias_t *rbs_ast_declarations_type_alias_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *type_params, rbs_node_t *type, rbs_node_list_t *annotations, rbs_ast_comment_t *comment) { + rbs_ast_declarations_type_alias_t *instance = rbs_allocator_alloc(allocator, rbs_ast_declarations_type_alias_t); + + *instance = (rbs_ast_declarations_type_alias_t) { + .base = (rbs_node_t) { + .type = RBS_AST_DECLARATIONS_TYPE_ALIAS, + .location = location, + }, + .name = name, + .type_params = type_params, + .type = type, + .annotations = annotations, + .comment = comment, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_directives_use_t *rbs_ast_directives_use_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *clauses) { + rbs_ast_directives_use_t *instance = rbs_allocator_alloc(allocator, rbs_ast_directives_use_t); + + *instance = (rbs_ast_directives_use_t) { + .base = (rbs_node_t) { + .type = RBS_AST_DIRECTIVES_USE, + .location = location, + }, + .clauses = clauses, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_directives_use_single_clause_t *rbs_ast_directives_use_single_clause_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *type_name, rbs_ast_symbol_t *new_name) { + rbs_ast_directives_use_single_clause_t *instance = rbs_allocator_alloc(allocator, rbs_ast_directives_use_single_clause_t); + + *instance = (rbs_ast_directives_use_single_clause_t) { + .base = (rbs_node_t) { + .type = RBS_AST_DIRECTIVES_USE_SINGLE_CLAUSE, + .location = location, + }, + .type_name = type_name, + .new_name = new_name, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_directives_use_wildcard_clause_t *rbs_ast_directives_use_wildcard_clause_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_namespace_t 
*rbs_namespace) { + rbs_ast_directives_use_wildcard_clause_t *instance = rbs_allocator_alloc(allocator, rbs_ast_directives_use_wildcard_clause_t); + + *instance = (rbs_ast_directives_use_wildcard_clause_t) { + .base = (rbs_node_t) { + .type = RBS_AST_DIRECTIVES_USE_WILDCARD_CLAUSE, + .location = location, + }, + .rbs_namespace = rbs_namespace, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_integer_t *rbs_ast_integer_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_string_t string_representation) { + rbs_ast_integer_t *instance = rbs_allocator_alloc(allocator, rbs_ast_integer_t); + + *instance = (rbs_ast_integer_t) { + .base = (rbs_node_t) { + .type = RBS_AST_INTEGER, + .location = location, + }, + .string_representation = string_representation, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_members_alias_t *rbs_ast_members_alias_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *new_name, rbs_ast_symbol_t *old_name, rbs_keyword_t *kind, rbs_node_list_t *annotations, rbs_ast_comment_t *comment) { + rbs_ast_members_alias_t *instance = rbs_allocator_alloc(allocator, rbs_ast_members_alias_t); + + *instance = (rbs_ast_members_alias_t) { + .base = (rbs_node_t) { + .type = RBS_AST_MEMBERS_ALIAS, + .location = location, + }, + .new_name = new_name, + .old_name = old_name, + .kind = kind, + .annotations = annotations, + .comment = comment, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_members_attr_accessor_t *rbs_ast_members_attr_accessor_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_node_t *type, rbs_node_t *ivar_name, rbs_keyword_t *kind, rbs_node_list_t *annotations, rbs_ast_comment_t *comment, rbs_keyword_t *visibility) { + rbs_ast_members_attr_accessor_t *instance = rbs_allocator_alloc(allocator, rbs_ast_members_attr_accessor_t); + + *instance = (rbs_ast_members_attr_accessor_t) { + .base = 
(rbs_node_t) { + .type = RBS_AST_MEMBERS_ATTR_ACCESSOR, + .location = location, + }, + .name = name, + .type = type, + .ivar_name = ivar_name, + .kind = kind, + .annotations = annotations, + .comment = comment, + .visibility = visibility, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_members_attr_reader_t *rbs_ast_members_attr_reader_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_node_t *type, rbs_node_t *ivar_name, rbs_keyword_t *kind, rbs_node_list_t *annotations, rbs_ast_comment_t *comment, rbs_keyword_t *visibility) { + rbs_ast_members_attr_reader_t *instance = rbs_allocator_alloc(allocator, rbs_ast_members_attr_reader_t); + + *instance = (rbs_ast_members_attr_reader_t) { + .base = (rbs_node_t) { + .type = RBS_AST_MEMBERS_ATTR_READER, + .location = location, + }, + .name = name, + .type = type, + .ivar_name = ivar_name, + .kind = kind, + .annotations = annotations, + .comment = comment, + .visibility = visibility, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_members_attr_writer_t *rbs_ast_members_attr_writer_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_node_t *type, rbs_node_t *ivar_name, rbs_keyword_t *kind, rbs_node_list_t *annotations, rbs_ast_comment_t *comment, rbs_keyword_t *visibility) { + rbs_ast_members_attr_writer_t *instance = rbs_allocator_alloc(allocator, rbs_ast_members_attr_writer_t); + + *instance = (rbs_ast_members_attr_writer_t) { + .base = (rbs_node_t) { + .type = RBS_AST_MEMBERS_ATTR_WRITER, + .location = location, + }, + .name = name, + .type = type, + .ivar_name = ivar_name, + .kind = kind, + .annotations = annotations, + .comment = comment, + .visibility = visibility, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_members_class_instance_variable_t *rbs_ast_members_class_instance_variable_new(rbs_allocator_t *allocator, rbs_location_t *location, 
rbs_ast_symbol_t *name, rbs_node_t *type, rbs_ast_comment_t *comment) { + rbs_ast_members_class_instance_variable_t *instance = rbs_allocator_alloc(allocator, rbs_ast_members_class_instance_variable_t); + + *instance = (rbs_ast_members_class_instance_variable_t) { + .base = (rbs_node_t) { + .type = RBS_AST_MEMBERS_CLASS_INSTANCE_VARIABLE, + .location = location, + }, + .name = name, + .type = type, + .comment = comment, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_members_class_variable_t *rbs_ast_members_class_variable_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_node_t *type, rbs_ast_comment_t *comment) { + rbs_ast_members_class_variable_t *instance = rbs_allocator_alloc(allocator, rbs_ast_members_class_variable_t); + + *instance = (rbs_ast_members_class_variable_t) { + .base = (rbs_node_t) { + .type = RBS_AST_MEMBERS_CLASS_VARIABLE, + .location = location, + }, + .name = name, + .type = type, + .comment = comment, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_members_extend_t *rbs_ast_members_extend_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args, rbs_node_list_t *annotations, rbs_ast_comment_t *comment) { + rbs_ast_members_extend_t *instance = rbs_allocator_alloc(allocator, rbs_ast_members_extend_t); + + *instance = (rbs_ast_members_extend_t) { + .base = (rbs_node_t) { + .type = RBS_AST_MEMBERS_EXTEND, + .location = location, + }, + .name = name, + .args = args, + .annotations = annotations, + .comment = comment, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_members_include_t *rbs_ast_members_include_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args, rbs_node_list_t *annotations, rbs_ast_comment_t *comment) { + rbs_ast_members_include_t *instance = rbs_allocator_alloc(allocator, rbs_ast_members_include_t); + + 
*instance = (rbs_ast_members_include_t) { + .base = (rbs_node_t) { + .type = RBS_AST_MEMBERS_INCLUDE, + .location = location, + }, + .name = name, + .args = args, + .annotations = annotations, + .comment = comment, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_members_instance_variable_t *rbs_ast_members_instance_variable_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_node_t *type, rbs_ast_comment_t *comment) { + rbs_ast_members_instance_variable_t *instance = rbs_allocator_alloc(allocator, rbs_ast_members_instance_variable_t); + + *instance = (rbs_ast_members_instance_variable_t) { + .base = (rbs_node_t) { + .type = RBS_AST_MEMBERS_INSTANCE_VARIABLE, + .location = location, + }, + .name = name, + .type = type, + .comment = comment, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_members_method_definition_t *rbs_ast_members_method_definition_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_keyword_t *kind, rbs_node_list_t *overloads, rbs_node_list_t *annotations, rbs_ast_comment_t *comment, bool overloading, rbs_keyword_t *visibility) { + rbs_ast_members_method_definition_t *instance = rbs_allocator_alloc(allocator, rbs_ast_members_method_definition_t); + + *instance = (rbs_ast_members_method_definition_t) { + .base = (rbs_node_t) { + .type = RBS_AST_MEMBERS_METHOD_DEFINITION, + .location = location, + }, + .name = name, + .kind = kind, + .overloads = overloads, + .annotations = annotations, + .comment = comment, + .overloading = overloading, + .visibility = visibility, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_members_method_definition_overload_t *rbs_ast_members_method_definition_overload_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *annotations, rbs_node_t *method_type) { + rbs_ast_members_method_definition_overload_t *instance = 
rbs_allocator_alloc(allocator, rbs_ast_members_method_definition_overload_t); + + *instance = (rbs_ast_members_method_definition_overload_t) { + .base = (rbs_node_t) { + .type = RBS_AST_MEMBERS_METHOD_DEFINITION_OVERLOAD, + .location = location, + }, + .annotations = annotations, + .method_type = method_type, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_members_prepend_t *rbs_ast_members_prepend_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args, rbs_node_list_t *annotations, rbs_ast_comment_t *comment) { + rbs_ast_members_prepend_t *instance = rbs_allocator_alloc(allocator, rbs_ast_members_prepend_t); + + *instance = (rbs_ast_members_prepend_t) { + .base = (rbs_node_t) { + .type = RBS_AST_MEMBERS_PREPEND, + .location = location, + }, + .name = name, + .args = args, + .annotations = annotations, + .comment = comment, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_members_private_t *rbs_ast_members_private_new(rbs_allocator_t *allocator, rbs_location_t *location) { + rbs_ast_members_private_t *instance = rbs_allocator_alloc(allocator, rbs_ast_members_private_t); + + *instance = (rbs_ast_members_private_t) { + .base = (rbs_node_t) { + .type = RBS_AST_MEMBERS_PRIVATE, + .location = location, + }, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_members_public_t *rbs_ast_members_public_new(rbs_allocator_t *allocator, rbs_location_t *location) { + rbs_ast_members_public_t *instance = rbs_allocator_alloc(allocator, rbs_ast_members_public_t); + + *instance = (rbs_ast_members_public_t) { + .base = (rbs_node_t) { + .type = RBS_AST_MEMBERS_PUBLIC, + .location = location, + }, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_string_t *rbs_ast_string_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_string_t string) { + rbs_ast_string_t *instance = rbs_allocator_alloc(allocator, 
rbs_ast_string_t); + + *instance = (rbs_ast_string_t) { + .base = (rbs_node_t) { + .type = RBS_AST_STRING, + .location = location, + }, + .string = string, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_ast_type_param_t *rbs_ast_type_param_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name, rbs_keyword_t *variance, rbs_node_t *upper_bound, rbs_node_t *default_type, bool unchecked) { + rbs_ast_type_param_t *instance = rbs_allocator_alloc(allocator, rbs_ast_type_param_t); + + *instance = (rbs_ast_type_param_t) { + .base = (rbs_node_t) { + .type = RBS_AST_TYPE_PARAM, + .location = location, + }, + .name = name, + .variance = variance, + .upper_bound = upper_bound, + .default_type = default_type, + .unchecked = unchecked, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_method_type_t *rbs_method_type_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *type_params, rbs_node_t *type, rbs_types_block_t *block) { + rbs_method_type_t *instance = rbs_allocator_alloc(allocator, rbs_method_type_t); + + *instance = (rbs_method_type_t) { + .base = (rbs_node_t) { + .type = RBS_METHOD_TYPE, + .location = location, + }, + .type_params = type_params, + .type = type, + .block = block, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_namespace_t *rbs_namespace_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *path, bool absolute) { + rbs_namespace_t *instance = rbs_allocator_alloc(allocator, rbs_namespace_t); + + *instance = (rbs_namespace_t) { + .base = (rbs_node_t) { + .type = RBS_NAMESPACE, + .location = location, + }, + .path = path, + .absolute = absolute, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_signature_t *rbs_signature_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *directives, rbs_node_list_t *declarations) { + rbs_signature_t *instance = 
rbs_allocator_alloc(allocator, rbs_signature_t); + + *instance = (rbs_signature_t) { + .base = (rbs_node_t) { + .type = RBS_SIGNATURE, + .location = location, + }, + .directives = directives, + .declarations = declarations, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_type_name_t *rbs_type_name_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_namespace_t *rbs_namespace, rbs_ast_symbol_t *name) { + rbs_type_name_t *instance = rbs_allocator_alloc(allocator, rbs_type_name_t); + + *instance = (rbs_type_name_t) { + .base = (rbs_node_t) { + .type = RBS_TYPE_NAME, + .location = location, + }, + .rbs_namespace = rbs_namespace, + .name = name, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_alias_t *rbs_types_alias_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args) { + rbs_types_alias_t *instance = rbs_allocator_alloc(allocator, rbs_types_alias_t); + + *instance = (rbs_types_alias_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_ALIAS, + .location = location, + }, + .name = name, + .args = args, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_bases_any_t *rbs_types_bases_any_new(rbs_allocator_t *allocator, rbs_location_t *location, bool todo) { + rbs_types_bases_any_t *instance = rbs_allocator_alloc(allocator, rbs_types_bases_any_t); + + *instance = (rbs_types_bases_any_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_BASES_ANY, + .location = location, + }, + .todo = todo, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_bases_bool_t *rbs_types_bases_bool_new(rbs_allocator_t *allocator, rbs_location_t *location) { + rbs_types_bases_bool_t *instance = rbs_allocator_alloc(allocator, rbs_types_bases_bool_t); + + *instance = (rbs_types_bases_bool_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_BASES_BOOL, + .location = location, + }, + }; + + return instance; +} +#line 156 
"prism/templates/src/ast.c.erb" +rbs_types_bases_bottom_t *rbs_types_bases_bottom_new(rbs_allocator_t *allocator, rbs_location_t *location) { + rbs_types_bases_bottom_t *instance = rbs_allocator_alloc(allocator, rbs_types_bases_bottom_t); + + *instance = (rbs_types_bases_bottom_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_BASES_BOTTOM, + .location = location, + }, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_bases_class_t *rbs_types_bases_class_new(rbs_allocator_t *allocator, rbs_location_t *location) { + rbs_types_bases_class_t *instance = rbs_allocator_alloc(allocator, rbs_types_bases_class_t); + + *instance = (rbs_types_bases_class_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_BASES_CLASS, + .location = location, + }, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_bases_instance_t *rbs_types_bases_instance_new(rbs_allocator_t *allocator, rbs_location_t *location) { + rbs_types_bases_instance_t *instance = rbs_allocator_alloc(allocator, rbs_types_bases_instance_t); + + *instance = (rbs_types_bases_instance_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_BASES_INSTANCE, + .location = location, + }, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_bases_nil_t *rbs_types_bases_nil_new(rbs_allocator_t *allocator, rbs_location_t *location) { + rbs_types_bases_nil_t *instance = rbs_allocator_alloc(allocator, rbs_types_bases_nil_t); + + *instance = (rbs_types_bases_nil_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_BASES_NIL, + .location = location, + }, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_bases_self_t *rbs_types_bases_self_new(rbs_allocator_t *allocator, rbs_location_t *location) { + rbs_types_bases_self_t *instance = rbs_allocator_alloc(allocator, rbs_types_bases_self_t); + + *instance = (rbs_types_bases_self_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_BASES_SELF, + .location = location, + }, + 
}; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_bases_top_t *rbs_types_bases_top_new(rbs_allocator_t *allocator, rbs_location_t *location) { + rbs_types_bases_top_t *instance = rbs_allocator_alloc(allocator, rbs_types_bases_top_t); + + *instance = (rbs_types_bases_top_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_BASES_TOP, + .location = location, + }, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_bases_void_t *rbs_types_bases_void_new(rbs_allocator_t *allocator, rbs_location_t *location) { + rbs_types_bases_void_t *instance = rbs_allocator_alloc(allocator, rbs_types_bases_void_t); + + *instance = (rbs_types_bases_void_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_BASES_VOID, + .location = location, + }, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_block_t *rbs_types_block_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_t *type, bool required, rbs_node_t *self_type) { + rbs_types_block_t *instance = rbs_allocator_alloc(allocator, rbs_types_block_t); + + *instance = (rbs_types_block_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_BLOCK, + .location = location, + }, + .type = type, + .required = required, + .self_type = self_type, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_class_instance_t *rbs_types_class_instance_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args) { + rbs_types_class_instance_t *instance = rbs_allocator_alloc(allocator, rbs_types_class_instance_t); + + *instance = (rbs_types_class_instance_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_CLASS_INSTANCE, + .location = location, + }, + .name = name, + .args = args, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_class_singleton_t *rbs_types_class_singleton_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name) { + 
rbs_types_class_singleton_t *instance = rbs_allocator_alloc(allocator, rbs_types_class_singleton_t); + + *instance = (rbs_types_class_singleton_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_CLASS_SINGLETON, + .location = location, + }, + .name = name, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_function_t *rbs_types_function_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *required_positionals, rbs_node_list_t *optional_positionals, rbs_node_t *rest_positionals, rbs_node_list_t *trailing_positionals, rbs_hash_t *required_keywords, rbs_hash_t *optional_keywords, rbs_node_t *rest_keywords, rbs_node_t *return_type) { + rbs_types_function_t *instance = rbs_allocator_alloc(allocator, rbs_types_function_t); + + *instance = (rbs_types_function_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_FUNCTION, + .location = location, + }, + .required_positionals = required_positionals, + .optional_positionals = optional_positionals, + .rest_positionals = rest_positionals, + .trailing_positionals = trailing_positionals, + .required_keywords = required_keywords, + .optional_keywords = optional_keywords, + .rest_keywords = rest_keywords, + .return_type = return_type, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_function_param_t *rbs_types_function_param_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_t *type, rbs_ast_symbol_t *name) { + rbs_types_function_param_t *instance = rbs_allocator_alloc(allocator, rbs_types_function_param_t); + + *instance = (rbs_types_function_param_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_FUNCTION_PARAM, + .location = location, + }, + .type = type, + .name = name, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_interface_t *rbs_types_interface_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_type_name_t *name, rbs_node_list_t *args) { + rbs_types_interface_t *instance = 
rbs_allocator_alloc(allocator, rbs_types_interface_t); + + *instance = (rbs_types_interface_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_INTERFACE, + .location = location, + }, + .name = name, + .args = args, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_intersection_t *rbs_types_intersection_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *types) { + rbs_types_intersection_t *instance = rbs_allocator_alloc(allocator, rbs_types_intersection_t); + + *instance = (rbs_types_intersection_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_INTERSECTION, + .location = location, + }, + .types = types, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_literal_t *rbs_types_literal_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_t *literal) { + rbs_types_literal_t *instance = rbs_allocator_alloc(allocator, rbs_types_literal_t); + + *instance = (rbs_types_literal_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_LITERAL, + .location = location, + }, + .literal = literal, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_optional_t *rbs_types_optional_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_t *type) { + rbs_types_optional_t *instance = rbs_allocator_alloc(allocator, rbs_types_optional_t); + + *instance = (rbs_types_optional_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_OPTIONAL, + .location = location, + }, + .type = type, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_proc_t *rbs_types_proc_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_t *type, rbs_types_block_t *block, rbs_node_t *self_type) { + rbs_types_proc_t *instance = rbs_allocator_alloc(allocator, rbs_types_proc_t); + + *instance = (rbs_types_proc_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_PROC, + .location = location, + }, + .type = type, + .block = block, + .self_type = 
self_type, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_record_t *rbs_types_record_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_hash_t *all_fields) { + rbs_types_record_t *instance = rbs_allocator_alloc(allocator, rbs_types_record_t); + + *instance = (rbs_types_record_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_RECORD, + .location = location, + }, + .all_fields = all_fields, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_record_field_type_t *rbs_types_record_field_type_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_t *type, bool required) { + rbs_types_record_field_type_t *instance = rbs_allocator_alloc(allocator, rbs_types_record_field_type_t); + + *instance = (rbs_types_record_field_type_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_RECORD_FIELD_TYPE, + .location = location, + }, + .type = type, + .required = required, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_tuple_t *rbs_types_tuple_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *types) { + rbs_types_tuple_t *instance = rbs_allocator_alloc(allocator, rbs_types_tuple_t); + + *instance = (rbs_types_tuple_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_TUPLE, + .location = location, + }, + .types = types, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_union_t *rbs_types_union_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_list_t *types) { + rbs_types_union_t *instance = rbs_allocator_alloc(allocator, rbs_types_union_t); + + *instance = (rbs_types_union_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_UNION, + .location = location, + }, + .types = types, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_untyped_function_t *rbs_types_untyped_function_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_node_t *return_type) { + 
rbs_types_untyped_function_t *instance = rbs_allocator_alloc(allocator, rbs_types_untyped_function_t); + + *instance = (rbs_types_untyped_function_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_UNTYPED_FUNCTION, + .location = location, + }, + .return_type = return_type, + }; + + return instance; +} +#line 156 "prism/templates/src/ast.c.erb" +rbs_types_variable_t *rbs_types_variable_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_ast_symbol_t *name) { + rbs_types_variable_t *instance = rbs_allocator_alloc(allocator, rbs_types_variable_t); + + *instance = (rbs_types_variable_t) { + .base = (rbs_node_t) { + .type = RBS_TYPES_VARIABLE, + .location = location, + }, + .name = name, + }; + + return instance; +} diff --git a/src/constants.c b/src/constants.c deleted file mode 100644 index dd1e2ee5c..000000000 --- a/src/constants.c +++ /dev/null @@ -1,153 +0,0 @@ -/*----------------------------------------------------------------------------*/ -/* This file is generated by the templates/template.rb script and should not */ -/* be modified manually. 
*/ -/* To change the template see */ -/* templates/src/constants.c.erb */ -/*----------------------------------------------------------------------------*/ - -#include "rbs_extension.h" - -VALUE RBS_Parser; - -VALUE RBS; -VALUE RBS_AST; -VALUE RBS_AST_Declarations; -VALUE RBS_AST_Directives; -VALUE RBS_AST_Members; -VALUE RBS_Parser; -VALUE RBS_Types; -VALUE RBS_Types_Bases; - -VALUE RBS_AST_Annotation; -VALUE RBS_AST_Comment; -VALUE RBS_AST_Declarations_Class; -VALUE RBS_AST_Declarations_Class_Super; -VALUE RBS_AST_Declarations_ClassAlias; -VALUE RBS_AST_Declarations_Constant; -VALUE RBS_AST_Declarations_Global; -VALUE RBS_AST_Declarations_Interface; -VALUE RBS_AST_Declarations_Module; -VALUE RBS_AST_Declarations_Module_Self; -VALUE RBS_AST_Declarations_ModuleAlias; -VALUE RBS_AST_Declarations_TypeAlias; -VALUE RBS_AST_Directives_Use; -VALUE RBS_AST_Directives_Use_SingleClause; -VALUE RBS_AST_Directives_Use_WildcardClause; -VALUE RBS_AST_Members_Alias; -VALUE RBS_AST_Members_AttrAccessor; -VALUE RBS_AST_Members_AttrReader; -VALUE RBS_AST_Members_AttrWriter; -VALUE RBS_AST_Members_ClassInstanceVariable; -VALUE RBS_AST_Members_ClassVariable; -VALUE RBS_AST_Members_Extend; -VALUE RBS_AST_Members_Include; -VALUE RBS_AST_Members_InstanceVariable; -VALUE RBS_AST_Members_MethodDefinition; -VALUE RBS_AST_Members_MethodDefinition_Overload; -VALUE RBS_AST_Members_Prepend; -VALUE RBS_AST_Members_Private; -VALUE RBS_AST_Members_Public; -VALUE RBS_AST_TypeParam; -VALUE RBS_MethodType; -VALUE RBS_Namespace; -VALUE RBS_TypeName; -VALUE RBS_Types_Alias; -VALUE RBS_Types_Bases_Any; -VALUE RBS_Types_Bases_Bool; -VALUE RBS_Types_Bases_Bottom; -VALUE RBS_Types_Bases_Class; -VALUE RBS_Types_Bases_Instance; -VALUE RBS_Types_Bases_Nil; -VALUE RBS_Types_Bases_Self; -VALUE RBS_Types_Bases_Top; -VALUE RBS_Types_Bases_Void; -VALUE RBS_Types_Block; -VALUE RBS_Types_ClassInstance; -VALUE RBS_Types_ClassSingleton; -VALUE RBS_Types_Function; -VALUE RBS_Types_Function_Param; -VALUE 
RBS_Types_Interface; -VALUE RBS_Types_Intersection; -VALUE RBS_Types_Literal; -VALUE RBS_Types_Optional; -VALUE RBS_Types_Proc; -VALUE RBS_Types_Record; -VALUE RBS_Types_Tuple; -VALUE RBS_Types_Union; -VALUE RBS_Types_UntypedFunction; -VALUE RBS_Types_Variable; - -VALUE RBS_ParsingError; - -#define IMPORT_CONSTANT(var, parent, name) { var = rb_const_get(parent, rb_intern(name)); rb_gc_register_mark_object(var); } - -void rbs__init_constants(void) { - IMPORT_CONSTANT(RBS, rb_cObject, "RBS"); - IMPORT_CONSTANT(RBS_ParsingError, RBS, "ParsingError"); - - IMPORT_CONSTANT(RBS_AST, RBS, "AST"); - IMPORT_CONSTANT(RBS_AST_Declarations, RBS_AST, "Declarations"); - IMPORT_CONSTANT(RBS_AST_Directives, RBS_AST, "Directives"); - IMPORT_CONSTANT(RBS_AST_Members, RBS_AST, "Members"); - IMPORT_CONSTANT(RBS_Types, RBS, "Types"); - IMPORT_CONSTANT(RBS_Types_Bases, RBS_Types, "Bases"); - - IMPORT_CONSTANT(RBS_AST_Annotation, RBS_AST, "Annotation"); - IMPORT_CONSTANT(RBS_AST_Comment, RBS_AST, "Comment"); - IMPORT_CONSTANT(RBS_AST_Declarations_Class, RBS_AST_Declarations, "Class"); - IMPORT_CONSTANT(RBS_AST_Declarations_Class_Super, RBS_AST_Declarations_Class, "Super"); - IMPORT_CONSTANT(RBS_AST_Declarations_ClassAlias, RBS_AST_Declarations, "ClassAlias"); - IMPORT_CONSTANT(RBS_AST_Declarations_Constant, RBS_AST_Declarations, "Constant"); - IMPORT_CONSTANT(RBS_AST_Declarations_Global, RBS_AST_Declarations, "Global"); - IMPORT_CONSTANT(RBS_AST_Declarations_Interface, RBS_AST_Declarations, "Interface"); - IMPORT_CONSTANT(RBS_AST_Declarations_Module, RBS_AST_Declarations, "Module"); - IMPORT_CONSTANT(RBS_AST_Declarations_Module_Self, RBS_AST_Declarations_Module, "Self"); - IMPORT_CONSTANT(RBS_AST_Declarations_ModuleAlias, RBS_AST_Declarations, "ModuleAlias"); - IMPORT_CONSTANT(RBS_AST_Declarations_TypeAlias, RBS_AST_Declarations, "TypeAlias"); - IMPORT_CONSTANT(RBS_AST_Directives_Use, RBS_AST_Directives, "Use"); - IMPORT_CONSTANT(RBS_AST_Directives_Use_SingleClause, 
RBS_AST_Directives_Use, "SingleClause"); - IMPORT_CONSTANT(RBS_AST_Directives_Use_WildcardClause, RBS_AST_Directives_Use, "WildcardClause"); - IMPORT_CONSTANT(RBS_AST_Members_Alias, RBS_AST_Members, "Alias"); - IMPORT_CONSTANT(RBS_AST_Members_AttrAccessor, RBS_AST_Members, "AttrAccessor"); - IMPORT_CONSTANT(RBS_AST_Members_AttrReader, RBS_AST_Members, "AttrReader"); - IMPORT_CONSTANT(RBS_AST_Members_AttrWriter, RBS_AST_Members, "AttrWriter"); - IMPORT_CONSTANT(RBS_AST_Members_ClassInstanceVariable, RBS_AST_Members, "ClassInstanceVariable"); - IMPORT_CONSTANT(RBS_AST_Members_ClassVariable, RBS_AST_Members, "ClassVariable"); - IMPORT_CONSTANT(RBS_AST_Members_Extend, RBS_AST_Members, "Extend"); - IMPORT_CONSTANT(RBS_AST_Members_Include, RBS_AST_Members, "Include"); - IMPORT_CONSTANT(RBS_AST_Members_InstanceVariable, RBS_AST_Members, "InstanceVariable"); - IMPORT_CONSTANT(RBS_AST_Members_MethodDefinition, RBS_AST_Members, "MethodDefinition"); - IMPORT_CONSTANT(RBS_AST_Members_MethodDefinition_Overload, RBS_AST_Members_MethodDefinition, "Overload"); - IMPORT_CONSTANT(RBS_AST_Members_Prepend, RBS_AST_Members, "Prepend"); - IMPORT_CONSTANT(RBS_AST_Members_Private, RBS_AST_Members, "Private"); - IMPORT_CONSTANT(RBS_AST_Members_Public, RBS_AST_Members, "Public"); - IMPORT_CONSTANT(RBS_AST_TypeParam, RBS_AST, "TypeParam"); - IMPORT_CONSTANT(RBS_MethodType, RBS, "MethodType"); - IMPORT_CONSTANT(RBS_Namespace, RBS, "Namespace"); - IMPORT_CONSTANT(RBS_TypeName, RBS, "TypeName"); - IMPORT_CONSTANT(RBS_Types_Alias, RBS_Types, "Alias"); - IMPORT_CONSTANT(RBS_Types_Bases_Any, RBS_Types_Bases, "Any"); - IMPORT_CONSTANT(RBS_Types_Bases_Bool, RBS_Types_Bases, "Bool"); - IMPORT_CONSTANT(RBS_Types_Bases_Bottom, RBS_Types_Bases, "Bottom"); - IMPORT_CONSTANT(RBS_Types_Bases_Class, RBS_Types_Bases, "Class"); - IMPORT_CONSTANT(RBS_Types_Bases_Instance, RBS_Types_Bases, "Instance"); - IMPORT_CONSTANT(RBS_Types_Bases_Nil, RBS_Types_Bases, "Nil"); - IMPORT_CONSTANT(RBS_Types_Bases_Self, 
RBS_Types_Bases, "Self"); - IMPORT_CONSTANT(RBS_Types_Bases_Top, RBS_Types_Bases, "Top"); - IMPORT_CONSTANT(RBS_Types_Bases_Void, RBS_Types_Bases, "Void"); - IMPORT_CONSTANT(RBS_Types_Block, RBS_Types, "Block"); - IMPORT_CONSTANT(RBS_Types_ClassInstance, RBS_Types, "ClassInstance"); - IMPORT_CONSTANT(RBS_Types_ClassSingleton, RBS_Types, "ClassSingleton"); - IMPORT_CONSTANT(RBS_Types_Function, RBS_Types, "Function"); - IMPORT_CONSTANT(RBS_Types_Function_Param, RBS_Types_Function, "Param"); - IMPORT_CONSTANT(RBS_Types_Interface, RBS_Types, "Interface"); - IMPORT_CONSTANT(RBS_Types_Intersection, RBS_Types, "Intersection"); - IMPORT_CONSTANT(RBS_Types_Literal, RBS_Types, "Literal"); - IMPORT_CONSTANT(RBS_Types_Optional, RBS_Types, "Optional"); - IMPORT_CONSTANT(RBS_Types_Proc, RBS_Types, "Proc"); - IMPORT_CONSTANT(RBS_Types_Record, RBS_Types, "Record"); - IMPORT_CONSTANT(RBS_Types_Tuple, RBS_Types, "Tuple"); - IMPORT_CONSTANT(RBS_Types_Union, RBS_Types, "Union"); - IMPORT_CONSTANT(RBS_Types_UntypedFunction, RBS_Types, "UntypedFunction"); - IMPORT_CONSTANT(RBS_Types_Variable, RBS_Types, "Variable"); -} diff --git a/src/lexer.c b/src/lexer.c new file mode 100644 index 000000000..47b54abcd --- /dev/null +++ b/src/lexer.c @@ -0,0 +1,2956 @@ +/* Generated by re2c 4.3 */ +#line 1 "src/lexer.re" +#include "rbs/lexer.h" + +rbs_token_t rbs_lexer_next_token(rbs_lexer_t *lexer) { + rbs_lexer_t backup; + + backup = *lexer; + +#line 12 "src/lexer.c" + { + unsigned int yych; + unsigned int yyaccept = 0; + yych = rbs_peek(lexer); + switch (yych) { + case 0x00000000: + goto yy1; + case '\t': + case ' ': + goto yy4; + case '\n': + case '\r': + goto yy6; + case '!': + goto yy7; + case '"': + goto yy9; + case '#': + goto yy10; + case '$': + goto yy12; + case '%': + goto yy13; + case '&': + goto yy14; + case '\'': + goto yy15; + case '(': + goto yy16; + case ')': + goto yy17; + case '*': + goto yy18; + case '+': + goto yy19; + case ',': + goto yy20; + case '-': + goto yy21; + case '.': + 
goto yy22; + case '/': + case '~': + goto yy24; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + goto yy25; + case ':': + goto yy27; + case '<': + goto yy29; + case '=': + goto yy31; + case '>': + goto yy33; + case '?': + goto yy34; + case '@': + goto yy35; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + goto yy36; + case '[': + goto yy38; + case ']': + goto yy39; + case '^': + goto yy40; + case '_': + goto yy41; + case '`': + goto yy43; + case 'a': + goto yy45; + case 'b': + goto yy47; + case 'c': + goto yy48; + case 'd': + goto yy49; + case 'e': + goto yy50; + case 'f': + goto yy51; + case 'g': + case 'h': + case 'j': + case 'k': + case 'l': + case 'q': + case 'r': + case 'w': + case 'x': + case 'y': + case 'z': + goto yy52; + case 'i': + goto yy54; + case 'm': + goto yy55; + case 'n': + goto yy56; + case 'o': + goto yy57; + case 'p': + goto yy58; + case 's': + goto yy59; + case 't': + goto yy60; + case 'u': + goto yy61; + case 'v': + goto yy62; + case '{': + goto yy63; + case '|': + goto yy64; + case '}': + goto yy65; + default: + goto yy2; + } + yy1: + rbs_skip(lexer); +#line 144 "src/lexer.re" + { + return rbs_next_eof_token(lexer); + } +#line 121 "src/lexer.c" + yy2: + rbs_skip(lexer); + yy3: +#line 145 "src/lexer.re" + { + return rbs_next_token(lexer, ErrorToken); + } +#line 127 "src/lexer.c" + yy4: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '\t') goto yy4; + if (yych == ' ') goto yy4; + yy5: +#line 143 "src/lexer.re" + { + return rbs_next_token(lexer, tTRIVIA); + } +#line 136 "src/lexer.c" + yy6: + rbs_skip(lexer); + goto yy5; + yy7: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '=') goto yy24; + 
if (yych == '~') goto yy24; + yy8: +#line 48 "src/lexer.re" + { + return rbs_next_token(lexer, tOPERATOR); + } +#line 148 "src/lexer.c" + yy9: + yyaccept = 0; + rbs_skip(lexer); + backup = *lexer; + yych = rbs_peek(lexer); + if (yych <= 0x00000000) goto yy3; + goto yy67; + yy10: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= 0x00000000) goto yy11; + if (yych != '\n') goto yy10; + yy11: +#line 59 "src/lexer.re" + { + return rbs_next_token( + lexer, + lexer->first_token_of_line ? tLINECOMMENT : tCOMMENT + ); + } +#line 169 "src/lexer.c" + yy12: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= ')') { + if (yych <= 0x0000001F) { + if (yych <= '\n') { + if (yych <= 0x00000000) goto yy3; + if (yych <= 0x00000008) goto yy71; + goto yy3; + } else { + if (yych == '\r') goto yy3; + goto yy71; + } + } else { + if (yych <= '#') { + if (yych <= ' ') goto yy3; + if (yych <= '"') goto yy73; + goto yy71; + } else { + if (yych == '%') goto yy3; + if (yych <= '\'') goto yy73; + goto yy3; + } + } + } else { + if (yych <= 'Z') { + if (yych <= '/') { + if (yych == '-') goto yy71; + goto yy73; + } else { + if (yych <= '9') goto yy71; + if (yych <= '>') goto yy73; + goto yy71; + } + } else { + if (yych <= '^') { + if (yych == '\\') goto yy73; + goto yy3; + } else { + if (yych <= 'z') goto yy71; + if (yych <= '}') goto yy3; + if (yych <= '~') goto yy73; + goto yy71; + } + } + } + yy13: + yyaccept = 1; + rbs_skip(lexer); + backup = *lexer; + yych = rbs_peek(lexer); + if (yych == 'a') goto yy74; + goto yy8; + yy14: + rbs_skip(lexer); +#line 33 "src/lexer.re" + { + return rbs_next_token(lexer, pAMP); + } +#line 227 "src/lexer.c" + yy15: + yyaccept = 0; + rbs_skip(lexer); + backup = *lexer; + yych = rbs_peek(lexer); + if (yych <= 0x00000000) goto yy3; + goto yy76; + yy16: + rbs_skip(lexer); +#line 24 "src/lexer.re" + { + return rbs_next_token(lexer, pLPAREN); + } +#line 239 "src/lexer.c" + yy17: + rbs_skip(lexer); +#line 25 "src/lexer.re" + { + return 
rbs_next_token(lexer, pRPAREN); + } +#line 244 "src/lexer.c" + yy18: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '*') goto yy80; +#line 35 "src/lexer.re" + { + return rbs_next_token(lexer, pSTAR); + } +#line 251 "src/lexer.c" + yy19: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '/') goto yy8; + if (yych <= '9') goto yy25; + if (yych == '@') goto yy24; + goto yy8; + yy20: + rbs_skip(lexer); +#line 30 "src/lexer.re" + { + return rbs_next_token(lexer, pCOMMA); + } +#line 263 "src/lexer.c" + yy21: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') goto yy8; + if (yych <= '9') goto yy25; + goto yy8; + } else { + if (yych <= '>') goto yy81; + if (yych == '@') goto yy24; + goto yy8; + } + yy22: + yyaccept = 2; + rbs_skip(lexer); + backup = *lexer; + yych = rbs_peek(lexer); + if (yych == '.') goto yy82; + yy23: +#line 37 "src/lexer.re" + { + return rbs_next_token(lexer, pDOT); + } +#line 285 "src/lexer.c" + yy24: + rbs_skip(lexer); + goto yy8; + yy25: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '/') goto yy26; + if (yych <= '9') goto yy25; + if (yych == '_') goto yy25; + yy26: +#line 51 "src/lexer.re" + { + return rbs_next_token(lexer, tINTEGER); + } +#line 298 "src/lexer.c" + yy27: + yyaccept = 3; + rbs_skip(lexer); + backup = *lexer; + yych = rbs_peek(lexer); + switch (yych) { + case '!': + goto yy83; + case '"': + goto yy85; + case '$': + goto yy86; + case '%': + case '&': + case '/': + case '^': + case '`': + case '|': + case '~': + goto yy87; + case '\'': + goto yy88; + case '*': + goto yy89; + case '+': + case '-': + goto yy90; + case ':': + goto yy91; + case '<': + goto yy92; + case '=': + goto yy93; + case '>': + goto yy94; + case '@': + goto yy95; + case 'A': + case 'B': + case 'C': + case 'D': + case 'E': + case 'F': + case 'G': + case 'H': + case 'I': + case 'J': + case 'K': + case 'L': + case 'M': + case 'N': + case 'O': + case 'P': + case 'Q': + case 'R': + case 'S': + case 'T': + 
case 'U': + case 'V': + case 'W': + case 'X': + case 'Y': + case 'Z': + case '_': + case 'a': + case 'b': + case 'c': + case 'd': + case 'e': + case 'f': + case 'g': + case 'h': + case 'i': + case 'j': + case 'k': + case 'l': + case 'm': + case 'n': + case 'o': + case 'p': + case 'q': + case 'r': + case 's': + case 't': + case 'u': + case 'v': + case 'w': + case 'x': + case 'y': + case 'z': + goto yy96; + case '[': + goto yy98; + default: + goto yy28; + } + yy28: +#line 44 "src/lexer.re" + { + return rbs_next_token(lexer, pCOLON); + } +#line 383 "src/lexer.c" + yy29: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= ';') goto yy30; + if (yych <= '<') goto yy24; + if (yych <= '=') goto yy99; + yy30: +#line 46 "src/lexer.re" + { + return rbs_next_token(lexer, pLT); + } +#line 393 "src/lexer.c" + yy31: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '>') { + if (yych <= '<') goto yy32; + if (yych <= '=') goto yy100; + goto yy101; + } else { + if (yych == '~') goto yy24; + } + yy32: +#line 43 "src/lexer.re" + { + return rbs_next_token(lexer, pEQ); + } +#line 407 "src/lexer.c" + yy33: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '<') goto yy8; + if (yych <= '>') goto yy24; + goto yy8; + yy34: + rbs_skip(lexer); +#line 34 "src/lexer.re" + { + return rbs_next_token(lexer, pQUESTION); + } +#line 418 "src/lexer.c" + yy35: + yyaccept = 0; + rbs_skip(lexer); + backup = *lexer; + yych = rbs_peek(lexer); + if (yych <= '^') { + if (yych <= '?') goto yy3; + if (yych <= '@') goto yy102; + if (yych <= 'Z') goto yy103; + goto yy3; + } else { + if (yych == '`') goto yy3; + if (yych <= 'z') goto yy103; + goto yy3; + } + yy36: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy36; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy37; + if (yych <= 'Z') goto yy36; + } else { + if (yych == '`') goto yy37; + if 
(yych <= 'z') goto yy36; + } + } + yy37: +#line 129 "src/lexer.re" + { + return rbs_next_token(lexer, tUIDENT); + } +#line 456 "src/lexer.c" + yy38: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == ']') goto yy107; +#line 26 "src/lexer.re" + { + return rbs_next_token(lexer, pLBRACKET); + } +#line 463 "src/lexer.c" + yy39: + rbs_skip(lexer); +#line 27 "src/lexer.re" + { + return rbs_next_token(lexer, pRBRACKET); + } +#line 468 "src/lexer.c" + yy40: + rbs_skip(lexer); +#line 32 "src/lexer.re" + { + return rbs_next_token(lexer, pHAT); + } +#line 473 "src/lexer.c" + yy41: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy108; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy42; + if (yych <= 'Z') goto yy111; + } else { + if (yych <= '_') goto yy113; + if (yych <= '`') goto yy42; + if (yych <= 'z') goto yy108; + } + } + yy42: +#line 132 "src/lexer.re" + { + return rbs_next_token(lexer, tULLIDENT); + } +#line 497 "src/lexer.c" + yy43: + yyaccept = 4; + rbs_skip(lexer); + backup = *lexer; + yych = rbs_peek(lexer); + if (yych <= ' ') { + if (yych <= 0x00000000) goto yy44; + if (yych <= 0x0000001F) goto yy114; + } else { + if (yych != ':') goto yy114; + } + yy44: +#line 39 "src/lexer.re" + { + return rbs_next_token(lexer, tOPERATOR); + } +#line 512 "src/lexer.c" + yy45: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= 'r') { + if (yych == 'l') goto yy115; + goto yy53; + } else { + if (yych <= 's') goto yy116; + if (yych <= 't') goto yy118; + goto yy53; + } + yy46: +#line 128 "src/lexer.re" + { + return rbs_next_token(lexer, tLIDENT); + } +#line 527 "src/lexer.c" + yy47: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'o') goto yy119; + goto yy53; + yy48: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'l') goto yy120; + goto yy53; + yy49: + rbs_skip(lexer); + yych = rbs_peek(lexer); + 
if (yych == 'e') goto yy121; + goto yy53; + yy50: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'n') goto yy122; + if (yych == 'x') goto yy123; + goto yy53; + yy51: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'a') goto yy124; + goto yy53; + yy52: + rbs_skip(lexer); + yych = rbs_peek(lexer); + yy53: + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + goto yy46; + } else { + if (yych <= '9') goto yy52; + if (yych <= '<') goto yy46; + goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy46; + if (yych <= 'Z') goto yy52; + goto yy46; + } else { + if (yych == '`') goto yy46; + if (yych <= 'z') goto yy52; + goto yy46; + } + } + yy54: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'n') goto yy125; + goto yy53; + yy55: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'o') goto yy127; + goto yy53; + yy56: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'i') goto yy128; + goto yy53; + yy57: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'u') goto yy129; + goto yy53; + yy58: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'r') goto yy130; + if (yych == 'u') goto yy131; + goto yy53; + yy59: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy132; + if (yych == 'i') goto yy133; + goto yy53; + yy60: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= 'q') { + if (yych == 'o') goto yy134; + goto yy53; + } else { + if (yych <= 'r') goto yy135; + if (yych == 'y') goto yy136; + goto yy53; + } + yy61: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'n') goto yy137; + if (yych == 's') goto yy138; + goto yy53; + yy62: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'o') goto yy139; + goto yy53; + yy63: + rbs_skip(lexer); +#line 28 "src/lexer.re" + { + return rbs_next_token(lexer, pLBRACE); + } +#line 636 "src/lexer.c" + yy64: + rbs_skip(lexer); +#line 31 "src/lexer.re" + { + return rbs_next_token(lexer, pBAR); + 
} +#line 641 "src/lexer.c" + yy65: + rbs_skip(lexer); +#line 29 "src/lexer.re" + { + return rbs_next_token(lexer, pRBRACE); + } +#line 646 "src/lexer.c" + yy66: + rbs_skip(lexer); + yych = rbs_peek(lexer); + yy67: + if (yych <= '"') { + if (yych <= 0x00000000) goto yy68; + if (yych <= '!') goto yy66; + goto yy69; + } else { + if (yych == '\\') goto yy70; + goto yy66; + } + yy68: + *lexer = backup; + if (yyaccept <= 3) { + if (yyaccept <= 1) { + if (yyaccept == 0) goto yy3; + else + goto yy8; + } else { + if (yyaccept == 2) goto yy23; + else + goto yy28; + } + } else { + if (yyaccept <= 5) { + if (yyaccept == 4) goto yy44; + else + goto yy78; + } else { + goto yy155; + } + } + yy69: + rbs_skip(lexer); +#line 106 "src/lexer.re" + { + return rbs_next_token(lexer, tDQSTRING); + } +#line 681 "src/lexer.c" + yy70: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'u') goto yy140; + if (yych == 'x') goto yy141; + goto yy66; + yy71: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= ',') { + if (yych <= '\f') { + if (yych <= 0x00000000) goto yy72; + if (yych <= 0x00000008) goto yy71; + if (yych >= '\v') goto yy71; + } else { + if (yych <= 0x0000001F) { + if (yych >= 0x0000000E) goto yy71; + } else { + if (yych == '#') goto yy71; + } + } + } else { + if (yych <= '>') { + if (yych <= '-') goto yy71; + if (yych <= '/') goto yy72; + if (yych <= '9') goto yy71; + } else { + if (yych <= '^') { + if (yych <= 'Z') goto yy71; + } else { + if (yych <= 'z') goto yy71; + if (yych >= 0x0000007F) goto yy71; + } + } + } + yy72: +#line 139 "src/lexer.re" + { + return rbs_next_token(lexer, tGIDENT); + } +#line 720 "src/lexer.c" + yy73: + rbs_skip(lexer); + goto yy72; + yy74: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= 'Z') { + if (yych <= '(') { + if (yych <= '\'') goto yy68; + goto yy142; + } else { + if (yych == '<') goto yy143; + goto yy68; + } + } else { + if (yych <= 'z') { + if (yych <= '[') goto yy144; + goto yy68; + } else { + if (yych <= '{') goto 
yy145; + if (yych <= '|') goto yy146; + goto yy68; + } + } + yy75: + rbs_skip(lexer); + yych = rbs_peek(lexer); + yy76: + if (yych <= '\'') { + if (yych <= 0x00000000) goto yy68; + if (yych <= '&') goto yy75; + } else { + if (yych == '\\') goto yy79; + goto yy75; + } + yy77: + rbs_skip(lexer); + yy78: +#line 107 "src/lexer.re" + { + return rbs_next_token(lexer, tSQSTRING); + } +#line 761 "src/lexer.c" + yy79: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '\'') { + if (yych <= 0x00000000) goto yy68; + if (yych <= '&') goto yy75; + goto yy147; + } else { + if (yych == '\\') goto yy79; + goto yy75; + } + yy80: + rbs_skip(lexer); +#line 36 "src/lexer.re" + { + return rbs_next_token(lexer, pSTAR2); + } +#line 777 "src/lexer.c" + yy81: + rbs_skip(lexer); +#line 41 "src/lexer.re" + { + return rbs_next_token(lexer, pARROW); + } +#line 782 "src/lexer.c" + yy82: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '.') goto yy148; + goto yy68; + yy83: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '=') goto yy87; + if (yych == '~') goto yy87; + yy84: +#line 126 "src/lexer.re" + { + return rbs_next_token(lexer, tSYMBOL); + } +#line 796 "src/lexer.c" + yy85: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '"') { + if (yych <= 0x00000000) goto yy68; + if (yych <= '!') goto yy85; + goto yy149; + } else { + if (yych == '\\') goto yy150; + goto yy85; + } + yy86: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= ')') { + if (yych <= 0x0000001F) { + if (yych <= '\n') { + if (yych <= 0x00000000) goto yy68; + if (yych <= 0x00000008) goto yy151; + goto yy68; + } else { + if (yych == '\r') goto yy68; + goto yy151; + } + } else { + if (yych <= '#') { + if (yych <= ' ') goto yy68; + if (yych <= '"') goto yy153; + goto yy151; + } else { + if (yych == '%') goto yy68; + if (yych <= '\'') goto yy153; + goto yy68; + } + } + } else { + if (yych <= 'Z') { + if (yych <= '/') { + if (yych == '-') goto yy151; + goto yy153; + } else { + if (yych <= 
'9') goto yy151; + if (yych <= '>') goto yy153; + goto yy151; + } + } else { + if (yych <= '^') { + if (yych == '\\') goto yy153; + goto yy68; + } else { + if (yych <= 'z') goto yy151; + if (yych <= '}') goto yy68; + if (yych <= '~') goto yy153; + goto yy151; + } + } + } + yy87: + rbs_skip(lexer); + goto yy84; + yy88: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '\'') { + if (yych <= 0x00000000) goto yy68; + if (yych <= '&') goto yy88; + goto yy154; + } else { + if (yych == '\\') goto yy156; + goto yy88; + } + yy89: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '*') goto yy87; + goto yy84; + yy90: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '@') goto yy87; + goto yy84; + yy91: + rbs_skip(lexer); +#line 45 "src/lexer.re" + { + return rbs_next_token(lexer, pCOLON2); + } +#line 882 "src/lexer.c" + yy92: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= ';') goto yy84; + if (yych <= '<') goto yy87; + if (yych <= '=') goto yy157; + goto yy84; + yy93: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '=') goto yy158; + if (yych == '~') goto yy87; + goto yy68; + yy94: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '<') goto yy84; + if (yych <= '>') goto yy87; + goto yy84; + yy95: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '^') { + if (yych <= '?') goto yy68; + if (yych <= '@') goto yy159; + if (yych <= 'Z') goto yy160; + goto yy68; + } else { + if (yych == '`') goto yy68; + if (yych <= 'z') goto yy160; + goto yy68; + } + yy96: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '>') { + if (yych <= '/') { + if (yych == '!') goto yy162; + } else { + if (yych <= '9') goto yy96; + if (yych == '=') goto yy162; + } + } else { + if (yych <= '^') { + if (yych <= '?') goto yy162; + if (yych <= '@') goto yy97; + if (yych <= 'Z') goto yy96; + } else { + if (yych == '`') goto yy97; + if (yych <= 'z') goto yy96; + } + } + yy97: +#line 122 "src/lexer.re" + { + return rbs_next_token(lexer, 
tSYMBOL); + } +#line 938 "src/lexer.c" + yy98: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == ']') goto yy158; + goto yy68; + yy99: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '>') goto yy24; + goto yy8; + yy100: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '=') goto yy24; + goto yy8; + yy101: + rbs_skip(lexer); +#line 42 "src/lexer.re" + { + return rbs_next_token(lexer, pFATARROW); + } +#line 958 "src/lexer.c" + yy102: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '^') { + if (yych <= '@') goto yy68; + if (yych <= 'Z') goto yy163; + goto yy68; + } else { + if (yych == '`') goto yy68; + if (yych <= 'z') goto yy163; + goto yy68; + } + yy103: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= 'Z') { + if (yych <= '/') goto yy104; + if (yych <= '9') goto yy103; + if (yych >= 'A') goto yy103; + } else { + if (yych <= '_') { + if (yych >= '_') goto yy103; + } else { + if (yych <= '`') goto yy104; + if (yych <= 'z') goto yy103; + } + } + yy104: +#line 136 "src/lexer.re" + { + return rbs_next_token(lexer, tAIDENT); + } +#line 989 "src/lexer.c" + yy105: + rbs_skip(lexer); +#line 133 "src/lexer.re" + { + return rbs_next_token(lexer, tBANGIDENT); + } +#line 994 "src/lexer.c" + yy106: + rbs_skip(lexer); +#line 134 "src/lexer.re" + { + return rbs_next_token(lexer, tEQIDENT); + } +#line 999 "src/lexer.c" + yy107: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '=') goto yy24; +#line 47 "src/lexer.re" + { + return rbs_next_token(lexer, pAREF_OPR); + } +#line 1006 "src/lexer.c" + yy108: + rbs_skip(lexer); + yych = rbs_peek(lexer); + yy109: + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy108; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy110; + if (yych <= 'Z') goto yy108; + } else { + if (yych == '`') goto yy110; + if (yych <= 'z') goto yy108; + } + } + yy110: +#line 130 "src/lexer.re" + { + return 
rbs_next_token(lexer, tULLIDENT); + } +#line 1030 "src/lexer.c" + yy111: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy111; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy112; + if (yych <= 'Z') goto yy111; + } else { + if (yych == '`') goto yy112; + if (yych <= 'z') goto yy111; + } + } + yy112: +#line 131 "src/lexer.re" + { + return rbs_next_token(lexer, tULIDENT); + } +#line 1053 "src/lexer.c" + yy113: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 't') goto yy165; + goto yy109; + yy114: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= 0x00000000) goto yy68; + if (yych == '`') goto yy166; + goto yy114; + yy115: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'i') goto yy167; + goto yy53; + yy116: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy117; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy117; + if (yych <= 'z') goto yy52; + } + } + yy117: +#line 96 "src/lexer.re" + { + return rbs_next_token(lexer, kAS); + } +#line 1092 "src/lexer.c" + yy118: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 't') goto yy168; + goto yy53; + yy119: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'o') goto yy169; + if (yych == 't') goto yy170; + goto yy53; + yy120: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'a') goto yy172; + goto yy53; + yy121: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'f') goto yy173; + goto yy53; + yy122: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'd') goto yy175; + goto yy53; + yy123: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 't') goto yy177; + goto yy53; + yy124: + 
rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'l') goto yy178; + goto yy53; + yy125: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '^') { + if (yych <= '9') { + if (yych == '!') goto yy105; + if (yych >= '0') goto yy52; + } else { + if (yych <= '=') { + if (yych >= '=') goto yy106; + } else { + if (yych <= '@') goto yy126; + if (yych <= 'Z') goto yy52; + } + } + } else { + if (yych <= 'c') { + if (yych == '`') goto yy126; + if (yych <= 'b') goto yy52; + goto yy179; + } else { + if (yych <= 's') { + if (yych <= 'r') goto yy52; + goto yy180; + } else { + if (yych <= 't') goto yy181; + if (yych <= 'z') goto yy52; + } + } + } + yy126: +#line 77 "src/lexer.re" + { + return rbs_next_token(lexer, kIN); + } +#line 1162 "src/lexer.c" + yy127: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'd') goto yy182; + goto yy53; + yy128: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'l') goto yy183; + goto yy53; + yy129: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 't') goto yy185; + goto yy53; + yy130: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy187; + if (yych == 'i') goto yy188; + goto yy53; + yy131: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'b') goto yy189; + goto yy53; + yy132: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'l') goto yy190; + goto yy53; + yy133: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'n') goto yy191; + goto yy53; + yy134: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'p') goto yy192; + goto yy53; + yy135: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'u') goto yy194; + goto yy53; + yy136: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'p') goto yy195; + goto yy53; + yy137: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'c') goto yy196; + if (yych == 't') goto yy197; + goto yy53; + yy138: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy198; + goto yy53; + 
yy139: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'i') goto yy200; + goto yy53; + yy140: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '@') { + if (yych <= '/') goto yy68; + if (yych <= '9') goto yy201; + goto yy68; + } else { + if (yych <= 'F') goto yy201; + if (yych <= '`') goto yy68; + if (yych <= 'f') goto yy201; + goto yy68; + } + yy141: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '/') goto yy68; + if (yych <= '9') goto yy66; + if (yych <= '`') goto yy68; + if (yych <= 'f') goto yy66; + goto yy68; + yy142: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= 0x00000000) goto yy68; + if (yych == ')') goto yy202; + goto yy142; + yy143: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= 0x00000000) goto yy68; + if (yych == '>') goto yy203; + goto yy143; + yy144: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= 0x00000000) goto yy68; + if (yych == ']') goto yy204; + goto yy144; + yy145: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= 0x00000000) goto yy68; + if (yych == '}') goto yy205; + goto yy145; + yy146: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= 0x00000000) goto yy68; + if (yych == '|') goto yy206; + goto yy146; + yy147: + yyaccept = 5; + rbs_skip(lexer); + backup = *lexer; + yych = rbs_peek(lexer); + if (yych <= '\'') { + if (yych <= 0x00000000) goto yy78; + if (yych <= '&') goto yy75; + goto yy77; + } else { + if (yych == '\\') goto yy79; + goto yy75; + } + yy148: + rbs_skip(lexer); +#line 38 "src/lexer.re" + { + return rbs_next_token(lexer, pDOT3); + } +#line 1298 "src/lexer.c" + yy149: + rbs_skip(lexer); +#line 108 "src/lexer.re" + { + return rbs_next_token(lexer, tDQSYMBOL); + } +#line 1303 "src/lexer.c" + yy150: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'u') goto yy207; + if (yych == 'x') goto yy208; + goto yy85; + yy151: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= ',') { + if (yych <= '\f') { + if (yych <= 0x00000000) goto 
yy152; + if (yych <= 0x00000008) goto yy151; + if (yych >= '\v') goto yy151; + } else { + if (yych <= 0x0000001F) { + if (yych >= 0x0000000E) goto yy151; + } else { + if (yych == '#') goto yy151; + } + } + } else { + if (yych <= '>') { + if (yych <= '-') goto yy151; + if (yych <= '/') goto yy152; + if (yych <= '9') goto yy151; + } else { + if (yych <= '^') { + if (yych <= 'Z') goto yy151; + } else { + if (yych <= 'z') goto yy151; + if (yych >= 0x0000007F) goto yy151; + } + } + } + yy152: +#line 125 "src/lexer.re" + { + return rbs_next_token(lexer, tSYMBOL); + } +#line 1342 "src/lexer.c" + yy153: + rbs_skip(lexer); + goto yy152; + yy154: + rbs_skip(lexer); + yy155: +#line 109 "src/lexer.re" + { + return rbs_next_token(lexer, tSQSYMBOL); + } +#line 1351 "src/lexer.c" + yy156: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '\'') { + if (yych <= 0x00000000) goto yy68; + if (yych <= '&') goto yy88; + goto yy209; + } else { + if (yych == '\\') goto yy156; + goto yy88; + } + yy157: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '>') goto yy87; + goto yy84; + yy158: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '=') goto yy87; + goto yy84; + yy159: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '^') { + if (yych <= '@') goto yy68; + if (yych <= 'Z') goto yy210; + goto yy68; + } else { + if (yych == '`') goto yy68; + if (yych <= 'z') goto yy210; + goto yy68; + } + yy160: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '>') { + if (yych <= '/') { + if (yych == '!') goto yy212; + } else { + if (yych <= '9') goto yy160; + if (yych == '=') goto yy212; + } + } else { + if (yych <= '^') { + if (yych <= '?') goto yy212; + if (yych <= '@') goto yy161; + if (yych <= 'Z') goto yy160; + } else { + if (yych == '`') goto yy161; + if (yych <= 'z') goto yy160; + } + } + yy161: +#line 123 "src/lexer.re" + { + return rbs_next_token(lexer, tSYMBOL); + } +#line 1408 "src/lexer.c" + yy162: + rbs_skip(lexer); + goto yy97; + yy163: + 
rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= 'Z') { + if (yych <= '/') goto yy164; + if (yych <= '9') goto yy163; + if (yych >= 'A') goto yy163; + } else { + if (yych <= '_') { + if (yych >= '_') goto yy163; + } else { + if (yych <= '`') goto yy164; + if (yych <= 'z') goto yy163; + } + } + yy164: +#line 137 "src/lexer.re" + { + return rbs_next_token(lexer, tA2IDENT); + } +#line 1430 "src/lexer.c" + yy165: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'o') goto yy213; + goto yy109; + yy166: + rbs_skip(lexer); +#line 40 "src/lexer.re" + { + return rbs_next_token(lexer, tQIDENT); + } +#line 1440 "src/lexer.c" + yy167: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'a') goto yy214; + goto yy53; + yy168: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'r') goto yy215; + goto yy53; + yy169: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'l') goto yy216; + goto yy53; + yy170: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy171; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy171; + if (yych <= 'z') goto yy52; + } + } + yy171: +#line 71 "src/lexer.re" + { + return rbs_next_token(lexer, kBOT); + } +#line 1478 "src/lexer.c" + yy172: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 's') goto yy218; + goto yy53; + yy173: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy174; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy174; + if (yych <= 'z') goto yy52; + } + } + yy174: +#line 73 "src/lexer.re" + { + return rbs_next_token(lexer, kDEF); + } +#line 1506 "src/lexer.c" + 
yy175: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy176; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy176; + if (yych <= 'z') goto yy52; + } + } + yy176: +#line 74 "src/lexer.re" + { + return rbs_next_token(lexer, kEND); + } +#line 1529 "src/lexer.c" + yy177: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy219; + goto yy53; + yy178: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 's') goto yy220; + goto yy53; + yy179: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'l') goto yy221; + goto yy53; + yy180: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 't') goto yy222; + goto yy53; + yy181: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy223; + goto yy53; + yy182: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'u') goto yy224; + goto yy53; + yy183: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy184; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy184; + if (yych <= 'z') goto yy52; + } + } + yy184: +#line 82 "src/lexer.re" + { + return rbs_next_token(lexer, kNIL); + } +#line 1582 "src/lexer.c" + yy185: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy186; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy186; + if (yych <= 'z') goto yy52; + } + } + yy186: +#line 83 "src/lexer.re" + { + return rbs_next_token(lexer, 
kOUT); + } +#line 1605 "src/lexer.c" + yy187: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'p') goto yy225; + goto yy53; + yy188: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'v') goto yy226; + goto yy53; + yy189: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'l') goto yy227; + goto yy53; + yy190: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'f') goto yy228; + goto yy53; + yy191: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'g') goto yy230; + goto yy53; + yy192: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy193; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy193; + if (yych <= 'z') goto yy52; + } + } + yy193: +#line 89 "src/lexer.re" + { + return rbs_next_token(lexer, kTOP); + } +#line 1653 "src/lexer.c" + yy194: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy231; + goto yy53; + yy195: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy233; + goto yy53; + yy196: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'h') goto yy235; + goto yy53; + yy197: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'y') goto yy236; + goto yy53; + yy198: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy199; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy199; + if (yych <= 'z') goto yy52; + } + } + yy199: +#line 95 "src/lexer.re" + { + return rbs_next_token(lexer, kUSE); + } +#line 1696 "src/lexer.c" + yy200: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'd') goto yy237; + goto yy53; + yy201: + 
rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '@') { + if (yych <= '/') goto yy68; + if (yych <= '9') goto yy239; + goto yy68; + } else { + if (yych <= 'F') goto yy239; + if (yych <= '`') goto yy68; + if (yych <= 'f') goto yy239; + goto yy68; + } + yy202: + rbs_skip(lexer); +#line 54 "src/lexer.re" + { + return rbs_next_token(lexer, tANNOTATION); + } +#line 1719 "src/lexer.c" + yy203: + rbs_skip(lexer); +#line 57 "src/lexer.re" + { + return rbs_next_token(lexer, tANNOTATION); + } +#line 1724 "src/lexer.c" + yy204: + rbs_skip(lexer); +#line 55 "src/lexer.re" + { + return rbs_next_token(lexer, tANNOTATION); + } +#line 1729 "src/lexer.c" + yy205: + rbs_skip(lexer); +#line 53 "src/lexer.re" + { + return rbs_next_token(lexer, tANNOTATION); + } +#line 1734 "src/lexer.c" + yy206: + rbs_skip(lexer); +#line 56 "src/lexer.re" + { + return rbs_next_token(lexer, tANNOTATION); + } +#line 1739 "src/lexer.c" + yy207: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '@') { + if (yych <= '/') goto yy68; + if (yych <= '9') goto yy240; + goto yy68; + } else { + if (yych <= 'F') goto yy240; + if (yych <= '`') goto yy68; + if (yych <= 'f') goto yy240; + goto yy68; + } + yy208: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '/') goto yy68; + if (yych <= '9') goto yy85; + if (yych <= '`') goto yy68; + if (yych <= 'f') goto yy85; + goto yy68; + yy209: + yyaccept = 6; + rbs_skip(lexer); + backup = *lexer; + yych = rbs_peek(lexer); + if (yych <= '\'') { + if (yych <= 0x00000000) goto yy155; + if (yych <= '&') goto yy88; + goto yy154; + } else { + if (yych == '\\') goto yy156; + goto yy88; + } + yy210: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '>') { + if (yych <= '/') { + if (yych == '!') goto yy241; + } else { + if (yych <= '9') goto yy210; + if (yych == '=') goto yy241; + } + } else { + if (yych <= '^') { + if (yych <= '?') goto yy241; + if (yych <= '@') goto yy211; + if (yych <= 'Z') goto yy210; + } else { + if (yych == '`') goto yy211; + 
if (yych <= 'z') goto yy210; + } + } + yy211: +#line 124 "src/lexer.re" + { + return rbs_next_token(lexer, tSYMBOL); + } +#line 1797 "src/lexer.c" + yy212: + rbs_skip(lexer); + goto yy161; + yy213: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'd') goto yy242; + goto yy109; + yy214: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 's') goto yy243; + goto yy53; + yy215: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '_') goto yy245; + goto yy53; + yy216: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy217; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy217; + if (yych <= 'z') goto yy52; + } + } + yy217: +#line 70 "src/lexer.re" + { + return rbs_next_token(lexer, kBOOL); + } +#line 1838 "src/lexer.c" + yy218: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 's') goto yy246; + goto yy53; + yy219: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'n') goto yy248; + goto yy53; + yy220: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy249; + goto yy53; + yy221: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'u') goto yy251; + goto yy53; + yy222: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'a') goto yy252; + goto yy53; + yy223: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'r') goto yy253; + goto yy53; + yy224: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'l') goto yy254; + goto yy53; + yy225: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy255; + goto yy53; + yy226: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'a') goto yy256; + goto yy53; + yy227: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'i') goto yy257; + goto yy53; + yy228: + rbs_skip(lexer); + yych = 
rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy229; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy229; + if (yych <= 'z') goto yy52; + } + } + yy229: +#line 87 "src/lexer.re" + { + return rbs_next_token(lexer, kSELF); + } +#line 1911 "src/lexer.c" + yy230: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'l') goto yy258; + goto yy53; + yy231: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy232; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy232; + if (yych <= 'z') goto yy52; + } + } + yy232: +#line 90 "src/lexer.re" + { + return rbs_next_token(lexer, kTRUE); + } +#line 1939 "src/lexer.c" + yy233: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy234; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy234; + if (yych <= 'z') goto yy52; + } + } + yy234: +#line 91 "src/lexer.re" + { + return rbs_next_token(lexer, kTYPE); + } +#line 1962 "src/lexer.c" + yy235: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy259; + goto yy53; + yy236: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'p') goto yy260; + goto yy53; + yy237: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy238; + if (yych <= 'Z') goto 
yy52; + } else { + if (yych == '`') goto yy238; + if (yych <= 'z') goto yy52; + } + } + yy238: +#line 94 "src/lexer.re" + { + return rbs_next_token(lexer, kVOID); + } +#line 1995 "src/lexer.c" + yy239: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '@') { + if (yych <= '/') goto yy68; + if (yych <= '9') goto yy261; + goto yy68; + } else { + if (yych <= 'F') goto yy261; + if (yych <= '`') goto yy68; + if (yych <= 'f') goto yy261; + goto yy68; + } + yy240: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '@') { + if (yych <= '/') goto yy68; + if (yych <= '9') goto yy262; + goto yy68; + } else { + if (yych <= 'F') goto yy262; + if (yych <= '`') goto yy68; + if (yych <= 'f') goto yy262; + goto yy68; + } + yy241: + rbs_skip(lexer); + goto yy211; + yy242: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'o') goto yy263; + goto yy109; + yy243: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy244; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy244; + if (yych <= 'z') goto yy52; + } + } + yy244: +#line 66 "src/lexer.re" + { + return rbs_next_token(lexer, kALIAS); + } +#line 2052 "src/lexer.c" + yy245: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= 'q') { + if (yych == 'a') goto yy264; + goto yy53; + } else { + if (yych <= 'r') goto yy265; + if (yych == 'w') goto yy266; + goto yy53; + } + yy246: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy247; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy247; + if (yych <= 'z') goto yy52; + } + } + yy247: +#line 72 "src/lexer.re" + { + return 
rbs_next_token(lexer, kCLASS); + } +#line 2086 "src/lexer.c" + yy248: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'd') goto yy267; + goto yy53; + yy249: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy250; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy250; + if (yych <= 'z') goto yy52; + } + } + yy250: +#line 76 "src/lexer.re" + { + return rbs_next_token(lexer, kFALSE); + } +#line 2114 "src/lexer.c" + yy251: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'd') goto yy269; + goto yy53; + yy252: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'n') goto yy270; + goto yy53; + yy253: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'f') goto yy271; + goto yy53; + yy254: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy272; + goto yy53; + yy255: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'n') goto yy274; + goto yy53; + yy256: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 't') goto yy275; + goto yy53; + yy257: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'c') goto yy276; + goto yy53; + yy258: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy278; + goto yy53; + yy259: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'c') goto yy279; + goto yy53; + yy260: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy280; + goto yy53; + yy261: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '@') { + if (yych <= '/') goto yy68; + if (yych <= '9') goto yy66; + goto yy68; + } else { + if (yych <= 'F') goto yy66; + if (yych <= '`') goto yy68; + if (yych <= 'f') goto yy66; + goto yy68; + } + yy262: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '@') { + if (yych <= '/') goto yy68; + if 
(yych <= '9') goto yy281; + goto yy68; + } else { + if (yych <= 'F') goto yy281; + if (yych <= '`') goto yy68; + if (yych <= 'f') goto yy281; + goto yy68; + } + yy263: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '_') goto yy282; + goto yy109; + yy264: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'c') goto yy283; + goto yy53; + yy265: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy284; + goto yy53; + yy266: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'r') goto yy285; + goto yy53; + yy267: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy268; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy268; + if (yych <= 'z') goto yy52; + } + } + yy268: +#line 75 "src/lexer.re" + { + return rbs_next_token(lexer, kEXTEND); + } +#line 2233 "src/lexer.c" + yy269: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy286; + goto yy53; + yy270: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'c') goto yy288; + goto yy53; + yy271: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'a') goto yy289; + goto yy53; + yy272: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy273; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy273; + if (yych <= 'z') goto yy52; + } + } + yy273: +#line 81 "src/lexer.re" + { + return rbs_next_token(lexer, kMODULE); + } +#line 2271 "src/lexer.c" + yy274: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'd') goto yy290; + goto yy53; + yy275: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy292; 
+ goto yy53; + yy276: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy277; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy277; + if (yych <= 'z') goto yy52; + } + } + yy277: +#line 86 "src/lexer.re" + { + return rbs_next_token(lexer, kPUBLIC); + } +#line 2304 "src/lexer.c" + yy278: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 't') goto yy294; + goto yy53; + yy279: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'k') goto yy295; + goto yy53; + yy280: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'd') goto yy296; + goto yy53; + yy281: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '@') { + if (yych <= '/') goto yy68; + if (yych <= '9') goto yy85; + goto yy68; + } else { + if (yych <= 'F') goto yy85; + if (yych <= '`') goto yy68; + if (yych <= 'f') goto yy85; + goto yy68; + } + yy282: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == '_') goto yy298; + goto yy109; + yy283: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'c') goto yy300; + goto yy53; + yy284: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'a') goto yy301; + goto yy53; + yy285: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'i') goto yy302; + goto yy53; + yy286: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy287; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy287; + if (yych <= 'z') goto yy52; + } + } + yy287: +#line 78 "src/lexer.re" + { + return rbs_next_token(lexer, kINCLUDE); + } +#line 2375 "src/lexer.c" + yy288: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if 
(yych == 'e') goto yy303; + goto yy53; + yy289: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'c') goto yy305; + goto yy53; + yy290: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy291; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy291; + if (yych <= 'z') goto yy52; + } + } + yy291: +#line 84 "src/lexer.re" + { + return rbs_next_token(lexer, kPREPEND); + } +#line 2408 "src/lexer.c" + yy292: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy293; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy293; + if (yych <= 'z') goto yy52; + } + } + yy293: +#line 85 "src/lexer.re" + { + return rbs_next_token(lexer, kPRIVATE); + } +#line 2431 "src/lexer.c" + yy294: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'o') goto yy306; + goto yy53; + yy295: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy307; + goto yy53; + yy296: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy297; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy297; + if (yych <= 'z') goto yy52; + } + } + yy297: +#line 93 "src/lexer.re" + { + return rbs_next_token(lexer, kUNTYPED); + } +#line 2464 "src/lexer.c" + yy298: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy108; + if (yych >= '=') goto yy106; + } + } 
else { + if (yych <= '^') { + if (yych <= '@') goto yy299; + if (yych <= 'Z') goto yy108; + } else { + if (yych == '`') goto yy299; + if (yych <= 'z') goto yy108; + } + } + yy299: +#line 97 "src/lexer.re" + { + return rbs_next_token(lexer, k__TODO__); + } +#line 2487 "src/lexer.c" + yy300: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy308; + goto yy53; + yy301: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'd') goto yy309; + goto yy53; + yy302: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 't') goto yy310; + goto yy53; + yy303: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy304; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy304; + if (yych <= 'z') goto yy52; + } + } + yy304: +#line 79 "src/lexer.re" + { + return rbs_next_token(lexer, kINSTANCE); + } +#line 2525 "src/lexer.c" + yy305: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy311; + goto yy53; + yy306: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'n') goto yy313; + goto yy53; + yy307: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'd') goto yy315; + goto yy53; + yy308: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 's') goto yy317; + goto yy53; + yy309: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy318; + goto yy53; + yy310: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'e') goto yy319; + goto yy53; + yy311: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy312; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy312; + if (yych 
<= 'z') goto yy52; + } + } + yy312: +#line 80 "src/lexer.re" + { + return rbs_next_token(lexer, kINTERFACE); + } +#line 2578 "src/lexer.c" + yy313: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy314; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy314; + if (yych <= 'z') goto yy52; + } + } + yy314: +#line 88 "src/lexer.re" + { + return rbs_next_token(lexer, kSINGLETON); + } +#line 2601 "src/lexer.c" + yy315: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy316; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy316; + if (yych <= 'z') goto yy52; + } + } + yy316: +#line 92 "src/lexer.re" + { + return rbs_next_token(lexer, kUNCHECKED); + } +#line 2624 "src/lexer.c" + yy317: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 's') goto yy320; + goto yy53; + yy318: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'r') goto yy321; + goto yy53; + yy319: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'r') goto yy323; + goto yy53; + yy320: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych == 'o') goto yy325; + goto yy53; + yy321: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy322; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy322; + if (yych <= 'z') goto yy52; + } + } + yy322: +#line 68 "src/lexer.re" + { + return rbs_next_token(lexer, kATTRREADER); + } +#line 2667 "src/lexer.c" + 
yy323: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy324; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy324; + if (yych <= 'z') goto yy52; + } + } + yy324: +#line 69 "src/lexer.re" + { + return rbs_next_token(lexer, kATTRWRITER); + } +#line 2690 "src/lexer.c" + yy325: + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych != 'r') goto yy53; + rbs_skip(lexer); + yych = rbs_peek(lexer); + if (yych <= '=') { + if (yych <= '/') { + if (yych == '!') goto yy105; + } else { + if (yych <= '9') goto yy52; + if (yych >= '=') goto yy106; + } + } else { + if (yych <= '^') { + if (yych <= '@') goto yy326; + if (yych <= 'Z') goto yy52; + } else { + if (yych == '`') goto yy326; + if (yych <= 'z') goto yy52; + } + } + yy326: +#line 67 "src/lexer.re" + { + return rbs_next_token(lexer, kATTRACCESSOR); + } +#line 2716 "src/lexer.c" + } +#line 146 "src/lexer.re" +} diff --git a/src/lexer.re b/src/lexer.re new file mode 100644 index 000000000..575a0c301 --- /dev/null +++ b/src/lexer.re @@ -0,0 +1,147 @@ +#include "rbs/lexer.h" + +rbs_token_t rbs_lexer_next_token(rbs_lexer_t *lexer) { + rbs_lexer_t backup; + + backup = *lexer; + + /*!re2c + re2c:flags:u = 1; + re2c:api:style = free-form; + re2c:flags:input = custom; + re2c:define:YYCTYPE = "unsigned int"; + re2c:define:YYPEEK = "rbs_peek(lexer)"; + re2c:define:YYSKIP = "rbs_skip(lexer);"; + re2c:define:YYBACKUP = "backup = *lexer;"; + re2c:define:YYRESTORE = "*lexer = backup;"; + re2c:yyfill:enable = 0; + + word = [a-zA-Z0-9_]; + + operator = "/" | "~" | "[]=" | "!" 
| "!=" | "!~" | "-" | "-@" | "+" | "+@" + | "==" | "===" | "=~" | "<<" | "<=" | "<=>" | ">" | ">=" | ">>" | "%"; + + "(" { return rbs_next_token(lexer, pLPAREN); } + ")" { return rbs_next_token(lexer, pRPAREN); } + "[" { return rbs_next_token(lexer, pLBRACKET); } + "]" { return rbs_next_token(lexer, pRBRACKET); } + "{" { return rbs_next_token(lexer, pLBRACE); } + "}" { return rbs_next_token(lexer, pRBRACE); } + "," { return rbs_next_token(lexer, pCOMMA); } + "|" { return rbs_next_token(lexer, pBAR); } + "^" { return rbs_next_token(lexer, pHAT); } + "&" { return rbs_next_token(lexer, pAMP); } + "?" { return rbs_next_token(lexer, pQUESTION); } + "*" { return rbs_next_token(lexer, pSTAR); } + "**" { return rbs_next_token(lexer, pSTAR2); } + "." { return rbs_next_token(lexer, pDOT); } + "..." { return rbs_next_token(lexer, pDOT3); } + "`" { return rbs_next_token(lexer, tOPERATOR); } + "`" [^ :\x00] [^`\x00]* "`" { return rbs_next_token(lexer, tQIDENT); } + "->" { return rbs_next_token(lexer, pARROW); } + "=>" { return rbs_next_token(lexer, pFATARROW); } + "=" { return rbs_next_token(lexer, pEQ); } + ":" { return rbs_next_token(lexer, pCOLON); } + "::" { return rbs_next_token(lexer, pCOLON2); } + "<" { return rbs_next_token(lexer, pLT); } + "[]" { return rbs_next_token(lexer, pAREF_OPR); } + operator { return rbs_next_token(lexer, tOPERATOR); } + + number = [0-9] [0-9_]*; + ("-"|"+")? number { return rbs_next_token(lexer, tINTEGER); } + + "%a{" [^}\x00]* "}" { return rbs_next_token(lexer, tANNOTATION); } + "%a(" [^)\x00]* ")" { return rbs_next_token(lexer, tANNOTATION); } + "%a[" [^\]\x00]* "]" { return rbs_next_token(lexer, tANNOTATION); } + "%a|" [^|\x00]* "|" { return rbs_next_token(lexer, tANNOTATION); } + "%a<" [^>\x00]* ">" { return rbs_next_token(lexer, tANNOTATION); } + + "#" (. \ [\x00])* { + return rbs_next_token( + lexer, + lexer->first_token_of_line ? 
tLINECOMMENT : tCOMMENT + ); + } + + "alias" { return rbs_next_token(lexer, kALIAS); } + "attr_accessor" { return rbs_next_token(lexer, kATTRACCESSOR); } + "attr_reader" { return rbs_next_token(lexer, kATTRREADER); } + "attr_writer" { return rbs_next_token(lexer, kATTRWRITER); } + "bool" { return rbs_next_token(lexer, kBOOL); } + "bot" { return rbs_next_token(lexer, kBOT); } + "class" { return rbs_next_token(lexer, kCLASS); } + "def" { return rbs_next_token(lexer, kDEF); } + "end" { return rbs_next_token(lexer, kEND); } + "extend" { return rbs_next_token(lexer, kEXTEND); } + "false" { return rbs_next_token(lexer, kFALSE); } + "in" { return rbs_next_token(lexer, kIN); } + "include" { return rbs_next_token(lexer, kINCLUDE); } + "instance" { return rbs_next_token(lexer, kINSTANCE); } + "interface" { return rbs_next_token(lexer, kINTERFACE); } + "module" { return rbs_next_token(lexer, kMODULE); } + "nil" { return rbs_next_token(lexer, kNIL); } + "out" { return rbs_next_token(lexer, kOUT); } + "prepend" { return rbs_next_token(lexer, kPREPEND); } + "private" { return rbs_next_token(lexer, kPRIVATE); } + "public" { return rbs_next_token(lexer, kPUBLIC); } + "self" { return rbs_next_token(lexer, kSELF); } + "singleton" { return rbs_next_token(lexer, kSINGLETON); } + "top" { return rbs_next_token(lexer, kTOP); } + "true" { return rbs_next_token(lexer, kTRUE); } + "type" { return rbs_next_token(lexer, kTYPE); } + "unchecked" { return rbs_next_token(lexer, kUNCHECKED); } + "untyped" { return rbs_next_token(lexer, kUNTYPED); } + "void" { return rbs_next_token(lexer, kVOID); } + "use" { return rbs_next_token(lexer, kUSE); } + "as" { return rbs_next_token(lexer, kAS); } + "__todo__" { return rbs_next_token(lexer, k__TODO__); } + + unicode_char = "\\u" [0-9a-fA-F]{4}; + oct_char = "\\x" [0-9a-f]{1,2}; + hex_char = "\\" [0-7]{1,3}; + + dqstring = ["] (unicode_char | oct_char | hex_char | "\\" [^xu] | [^\\"\x00])* ["]; + sqstring = ['] ("\\"['\\] | [^'\x00])* [']; + + dqstring { 
return rbs_next_token(lexer, tDQSTRING); } + sqstring { return rbs_next_token(lexer, tSQSTRING); } + ":" dqstring { return rbs_next_token(lexer, tDQSYMBOL); } + ":" sqstring { return rbs_next_token(lexer, tSQSYMBOL); } + + identifier = [a-zA-Z_] word* [!?=]?; + symbol_opr = ":|" | ":&" | ":/" | ":%" | ":~" | ":`" | ":^" + | ":==" | ":=~" | ":===" | ":!" | ":!=" | ":!~" + | ":<" | ":<=" | ":<<" | ":<=>" | ":>" | ":>=" | ":>>" + | ":-" | ":-@" | ":+" | ":+@" | ":*" | ":**" | ":[]" | ":[]="; + + global_ident = [0-9]+ + | "-" [a-zA-Z0-9_] + | [~*$?!@\\/;,.=:<>"&'`+] + | [^ \t\r\n:;=.,!"$%&()-+~|\\'[\]{}*/<>^\x00]+; + + ":" identifier { return rbs_next_token(lexer, tSYMBOL); } + ":@" identifier { return rbs_next_token(lexer, tSYMBOL); } + ":@@" identifier { return rbs_next_token(lexer, tSYMBOL); } + ":$" global_ident { return rbs_next_token(lexer, tSYMBOL); } + symbol_opr { return rbs_next_token(lexer, tSYMBOL); } + + [a-z] word* { return rbs_next_token(lexer, tLIDENT); } + [A-Z] word* { return rbs_next_token(lexer, tUIDENT); } + "_" [a-z0-9_] word* { return rbs_next_token(lexer, tULLIDENT); } + "_" [A-Z] word* { return rbs_next_token(lexer, tULIDENT); } + "_" { return rbs_next_token(lexer, tULLIDENT); } + [a-zA-Z_] word* "!" 
{ return rbs_next_token(lexer, tBANGIDENT); } + [a-zA-Z_] word* "=" { return rbs_next_token(lexer, tEQIDENT); } + + "@" [a-zA-Z_] word* { return rbs_next_token(lexer, tAIDENT); } + "@@" [a-zA-Z_] word* { return rbs_next_token(lexer, tA2IDENT); } + + "$" global_ident { return rbs_next_token(lexer, tGIDENT); } + + skip = ([ \t]+|[\r\n]); + + skip { return rbs_next_token(lexer, tTRIVIA); } + "\x00" { return rbs_next_eof_token(lexer); } + * { return rbs_next_token(lexer, ErrorToken); } + */ +} diff --git a/src/lexstate.c b/src/lexstate.c new file mode 100644 index 000000000..dc115f661 --- /dev/null +++ b/src/lexstate.c @@ -0,0 +1,205 @@ +#include "rbs/defines.h" +#include "rbs/lexer.h" +#include "rbs/util/rbs_assert.h" + +static const char *RBS_TOKENTYPE_NAMES[] = { + "NullType", + "pEOF", + "ErrorToken", + + "pLPAREN", /* ( */ + "pRPAREN", /* ) */ + "pCOLON", /* : */ + "pCOLON2", /* :: */ + "pLBRACKET", /* [ */ + "pRBRACKET", /* ] */ + "pLBRACE", /* { */ + "pRBRACE", /* } */ + "pHAT", /* ^ */ + "pARROW", /* -> */ + "pFATARROW", /* => */ + "pCOMMA", /* , */ + "pBAR", /* | */ + "pAMP", /* & */ + "pSTAR", /* * */ + "pSTAR2", /* ** */ + "pDOT", /* . */ + "pDOT3", /* ... */ + "pBANG", /* ! */ + "pQUESTION", /* ? 
*/ + "pLT", /* < */ + "pEQ", /* = */ + + "kALIAS", /* alias */ + "kATTRACCESSOR", /* attr_accessor */ + "kATTRREADER", /* attr_reader */ + "kATTRWRITER", /* attr_writer */ + "kBOOL", /* bool */ + "kBOT", /* bot */ + "kCLASS", /* class */ + "kDEF", /* def */ + "kEND", /* end */ + "kEXTEND", /* extend */ + "kFALSE", /* kFALSE */ + "kIN", /* in */ + "kINCLUDE", /* include */ + "kINSTANCE", /* instance */ + "kINTERFACE", /* interface */ + "kMODULE", /* module */ + "kNIL", /* nil */ + "kOUT", /* out */ + "kPREPEND", /* prepend */ + "kPRIVATE", /* private */ + "kPUBLIC", /* public */ + "kSELF", /* self */ + "kSINGLETON", /* singleton */ + "kTOP", /* top */ + "kTRUE", /* true */ + "kTYPE", /* type */ + "kUNCHECKED", /* unchecked */ + "kUNTYPED", /* untyped */ + "kVOID", /* void */ + "kUSE", /* use */ + "kAS", /* as */ + "k__TODO__", /* __todo__ */ + + "tLIDENT", /* Identifiers starting with lower case */ + "tUIDENT", /* Identifiers starting with upper case */ + "tULIDENT", /* Identifiers starting with `_` */ + "tULLIDENT", + "tGIDENT", /* Identifiers starting with `$` */ + "tAIDENT", /* Identifiers starting with `@` */ + "tA2IDENT", /* Identifiers starting with `@@` */ + "tBANGIDENT", + "tEQIDENT", + "tQIDENT", /* Quoted identifier */ + "pAREF_OPR", /* [] */ + "tOPERATOR", /* Operator identifier */ + + "tCOMMENT", + "tLINECOMMENT", + + "tTRIVIA", + + "tDQSTRING", /* Double quoted string */ + "tSQSTRING", /* Single quoted string */ + "tINTEGER", /* Integer */ + "tSYMBOL", /* Symbol */ + "tDQSYMBOL", + "tSQSYMBOL", + "tANNOTATION", /* Annotation */ +}; + +const rbs_position_t NullPosition = { -1, -1, -1, -1 }; +const rbs_range_t NULL_RANGE = { { -1, -1, -1, -1 }, { -1, -1, -1, -1 } }; +const rbs_token_t NullToken = { .type = NullType, .range = { { 0 }, { 0 } } }; + +const char *rbs_token_type_str(enum RBSTokenType type) { + return RBS_TOKENTYPE_NAMES[type]; +} + +int rbs_token_chars(rbs_token_t tok) { + return tok.range.end.char_pos - tok.range.start.char_pos; +} + +int 
rbs_token_bytes(rbs_token_t tok) { + return RBS_RANGE_BYTES(tok.range); +} + +unsigned int rbs_peek(rbs_lexer_t *lexer) { + return lexer->current_code_point; +} + +bool rbs_next_char(rbs_lexer_t *lexer, unsigned int *codepoint, size_t *byte_len) { + if (RBS_UNLIKELY(lexer->current.char_pos == lexer->end_pos)) { + return false; + } + + const char *start = lexer->string.start + lexer->current.byte_pos; + + // Fast path for ASCII (single-byte) characters + if ((unsigned int) *start < 128) { + *codepoint = (unsigned int) *start; + *byte_len = 1; + return true; + } + + *byte_len = lexer->encoding->char_width((const uint8_t *) start, (ptrdiff_t) (lexer->string.end - start)); + + if (*byte_len == 1) { + *codepoint = (unsigned int) *start; + } else { + *codepoint = 12523; // Dummy data for "ル" from "ルビー" (Ruby) in Unicode + } + + return true; +} + +void rbs_skip(rbs_lexer_t *lexer) { + rbs_assert(lexer->current_character_bytes > 0, "rbs_skip called with current_character_bytes == 0"); + + if (RBS_UNLIKELY(lexer->current_code_point == '\0')) { + return; + } + + unsigned int codepoint; + size_t byte_len; + + lexer->current.byte_pos += lexer->current_character_bytes; + lexer->current.char_pos += 1; + if (lexer->current_code_point == '\n') { + lexer->current.line += 1; + lexer->current.column = 0; + lexer->first_token_of_line = true; + } else { + lexer->current.column += 1; + } + + if (rbs_next_char(lexer, &codepoint, &byte_len)) { + lexer->current_code_point = codepoint; + lexer->current_character_bytes = byte_len; + } else { + lexer->current_character_bytes = 1; + lexer->current_code_point = '\0'; + } +} + +rbs_token_t rbs_next_token(rbs_lexer_t *lexer, enum RBSTokenType type) { + rbs_token_t t; + + t.type = type; + t.range.start = lexer->start; + t.range.end = lexer->current; + lexer->start = lexer->current; + if (type != tTRIVIA) { + lexer->first_token_of_line = false; + } + + return t; +} + +rbs_token_t rbs_next_eof_token(rbs_lexer_t *lexer) { + if ((size_t) 
lexer->current.byte_pos == rbs_string_len(lexer->string) + 1) { + // End of String + rbs_token_t t; + t.type = pEOF; + t.range.start = lexer->start; + t.range.end = lexer->start; + lexer->start = lexer->current; + + return t; + } else { + // NULL byte in the middle of the string + return rbs_next_token(lexer, pEOF); + } +} + +void rbs_skipn(rbs_lexer_t *lexer, size_t size) { + for (size_t i = 0; i < size; i++) { + rbs_skip(lexer); + } +} + +char *rbs_peek_token(rbs_lexer_t *lexer, rbs_token_t tok) { + return (char *) lexer->string.start + tok.range.start.byte_pos; +} diff --git a/src/location.c b/src/location.c new file mode 100644 index 000000000..ad75a4ad0 --- /dev/null +++ b/src/location.c @@ -0,0 +1,71 @@ +#include "rbs/location.h" +#include "rbs/util/rbs_assert.h" + +#include + +#define RBS_LOC_CHILDREN_SIZE(cap) (sizeof(rbs_loc_children) + sizeof(rbs_loc_entry) * ((cap) - 1)) + +void rbs_loc_alloc_children(rbs_allocator_t *allocator, rbs_location_t *loc, size_t capacity) { + rbs_assert(capacity <= sizeof(rbs_loc_entry_bitmap) * 8, "Capacity %zu is too large. Max is %zu", capacity, sizeof(rbs_loc_entry_bitmap) * 8); + + loc->children = rbs_allocator_malloc_impl(allocator, RBS_LOC_CHILDREN_SIZE(capacity), rbs_alignof(rbs_loc_children)); + + loc->children->len = 0; + loc->children->required_p = 0; + loc->children->cap = capacity; +} + +void rbs_loc_add_optional_child(rbs_location_t *loc, rbs_constant_id_t name, rbs_range_t r) { + rbs_assert(loc->children != NULL, "All children should have been pre-allocated with rbs_loc_alloc_children()"); + rbs_assert((loc->children->len + 1 <= loc->children->cap), "Not enough space was pre-allocated for the children. 
Children: %hu, Capacity: %hu", loc->children->len, loc->children->cap); + + unsigned short i = loc->children->len++; + loc->children->entries[i].name = name; + loc->children->entries[i].rg = (rbs_loc_range) { r.start.char_pos, r.end.char_pos }; +} + +void rbs_loc_add_required_child(rbs_location_t *loc, rbs_constant_id_t name, rbs_range_t r) { + rbs_loc_add_optional_child(loc, name, r); + unsigned short last_index = loc->children->len - 1; + loc->children->required_p |= 1 << last_index; +} + +rbs_location_t *rbs_location_new(rbs_allocator_t *allocator, rbs_range_t rg) { + rbs_location_t *location = rbs_allocator_alloc(allocator, rbs_location_t); + *location = (rbs_location_t) { + .rg = rg, + .children = NULL, + }; + + return location; +} + +rbs_location_list_t *rbs_location_list_new(rbs_allocator_t *allocator) { + rbs_location_list_t *list = rbs_allocator_alloc(allocator, rbs_location_list_t); + *list = (rbs_location_list_t) { + .allocator = allocator, + .head = NULL, + .tail = NULL, + .length = 0, + }; + + return list; +} + +void rbs_location_list_append(rbs_location_list_t *list, rbs_location_t *loc) { + rbs_location_list_node_t *node = rbs_allocator_alloc(list->allocator, rbs_location_list_node_t); + *node = (rbs_location_list_node_t) { + .loc = loc, + .next = NULL, + }; + + if (list->head == NULL) { + list->head = node; + list->tail = node; + } else { + list->tail->next = node; + list->tail = node; + } + + list->length++; +} diff --git a/src/parser.c b/src/parser.c new file mode 100644 index 000000000..6f66d0e15 --- /dev/null +++ b/src/parser.c @@ -0,0 +1,3468 @@ +#include "rbs/parser.h" + +#include +#include +#include +#include +#include + +#include "rbs/defines.h" +#include "rbs/lexer.h" +#include "rbs/string.h" +#include "rbs/util/rbs_unescape.h" +#include "rbs/util/rbs_buffer.h" +#include "rbs/util/rbs_assert.h" + +#define INTERN(str) \ + rbs_constant_pool_insert_constant( \ + RBS_GLOBAL_CONSTANT_POOL, \ + (const uint8_t *) str, \ + strlen(str) \ + ) + 
+#define INTERN_TOKEN(parser, tok) \ + rbs_constant_pool_insert_shared_with_encoding( \ + &parser->constant_pool, \ + (const uint8_t *) rbs_peek_token(parser->rbs_lexer_t, tok), \ + rbs_token_bytes(tok), \ + (void *) parser->rbs_lexer_t->encoding \ + ) + +#define KEYWORD_CASES \ + case kBOOL: \ + case kBOT: \ + case kCLASS: \ + case kFALSE: \ + case kINSTANCE: \ + case kINTERFACE: \ + case kNIL: \ + case kSELF: \ + case kSINGLETON: \ + case kTOP: \ + case kTRUE: \ + case kVOID: \ + case kTYPE: \ + case kUNCHECKED: \ + case kIN: \ + case kOUT: \ + case kEND: \ + case kDEF: \ + case kINCLUDE: \ + case kEXTEND: \ + case kPREPEND: \ + case kALIAS: \ + case kMODULE: \ + case kATTRREADER: \ + case kATTRWRITER: \ + case kATTRACCESSOR: \ + case kPUBLIC: \ + case kPRIVATE: \ + case kUNTYPED: \ + case kUSE: \ + case kAS: \ + case k__TODO__: \ + /* nop */ + +#define CHECK_PARSE(call) \ + if (!call) { \ + return false; \ + } + +#define ASSERT_TOKEN(parser, expected_type) \ + if (parser->current_token.type != expected_type) { \ + rbs_parser_set_error(parser, parser->current_token, true, "expected a token `%s`", rbs_token_type_str(expected_type)); \ + return false; \ + } + +#define ADVANCE_ASSERT(parser, expected_type) \ + do { \ + rbs_parser_advance(parser); \ + ASSERT_TOKEN(parser, expected_type) \ + } while (0); + +#define RESET_TABLE_P(table) (table->size == 0) + +#define ALLOCATOR() parser->allocator + +typedef struct { + rbs_node_list_t *required_positionals; + rbs_node_list_t *optional_positionals; + rbs_node_t *rest_positionals; + rbs_node_list_t *trailing_positionals; + rbs_hash_t *required_keywords; + rbs_hash_t *optional_keywords; + rbs_node_t *rest_keywords; +} method_params; + +/** + * id_table represents a set of RBS constant IDs. + * This is used to manage the set of bound variables. 
+ * */ +typedef struct id_table { + size_t size; + size_t count; + rbs_constant_id_t *ids; + struct id_table *next; +} id_table; + +static bool rbs_is_untyped_params(method_params *params) { + return params->required_positionals == NULL; +} + +/** + * Returns RBS::Location object of `current_token` of a parser parser. + * + * @param parser + * @return New RBS::Location object. + * */ +static rbs_location_t *rbs_location_current_token(rbs_parser_t *parser) { + return rbs_location_new(ALLOCATOR(), parser->current_token.range); +} + +static bool parse_optional(rbs_parser_t *parser, rbs_node_t **optional); +static bool parse_simple(rbs_parser_t *parser, rbs_node_t **type); + +/** + * @returns A borrowed copy of the current token, which does *not* need to be freed. + */ +static rbs_string_t rbs_parser_peek_current_token(rbs_parser_t *parser) { + rbs_range_t rg = parser->current_token.range; + + const char *start = parser->rbs_lexer_t->string.start + rg.start.byte_pos; + size_t length = rg.end.byte_pos - rg.start.byte_pos; + + return rbs_string_new(start, start + length); +} + +static rbs_constant_id_t rbs_constant_pool_insert_string(rbs_constant_pool_t *self, rbs_string_t string) { + return rbs_constant_pool_insert_shared(self, (const uint8_t *) string.start, rbs_string_len(string)); +} + +typedef enum { + CLASS_NAME = 1, + INTERFACE_NAME = 2, + ALIAS_NAME = 4 +} TypeNameKind; + +static void parser_advance_no_gap(rbs_parser_t *parser) { + if (parser->current_token.range.end.byte_pos == parser->next_token.range.start.byte_pos) { + rbs_parser_advance(parser); + } else { + rbs_parser_set_error(parser, parser->next_token, true, "unexpected token"); + } +} + +/* + type_name ::= {`::`} (tUIDENT `::`)* + | {(tUIDENT `::`)*} + | {} +*/ +NODISCARD +static bool parse_type_name(rbs_parser_t *parser, TypeNameKind kind, rbs_range_t *rg, rbs_type_name_t **type_name) { + bool absolute = false; + + if (rg) { + rg->start = parser->current_token.range.start; + } + + if 
(parser->current_token.type == pCOLON2) { + absolute = true; + parser_advance_no_gap(parser); + } + + rbs_node_list_t *path = rbs_node_list_new(ALLOCATOR()); + + while ( + parser->current_token.type == tUIDENT && parser->next_token.type == pCOLON2 && parser->current_token.range.end.byte_pos == parser->next_token.range.start.byte_pos && parser->next_token.range.end.byte_pos == parser->next_token2.range.start.byte_pos + ) { + rbs_constant_id_t symbol_value = INTERN_TOKEN(parser, parser->current_token); + rbs_location_t *symbolLoc = rbs_location_new(ALLOCATOR(), parser->next_token.range); + rbs_ast_symbol_t *symbol = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, symbol_value); + rbs_node_list_append(path, (rbs_node_t *) symbol); + + rbs_parser_advance(parser); + rbs_parser_advance(parser); + } + + rbs_range_t namespace_range = { + .start = rg->start, + .end = parser->current_token.range.end + }; + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), namespace_range); + rbs_namespace_t *namespace = rbs_namespace_new(ALLOCATOR(), loc, path, absolute); + + switch (parser->current_token.type) { + case tLIDENT: + if (kind & ALIAS_NAME) goto success; + goto error_handling; + case tULIDENT: + if (kind & INTERFACE_NAME) goto success; + goto error_handling; + case tUIDENT: + if (kind & CLASS_NAME) goto success; + goto error_handling; + default: + goto error_handling; + } + +success: { + if (rg) { + rg->end = parser->current_token.range.end; + } + + rbs_location_t *symbolLoc = rbs_location_current_token(parser); + rbs_constant_id_t name = INTERN_TOKEN(parser, parser->current_token); + rbs_ast_symbol_t *symbol = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, name); + *type_name = rbs_type_name_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), *rg), namespace, symbol); + return true; +} + +error_handling: { + const char *ids = NULL; + if (kind & ALIAS_NAME) { + ids = "alias name"; + } + if (kind & INTERFACE_NAME) { + ids = "interface name"; + 
} + if (kind & CLASS_NAME) { + ids = "class/module/constant name"; + } + + rbs_assert(ids != NULL, "Unknown kind of type: %i", kind); + + rbs_parser_set_error(parser, parser->current_token, true, "expected one of %s", ids); + return false; +} +} + +/* + type_list ::= {} type `,` ... <`,`> eol + | {} type `,` ... `,` eol +*/ +NODISCARD +static bool parse_type_list(rbs_parser_t *parser, enum RBSTokenType eol, rbs_node_list_t *types) { + while (true) { + rbs_node_t *type; + CHECK_PARSE(rbs_parse_type(parser, &type)); + rbs_node_list_append(types, type); + + if (parser->next_token.type == pCOMMA) { + rbs_parser_advance(parser); + + if (parser->next_token.type == eol) { + break; + } + } else { + if (parser->next_token.type == eol) { + break; + } else { + rbs_parser_set_error(parser, parser->next_token, true, "comma delimited type list is expected"); + return false; + } + } + } + + return true; +} + +static bool is_keyword_token(enum RBSTokenType type) { + switch (type) { + case tLIDENT: + case tUIDENT: + case tULIDENT: + case tULLIDENT: + case tQIDENT: + case tBANGIDENT: + KEYWORD_CASES + return true; + default: + return false; + } +} + +/* + function_param ::= {} + | {} type +*/ +NODISCARD +static bool parse_function_param(rbs_parser_t *parser, rbs_types_function_param_t **function_param) { + rbs_range_t type_range; + type_range.start = parser->next_token.range.start; + rbs_node_t *type; + CHECK_PARSE(rbs_parse_type(parser, &type)); + type_range.end = parser->current_token.range.end; + + if (parser->next_token.type == pCOMMA || parser->next_token.type == pRPAREN) { + rbs_range_t param_range = type_range; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), param_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 1); + rbs_loc_add_optional_child(loc, INTERN("name"), NULL_RANGE); + + *function_param = rbs_types_function_param_new(ALLOCATOR(), loc, type, NULL); + return true; + } else { + rbs_range_t name_range = parser->next_token.range; + + 
rbs_parser_advance(parser); + + rbs_range_t param_range = { + .start = type_range.start, + .end = name_range.end, + }; + + if (!is_keyword_token(parser->current_token.type)) { + rbs_parser_set_error(parser, parser->current_token, true, "unexpected token for function parameter name"); + return false; + } + + rbs_string_t unquoted_str = rbs_unquote_string(ALLOCATOR(), rbs_parser_peek_current_token(parser)); + rbs_location_t *symbolLoc = rbs_location_current_token(parser); + rbs_constant_id_t constant_id = rbs_constant_pool_insert_string(&parser->constant_pool, unquoted_str); + rbs_ast_symbol_t *name = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, constant_id); + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), param_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 1); + rbs_loc_add_optional_child(loc, INTERN("name"), name_range); + + *function_param = rbs_types_function_param_new(ALLOCATOR(), loc, type, name); + return true; + } +} + +static rbs_constant_id_t intern_token_start_end(rbs_parser_t *parser, rbs_token_t start_token, rbs_token_t end_token) { + return rbs_constant_pool_insert_shared_with_encoding( + &parser->constant_pool, + (const uint8_t *) rbs_peek_token(parser->rbs_lexer_t, start_token), + end_token.range.end.byte_pos - start_token.range.start.byte_pos, + parser->rbs_lexer_t->encoding + ); +} + +/* + keyword_key ::= {} `:` + | {} keyword <`?`> `:` +*/ +NODISCARD +static bool parse_keyword_key(rbs_parser_t *parser, rbs_ast_symbol_t **key) { + rbs_parser_advance(parser); + + rbs_location_t *symbolLoc = rbs_location_current_token(parser); + + if (parser->next_token.type == pQUESTION) { + *key = rbs_ast_symbol_new( + ALLOCATOR(), + symbolLoc, + &parser->constant_pool, + intern_token_start_end(parser, parser->current_token, parser->next_token) + ); + rbs_parser_advance(parser); + } else { + *key = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, INTERN_TOKEN(parser, parser->current_token)); + } + + return true; 
+} + +/* + keyword ::= {} keyword `:` +*/ +NODISCARD +static bool parse_keyword(rbs_parser_t *parser, rbs_hash_t *keywords, rbs_hash_t *memo) { + rbs_ast_symbol_t *key = NULL; + CHECK_PARSE(parse_keyword_key(parser, &key)); + + if (rbs_hash_find(memo, (rbs_node_t *) key)) { + rbs_parser_set_error(parser, parser->current_token, true, "duplicated keyword argument"); + return false; + } else { + rbs_location_t *loc = rbs_location_current_token(parser); + rbs_hash_set(memo, (rbs_node_t *) key, (rbs_node_t *) rbs_ast_bool_new(ALLOCATOR(), loc, true)); + } + + ADVANCE_ASSERT(parser, pCOLON); + rbs_types_function_param_t *param = NULL; + CHECK_PARSE(parse_function_param(parser, ¶m)); + + rbs_hash_set(keywords, (rbs_node_t *) key, (rbs_node_t *) param); + + return true; +} + +/* +Returns true if keyword is given. + + is_keyword === {} KEYWORD `:` +*/ +static bool is_keyword(rbs_parser_t *parser) { + if (is_keyword_token(parser->next_token.type)) { + if (parser->next_token2.type == pCOLON && parser->next_token.range.end.byte_pos == parser->next_token2.range.start.byte_pos) { + return true; + } + + if (parser->next_token2.type == pQUESTION && parser->next_token3.type == pCOLON && parser->next_token.range.end.byte_pos == parser->next_token2.range.start.byte_pos && parser->next_token2.range.end.byte_pos == parser->next_token3.range.start.byte_pos) { + return true; + } + } + + return false; +} + +/** + * Advance token if _next_ token is `type`. + * Ensures one token advance and `parser->current_token.type == type`, or current token not changed. + * + * @returns true if token advances, false otherwise. 
+ **/ +static bool parser_advance_if(rbs_parser_t *parser, enum RBSTokenType type) { + if (parser->next_token.type == type) { + rbs_parser_advance(parser); + return true; + } else { + return false; + } +} + +/* + params ::= {} `)` + | {} `?` `)` -- Untyped function params (assign params.required = nil) + | `)` + | `,` `)` + + required_params ::= {} function_param `,` + | {} + | {} + + optional_params ::= {} `?` function_param `,` + | {} `?` + | {} + + rest_params ::= {} `*` function_param `,` + | {} `*` + | {} + + trailing_params ::= {} function_param `,` + | {} + | {} + + keywords ::= {} required_keyword `,` + | {} `?` optional_keyword `,` + | {} `**` function_param `,` + | {} + | {} `?` + | {} `**` +*/ +NODISCARD +static bool parse_params(rbs_parser_t *parser, method_params *params) { + if (parser->next_token.type == pQUESTION && parser->next_token2.type == pRPAREN) { + params->required_positionals = NULL; + rbs_parser_advance(parser); + return true; + } + if (parser->next_token.type == pRPAREN) { + return true; + } + + rbs_hash_t *memo = rbs_hash_new(ALLOCATOR()); + + while (true) { + switch (parser->next_token.type) { + case pQUESTION: + goto PARSE_OPTIONAL_PARAMS; + case pSTAR: + goto PARSE_REST_PARAM; + case pSTAR2: + goto PARSE_KEYWORDS; + case pRPAREN: + goto EOP; + + default: + if (is_keyword(parser)) { + goto PARSE_KEYWORDS; + } + + rbs_types_function_param_t *param = NULL; + CHECK_PARSE(parse_function_param(parser, ¶m)); + rbs_node_list_append(params->required_positionals, (rbs_node_t *) param); + + break; + } + + if (!parser_advance_if(parser, pCOMMA)) { + goto EOP; + } + } + +PARSE_OPTIONAL_PARAMS: + while (true) { + switch (parser->next_token.type) { + case pQUESTION: + rbs_parser_advance(parser); + + if (is_keyword(parser)) { + CHECK_PARSE(parse_keyword(parser, params->optional_keywords, memo)); + parser_advance_if(parser, pCOMMA); + goto PARSE_KEYWORDS; + } + + rbs_types_function_param_t *param = NULL; + CHECK_PARSE(parse_function_param(parser, 
¶m)); + rbs_node_list_append(params->optional_positionals, (rbs_node_t *) param); + + break; + default: + goto PARSE_REST_PARAM; + } + + if (!parser_advance_if(parser, pCOMMA)) { + goto EOP; + } + } + +PARSE_REST_PARAM: + if (parser->next_token.type == pSTAR) { + rbs_parser_advance(parser); + rbs_types_function_param_t *param = NULL; + CHECK_PARSE(parse_function_param(parser, ¶m)); + params->rest_positionals = (rbs_node_t *) param; + + if (!parser_advance_if(parser, pCOMMA)) { + goto EOP; + } + } + goto PARSE_TRAILING_PARAMS; + +PARSE_TRAILING_PARAMS: + while (true) { + switch (parser->next_token.type) { + case pQUESTION: + goto PARSE_KEYWORDS; + case pSTAR: + goto EOP; + case pSTAR2: + goto PARSE_KEYWORDS; + case pRPAREN: + goto EOP; + + default: + if (is_keyword(parser)) { + goto PARSE_KEYWORDS; + } + + rbs_types_function_param_t *param = NULL; + CHECK_PARSE(parse_function_param(parser, ¶m)); + rbs_node_list_append(params->trailing_positionals, (rbs_node_t *) param); + + break; + } + + if (!parser_advance_if(parser, pCOMMA)) { + goto EOP; + } + } + +PARSE_KEYWORDS: + while (true) { + switch (parser->next_token.type) { + case pQUESTION: + rbs_parser_advance(parser); + if (is_keyword(parser)) { + CHECK_PARSE(parse_keyword(parser, params->optional_keywords, memo)); + } else { + rbs_parser_set_error(parser, parser->next_token, true, "optional keyword argument type is expected"); + return false; + } + break; + + case pSTAR2: + rbs_parser_advance(parser); + rbs_types_function_param_t *param = NULL; + CHECK_PARSE(parse_function_param(parser, ¶m)); + params->rest_keywords = (rbs_node_t *) param; + break; + + case tUIDENT: + case tLIDENT: + case tQIDENT: + case tULIDENT: + case tULLIDENT: + case tBANGIDENT: + KEYWORD_CASES + if (is_keyword(parser)) { + CHECK_PARSE(parse_keyword(parser, params->required_keywords, memo)); + } else { + rbs_parser_set_error(parser, parser->next_token, true, "required keyword argument type is expected"); + return false; + } + break; + + 
default: + goto EOP; + } + + if (!parser_advance_if(parser, pCOMMA)) { + goto EOP; + } + } + +EOP: + if (parser->next_token.type != pRPAREN) { + rbs_parser_set_error(parser, parser->next_token, true, "unexpected token for method type parameters"); + return false; + } + + return true; +} + +/* + optional ::= {} + | {} simple_type <`?`> +*/ +NODISCARD +static bool parse_optional(rbs_parser_t *parser, rbs_node_t **optional) { + rbs_range_t rg; + rg.start = parser->next_token.range.start; + + rbs_node_t *type = NULL; + CHECK_PARSE(parse_simple(parser, &type)); + + if (parser->next_token.type == pQUESTION) { + rbs_parser_advance(parser); + rg.end = parser->current_token.range.end; + rbs_location_t *location = rbs_location_new(ALLOCATOR(), rg); + *optional = (rbs_node_t *) rbs_types_optional_new(ALLOCATOR(), location, type); + } else { + *optional = type; + } + + return true; +} + +static void initialize_method_params(method_params *params, rbs_allocator_t *allocator) { + *params = (method_params) { + .required_positionals = rbs_node_list_new(allocator), + .optional_positionals = rbs_node_list_new(allocator), + .rest_positionals = NULL, + .trailing_positionals = rbs_node_list_new(allocator), + .required_keywords = rbs_hash_new(allocator), + .optional_keywords = rbs_hash_new(allocator), + .rest_keywords = NULL, + }; +} + +/* + self_type_binding ::= {} <> + | {} `[` `self` `:` type <`]`> +*/ +NODISCARD +static bool parse_self_type_binding(rbs_parser_t *parser, rbs_node_t **self_type) { + if (parser->next_token.type == pLBRACKET) { + rbs_parser_advance(parser); + ADVANCE_ASSERT(parser, kSELF); + ADVANCE_ASSERT(parser, pCOLON); + rbs_node_t *type; + CHECK_PARSE(rbs_parse_type(parser, &type)); + ADVANCE_ASSERT(parser, pRBRACKET); + *self_type = type; + } + + return true; +} + +typedef struct { + rbs_node_t *function; + rbs_types_block_t *block; + rbs_node_t *function_self_type; +} parse_function_result; + +/* + function ::= {} `(` params `)` self_type_binding? 
`{` `(` params `)` self_type_binding? `->` optional `}` `->` + | {} `(` params `)` self_type_binding? `->` + | {} self_type_binding? `{` `(` params `)` self_type_binding? `->` optional `}` `->` + | {} self_type_binding? `{` self_type_binding `->` optional `}` `->` + | {} self_type_binding? `->` +*/ +NODISCARD +static bool parse_function(rbs_parser_t *parser, bool accept_type_binding, parse_function_result **result) { + rbs_node_t *function = NULL; + rbs_types_block_t *block = NULL; + rbs_node_t *function_self_type = NULL; + rbs_range_t function_range; + function_range.start = parser->current_token.range.start; + + method_params params; + initialize_method_params(¶ms, ALLOCATOR()); + + if (parser->next_token.type == pLPAREN) { + rbs_parser_advance(parser); + CHECK_PARSE(parse_params(parser, ¶ms)); + ADVANCE_ASSERT(parser, pRPAREN); + } + + // Passing NULL to function_self_type means the function itself doesn't accept self type binding. (== method type) + if (accept_type_binding) { + CHECK_PARSE(parse_self_type_binding(parser, &function_self_type)); + } else { + if (rbs_is_untyped_params(¶ms)) { + if (parser->next_token.type != pARROW) { + rbs_parser_set_error(parser, parser->next_token2, true, "A method type with untyped method parameter cannot have block"); + return false; + } + } + } + + bool required = true; + if (parser->next_token.type == pQUESTION && parser->next_token2.type == pLBRACE) { + // Optional block + required = false; + rbs_parser_advance(parser); + } + if (parser->next_token.type == pLBRACE) { + rbs_parser_advance(parser); + + method_params block_params; + initialize_method_params(&block_params, ALLOCATOR()); + + if (parser->next_token.type == pLPAREN) { + rbs_parser_advance(parser); + CHECK_PARSE(parse_params(parser, &block_params)); + ADVANCE_ASSERT(parser, pRPAREN); + } + + rbs_node_t *self_type = NULL; + CHECK_PARSE(parse_self_type_binding(parser, &self_type)); + + ADVANCE_ASSERT(parser, pARROW); + rbs_node_t *block_return_type = NULL; + 
CHECK_PARSE(parse_optional(parser, &block_return_type)); + + rbs_node_t *block_function = NULL; + function_range.end = parser->current_token.range.end; + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), function_range); + if (rbs_is_untyped_params(&block_params)) { + block_function = (rbs_node_t *) rbs_types_untyped_function_new(ALLOCATOR(), loc, block_return_type); + } else { + block_function = (rbs_node_t *) rbs_types_function_new( + ALLOCATOR(), + loc, + block_params.required_positionals, + block_params.optional_positionals, + block_params.rest_positionals, + block_params.trailing_positionals, + block_params.required_keywords, + block_params.optional_keywords, + block_params.rest_keywords, + block_return_type + ); + } + + block = rbs_types_block_new(ALLOCATOR(), loc, block_function, required, self_type); + + ADVANCE_ASSERT(parser, pRBRACE); + } + + ADVANCE_ASSERT(parser, pARROW); + rbs_node_t *type = NULL; + CHECK_PARSE(parse_optional(parser, &type)); + + function_range.end = parser->current_token.range.end; + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), function_range); + if (rbs_is_untyped_params(¶ms)) { + function = (rbs_node_t *) rbs_types_untyped_function_new(ALLOCATOR(), loc, type); + } else { + function = (rbs_node_t *) rbs_types_function_new( + ALLOCATOR(), + loc, + params.required_positionals, + params.optional_positionals, + params.rest_positionals, + params.trailing_positionals, + params.required_keywords, + params.optional_keywords, + params.rest_keywords, + type + ); + } + + (*result)->function = function; + (*result)->block = block; + (*result)->function_self_type = function_self_type; + return true; +} + +/* + proc_type ::= {`^`} +*/ +NODISCARD +static bool parse_proc_type(rbs_parser_t *parser, rbs_types_proc_t **proc) { + rbs_position_t start = parser->current_token.range.start; + parse_function_result *result = rbs_allocator_alloc(ALLOCATOR(), parse_function_result); + CHECK_PARSE(parse_function(parser, true, &result)); + + 
rbs_position_t end = parser->current_token.range.end; + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), (rbs_range_t) { .start = start, .end = end }); + *proc = rbs_types_proc_new(ALLOCATOR(), loc, result->function, result->block, result->function_self_type); + return true; +} + +static void check_key_duplication(rbs_parser_t *parser, rbs_hash_t *fields, rbs_node_t *key) { + if (rbs_hash_find(fields, ((rbs_node_t *) key))) { + rbs_parser_set_error(parser, parser->current_token, true, "duplicated record key"); + } +} + +/** + * ... `{` ... `}` ... + * > > + * */ +/* + record_attributes ::= {`{`} record_attribute... `}` + + record_attribute ::= {} keyword_token `:` + | {} literal_type `=>` +*/ +NODISCARD +static bool parse_record_attributes(rbs_parser_t *parser, rbs_hash_t **fields) { + *fields = rbs_hash_new(ALLOCATOR()); + + if (parser->next_token.type == pRBRACE) return true; + + while (true) { + rbs_ast_symbol_t *key = NULL; + bool required = true; + + if (parser->next_token.type == pQUESTION) { + // { ?foo: type } syntax + required = false; + rbs_parser_advance(parser); + } + + if (is_keyword(parser)) { + // { foo: type } syntax + CHECK_PARSE(parse_keyword_key(parser, &key)); + + check_key_duplication(parser, *fields, (rbs_node_t *) key); + ADVANCE_ASSERT(parser, pCOLON); + } else { + // { key => type } syntax + switch (parser->next_token.type) { + case tSYMBOL: + case tSQSYMBOL: + case tDQSYMBOL: + case tSQSTRING: + case tDQSTRING: + case tINTEGER: + case kTRUE: + case kFALSE: { + rbs_node_t *type = NULL; + CHECK_PARSE(parse_simple(parser, &type)); + + key = (rbs_ast_symbol_t *) ((rbs_types_literal_t *) type)->literal; + break; + } + default: + rbs_parser_set_error(parser, parser->next_token, true, "unexpected record key token"); + return false; + } + check_key_duplication(parser, *fields, (rbs_node_t *) key); + ADVANCE_ASSERT(parser, pFATARROW); + } + + rbs_range_t field_range; + field_range.start = parser->current_token.range.end; + + rbs_node_t *type; + 
CHECK_PARSE(rbs_parse_type(parser, &type)); + + field_range.end = parser->current_token.range.end; + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), field_range); + rbs_hash_set(*fields, (rbs_node_t *) key, (rbs_node_t *) rbs_types_record_field_type_new(ALLOCATOR(), loc, type, required)); + + if (parser_advance_if(parser, pCOMMA)) { + if (parser->next_token.type == pRBRACE) { + break; + } + } else { + break; + } + } + return true; +} + +/* + symbol ::= {} +*/ +NODISCARD +static bool parse_symbol(rbs_parser_t *parser, rbs_location_t *location, rbs_types_literal_t **symbol) { + size_t offset_bytes = parser->rbs_lexer_t->encoding->char_width((const uint8_t *) ":", (size_t) 1); + size_t bytes = rbs_token_bytes(parser->current_token) - offset_bytes; + + rbs_ast_symbol_t *literal; + + switch (parser->current_token.type) { + case tSYMBOL: { + rbs_location_t *symbolLoc = rbs_location_current_token(parser); + + char *buffer = rbs_peek_token(parser->rbs_lexer_t, parser->current_token); + rbs_constant_id_t constant_id = rbs_constant_pool_insert_shared( + &parser->constant_pool, + (const uint8_t *) buffer + offset_bytes, + bytes + ); + literal = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, constant_id); + break; + } + case tDQSYMBOL: + case tSQSYMBOL: { + rbs_location_t *symbolLoc = rbs_location_current_token(parser); + rbs_string_t current_token = rbs_parser_peek_current_token(parser); + + rbs_string_t symbol = rbs_string_new(current_token.start + offset_bytes, current_token.end); + + rbs_string_t unquoted_symbol = rbs_unquote_string(ALLOCATOR(), symbol); + + rbs_constant_id_t constant_id = rbs_constant_pool_insert_string(&parser->constant_pool, unquoted_symbol); + + literal = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, constant_id); + break; + } + default: + rbs_parser_set_error(parser, parser->current_token, false, "Unexpected error"); + return false; + } + + *symbol = rbs_types_literal_new(ALLOCATOR(), location, (rbs_node_t 
*) literal); + return true; +} + +/* + instance_type ::= {type_name} + + type_args ::= {} <> /empty/ + | {} `[` type_list <`]`> + */ +NODISCARD +static bool parse_instance_type(rbs_parser_t *parser, bool parse_alias, rbs_node_t **type) { + TypeNameKind expected_kind = INTERFACE_NAME | CLASS_NAME; + if (parse_alias) { + expected_kind |= ALIAS_NAME; + } + + rbs_range_t name_range; + rbs_type_name_t *type_name = NULL; + CHECK_PARSE(parse_type_name(parser, expected_kind, &name_range, &type_name)); + + rbs_node_list_t *types = rbs_node_list_new(ALLOCATOR()); + + TypeNameKind kind; + switch (parser->current_token.type) { + case tUIDENT: { + kind = CLASS_NAME; + break; + } + case tULIDENT: { + kind = INTERFACE_NAME; + break; + } + case tLIDENT: { + kind = ALIAS_NAME; + break; + } + default: + rbs_parser_set_error(parser, parser->current_token, false, "unexpected token for type name"); + return false; + } + + rbs_range_t args_range; + if (parser->next_token.type == pLBRACKET) { + rbs_parser_advance(parser); + args_range.start = parser->current_token.range.start; + CHECK_PARSE(parse_type_list(parser, pRBRACKET, types)); + ADVANCE_ASSERT(parser, pRBRACKET); + args_range.end = parser->current_token.range.end; + } else { + args_range = NULL_RANGE; + } + + rbs_range_t type_range = { + .start = name_range.start, + .end = rbs_nonnull_pos_or(args_range.end, name_range.end), + }; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), type_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 2); + rbs_loc_add_required_child(loc, INTERN("name"), name_range); + rbs_loc_add_optional_child(loc, INTERN("args"), args_range); + + if (kind == CLASS_NAME) { + *type = (rbs_node_t *) rbs_types_class_instance_new(ALLOCATOR(), loc, type_name, types); + } else if (kind == INTERFACE_NAME) { + *type = (rbs_node_t *) rbs_types_interface_new(ALLOCATOR(), loc, type_name, types); + } else if (kind == ALIAS_NAME) { + *type = (rbs_node_t *) rbs_types_alias_new(ALLOCATOR(), loc, type_name, types); + } + + 
return true; +} + +/* + singleton_type ::= {`singleton`} `(` type_name <`)`> +*/ +NODISCARD +static bool parse_singleton_type(rbs_parser_t *parser, rbs_types_class_singleton_t **singleton) { + ASSERT_TOKEN(parser, kSINGLETON); + + rbs_range_t type_range; + type_range.start = parser->current_token.range.start; + ADVANCE_ASSERT(parser, pLPAREN); + rbs_parser_advance(parser); + + rbs_range_t name_range; + rbs_type_name_t *type_name = NULL; + CHECK_PARSE(parse_type_name(parser, CLASS_NAME, &name_range, &type_name)); + + ADVANCE_ASSERT(parser, pRPAREN); + type_range.end = parser->current_token.range.end; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), type_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 1); + rbs_loc_add_required_child(loc, INTERN("name"), name_range); + + *singleton = rbs_types_class_singleton_new(ALLOCATOR(), loc, type_name); + return true; +} + +/** + * Returns true if given type variable is recorded in the table. + * If not found, it goes one table up, if it's not a reset table. + * Or returns false, if it's a reset table. 
+ * */ +static bool parser_typevar_member(rbs_parser_t *parser, rbs_constant_id_t id) { + id_table *table = parser->vars; /* start at the table currently held by the parser */ + + while (table && !RESET_TABLE_P(table)) { /* stop at the end of the chain or at a table whose size is 0 */ + for (size_t i = 0; i < table->count; i++) { /* only the first `count` ids of a table are compared */ + if (table->ids[i] == id) { + return true; + } + } + + table = table->next; /* not found here: continue with the next table in the chain */ + } + + return false; +} + +/* + simple ::= {} `(` type <`)`> + | {} + | {} + | {} class_instance `[` type_list <`]`> + | {} `singleton` `(` type_name <`)`> + | {} `[` type_list <`]`> + | {} `{` record_attributes <`}`> + | {} `^` +*/ +NODISCARD +static bool parse_simple(rbs_parser_t *parser, rbs_node_t **type) { + rbs_parser_advance(parser); /* the token to dispatch on becomes current_token */ + + switch (parser->current_token.type) { + case pLPAREN: { + rbs_node_t *lparen_type; + CHECK_PARSE(rbs_parse_type(parser, &lparen_type)); + ADVANCE_ASSERT(parser, pRPAREN); + *type = lparen_type; + return true; + } + case kBOOL: { + rbs_location_t *loc = rbs_location_current_token(parser); + *type = (rbs_node_t *) rbs_types_bases_bool_new(ALLOCATOR(), loc); + return true; + } + case kBOT: { + rbs_location_t *loc = rbs_location_current_token(parser); + *type = (rbs_node_t *) rbs_types_bases_bottom_new(ALLOCATOR(), loc); + return true; + } + case kCLASS: { + rbs_location_t *loc = rbs_location_current_token(parser); + *type = (rbs_node_t *) rbs_types_bases_class_new(ALLOCATOR(), loc); + return true; + } + case kINSTANCE: { + rbs_location_t *loc = rbs_location_current_token(parser); + *type = (rbs_node_t *) rbs_types_bases_instance_new(ALLOCATOR(), loc); + return true; + } + case kNIL: { + rbs_location_t *loc = rbs_location_current_token(parser); + *type = (rbs_node_t *) rbs_types_bases_nil_new(ALLOCATOR(), loc); + return true; + } + case kSELF: { + rbs_location_t *loc = rbs_location_current_token(parser); + *type = (rbs_node_t *) rbs_types_bases_self_new(ALLOCATOR(), loc); + return true; + } + case kTOP: { + rbs_location_t *loc = rbs_location_current_token(parser); + *type = (rbs_node_t *) rbs_types_bases_top_new(ALLOCATOR(), loc); + return 
true; + } + case kVOID: { + rbs_location_t *loc = rbs_location_current_token(parser); + *type = (rbs_node_t *) rbs_types_bases_void_new(ALLOCATOR(), loc); + return true; + } + case kUNTYPED: { + rbs_location_t *loc = rbs_location_current_token(parser); + *type = (rbs_node_t *) rbs_types_bases_any_new(ALLOCATOR(), loc, false); + return true; + } + case k__TODO__: { + rbs_location_t *loc = rbs_location_current_token(parser); + *type = (rbs_node_t *) rbs_types_bases_any_new(ALLOCATOR(), loc, true); + return true; + } + case tINTEGER: { + rbs_location_t *loc = rbs_location_current_token(parser); + + rbs_string_t string = rbs_parser_peek_current_token(parser); + rbs_string_t stripped_string = rbs_string_strip_whitespace(&string); + + rbs_node_t *literal = (rbs_node_t *) rbs_ast_integer_new(ALLOCATOR(), loc, stripped_string); + *type = (rbs_node_t *) rbs_types_literal_new(ALLOCATOR(), loc, literal); + return true; + } + case kTRUE: { + rbs_location_t *loc = rbs_location_current_token(parser); + *type = (rbs_node_t *) rbs_types_literal_new(ALLOCATOR(), loc, (rbs_node_t *) rbs_ast_bool_new(ALLOCATOR(), loc, true)); + return true; + } + case kFALSE: { + rbs_location_t *loc = rbs_location_current_token(parser); + *type = (rbs_node_t *) rbs_types_literal_new(ALLOCATOR(), loc, (rbs_node_t *) rbs_ast_bool_new(ALLOCATOR(), loc, false)); + return true; + } + case tSQSTRING: + case tDQSTRING: { + rbs_location_t *loc = rbs_location_current_token(parser); + + rbs_string_t unquoted_str = rbs_unquote_string(ALLOCATOR(), rbs_parser_peek_current_token(parser)); + rbs_node_t *literal = (rbs_node_t *) rbs_ast_string_new(ALLOCATOR(), loc, unquoted_str); + *type = (rbs_node_t *) rbs_types_literal_new(ALLOCATOR(), loc, literal); + return true; + } + case tSYMBOL: + case tSQSYMBOL: + case tDQSYMBOL: { + rbs_location_t *loc = rbs_location_current_token(parser); + rbs_types_literal_t *literal = NULL; + CHECK_PARSE(parse_symbol(parser, loc, &literal)); + *type = (rbs_node_t *) literal; + return 
true; + } + case tUIDENT: { + const char *name_str = rbs_peek_token(parser->rbs_lexer_t, parser->current_token); + size_t name_len = rbs_token_bytes(parser->current_token); + + rbs_constant_id_t name = rbs_constant_pool_find(&parser->constant_pool, (const uint8_t *) name_str, name_len); + + if (parser_typevar_member(parser, name)) { + rbs_location_t *loc = rbs_location_current_token(parser); + rbs_ast_symbol_t *symbol = rbs_ast_symbol_new(ALLOCATOR(), loc, &parser->constant_pool, name); + *type = (rbs_node_t *) rbs_types_variable_new(ALLOCATOR(), loc, symbol); + return true; + } + + RBS_FALLTHROUGH // for type name + } + case tULIDENT: + case tLIDENT: + case pCOLON2: { + rbs_node_t *instance_type = NULL; + CHECK_PARSE(parse_instance_type(parser, true, &instance_type)); + *type = instance_type; + return true; + } + case kSINGLETON: { + rbs_types_class_singleton_t *singleton = NULL; + CHECK_PARSE(parse_singleton_type(parser, &singleton)); + *type = (rbs_node_t *) singleton; + return true; + } + case pLBRACKET: { + rbs_range_t rg; + rg.start = parser->current_token.range.start; + rbs_node_list_t *types = rbs_node_list_new(ALLOCATOR()); + if (parser->next_token.type != pRBRACKET) { + CHECK_PARSE(parse_type_list(parser, pRBRACKET, types)); + } + ADVANCE_ASSERT(parser, pRBRACKET); + rg.end = parser->current_token.range.end; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), rg); + *type = (rbs_node_t *) rbs_types_tuple_new(ALLOCATOR(), loc, types); + return true; + } + case pAREF_OPR: { + rbs_location_t *loc = rbs_location_current_token(parser); + rbs_node_list_t *types = rbs_node_list_new(ALLOCATOR()); + *type = (rbs_node_t *) rbs_types_tuple_new(ALLOCATOR(), loc, types); + return true; + } + case pLBRACE: { + rbs_position_t start = parser->current_token.range.start; + rbs_hash_t *fields = NULL; + CHECK_PARSE(parse_record_attributes(parser, &fields)); + ADVANCE_ASSERT(parser, pRBRACE); + rbs_position_t end = parser->current_token.range.end; + rbs_location_t *loc = 
rbs_location_new(ALLOCATOR(), (rbs_range_t) { .start = start, .end = end });
+        *type = (rbs_node_t *) rbs_types_record_new(ALLOCATOR(), loc, fields);
+        return true;
+    }
+    case pHAT: {
+        rbs_types_proc_t *value = NULL;
+        CHECK_PARSE(parse_proc_type(parser, &value));
+        *type = (rbs_node_t *) value;
+        return true;
+    }
+    default:
+        rbs_parser_set_error(parser, parser->current_token, true, "unexpected token for simple type");
+        return false;
+    }
+}
+
+/*
+  intersection ::= {} optional `&` ... '&' <optional>
+                 | {} <optional>
+
+  Parses one `optional` type followed by zero or more `&`-separated `optional` types.
+  On success `*type` is the single parsed type, or an intersection node spanning all of
+  them when more than one was parsed.
+  NOTE(review): CHECK_PARSE is defined outside this view; it presumably returns false
+  from this function when the wrapped call fails.
+*/
+NODISCARD
+static bool parse_intersection(rbs_parser_t *parser, rbs_node_t **type) {
+    rbs_range_t rg;
+    rg.start = parser->next_token.range.start; // first operand has not been consumed yet
+
+    rbs_node_t *optional = NULL;
+    CHECK_PARSE(parse_optional(parser, &optional));
+    *type = optional; // result when no `&` follows
+
+    rbs_node_list_t *intersection_types = rbs_node_list_new(ALLOCATOR());
+
+    rbs_node_list_append(intersection_types, optional);
+    while (parser->next_token.type == pAMP) {
+        rbs_parser_advance(parser); // consume `&`
+        rbs_node_t *type = NULL; // NOTE: shadows the `type` out-parameter inside this loop body
+        CHECK_PARSE(parse_optional(parser, &type));
+        rbs_node_list_append(intersection_types, type);
+    }
+
+    rg.end = parser->current_token.range.end; // end of the last operand consumed
+
+    if (intersection_types->length > 1) {
+        // More than one operand: replace the single-type result with an intersection node.
+        rbs_location_t *location = rbs_location_new(ALLOCATOR(), rg);
+        *type = (rbs_node_t *) rbs_types_intersection_new(ALLOCATOR(), location, intersection_types);
+    }
+
+    return true;
+}
+
+/*
+  union ::= {} intersection '|' ...
'|'
+          | {}
+*/
+bool rbs_parse_type(rbs_parser_t *parser, rbs_node_t **type) {
+    // Parses one intersection followed by any number of `|`-separated intersections.
+    // `*type` ends up as the lone intersection, or as a union node when several were read.
+    const rbs_position_t union_start = parser->next_token.range.start;
+    rbs_node_list_t *members = rbs_node_list_new(ALLOCATOR());
+
+    // The first member doubles as the result when no `|` follows.
+    CHECK_PARSE(parse_intersection(parser, type));
+    rbs_node_list_append(members, *type);
+
+    for (;;) {
+        if (parser->next_token.type != pBAR) {
+            break;
+        }
+        rbs_parser_advance(parser); // consume `|`
+
+        rbs_node_t *member = NULL;
+        CHECK_PARSE(parse_intersection(parser, &member));
+        rbs_node_list_append(members, member);
+    }
+
+    // A single member is returned as-is; no union node is built for it.
+    if (members->length <= 1) {
+        return true;
+    }
+
+    const rbs_range_t union_range = { .start = union_start, .end = parser->current_token.range.end };
+    rbs_location_t *union_loc = rbs_location_new(ALLOCATOR(), union_range);
+    *type = (rbs_node_t *) rbs_types_union_new(ALLOCATOR(), union_loc, members);
+    return true;
+}
+
+/*
+  type_params ::= {} `[` type_param `,` ... <`]`>
+                | {<>}
+
+  type_param ::= kUNCHECKED? (kIN|kOUT|) tUIDENT upper_bound? default_type? (module_type_params == true)
+
+  type_param ::= tUIDENT upper_bound? default_type?
(module_type_params == false) +*/ +NODISCARD +static bool parse_type_params(rbs_parser_t *parser, rbs_range_t *rg, bool module_type_params, rbs_node_list_t **params) { + *params = rbs_node_list_new(ALLOCATOR()); + + bool required_param_allowed = true; + + if (parser->next_token.type == pLBRACKET) { + rbs_parser_advance(parser); + + rg->start = parser->current_token.range.start; + + while (true) { + bool unchecked = false; + rbs_keyword_t *variance = rbs_keyword_new(ALLOCATOR(), rbs_location_current_token(parser), INTERN("invariant")); + rbs_node_t *upper_bound = NULL; + rbs_node_t *default_type = NULL; + + rbs_range_t param_range; + param_range.start = parser->next_token.range.start; + + rbs_range_t unchecked_range = NULL_RANGE; + rbs_range_t variance_range = NULL_RANGE; + if (module_type_params) { + if (parser->next_token.type == kUNCHECKED) { + unchecked = true; + rbs_parser_advance(parser); + unchecked_range = parser->current_token.range; + } + + if (parser->next_token.type == kIN || parser->next_token.type == kOUT) { + switch (parser->next_token.type) { + case kIN: + variance = rbs_keyword_new(ALLOCATOR(), rbs_location_current_token(parser), INTERN("contravariant")); + break; + case kOUT: + variance = rbs_keyword_new(ALLOCATOR(), rbs_location_current_token(parser), INTERN("covariant")); + break; + default: + rbs_parser_set_error(parser, parser->current_token, false, "Unexpected error"); + return false; + } + + rbs_parser_advance(parser); + variance_range = parser->current_token.range; + } + } + + ADVANCE_ASSERT(parser, tUIDENT); + rbs_range_t name_range = parser->current_token.range; + + rbs_string_t string = rbs_parser_peek_current_token(parser); + rbs_location_t *nameSymbolLoc = rbs_location_current_token(parser); + rbs_constant_id_t id = rbs_constant_pool_insert_string(&parser->constant_pool, string); + rbs_ast_symbol_t *name = rbs_ast_symbol_new(ALLOCATOR(), nameSymbolLoc, &parser->constant_pool, id); + + CHECK_PARSE(rbs_parser_insert_typevar(parser, id)); 
+ + rbs_range_t upper_bound_range = NULL_RANGE; + if (parser->next_token.type == pLT) { + rbs_parser_advance(parser); + upper_bound_range.start = parser->current_token.range.start; + CHECK_PARSE(rbs_parse_type(parser, &upper_bound)); + upper_bound_range.end = parser->current_token.range.end; + } + + rbs_range_t default_type_range = NULL_RANGE; + if (module_type_params) { + if (parser->next_token.type == pEQ) { + rbs_parser_advance(parser); + + default_type_range.start = parser->current_token.range.start; + CHECK_PARSE(rbs_parse_type(parser, &default_type)); + default_type_range.end = parser->current_token.range.end; + + required_param_allowed = false; + } else { + if (!required_param_allowed) { + rbs_parser_set_error(parser, parser->current_token, true, "required type parameter is not allowed after optional type parameter"); + return false; + } + } + } + + param_range.end = parser->current_token.range.end; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), param_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 5); + rbs_loc_add_required_child(loc, INTERN("name"), name_range); + rbs_loc_add_optional_child(loc, INTERN("variance"), variance_range); + rbs_loc_add_optional_child(loc, INTERN("unchecked"), unchecked_range); + rbs_loc_add_optional_child(loc, INTERN("upper_bound"), upper_bound_range); + rbs_loc_add_optional_child(loc, INTERN("default"), default_type_range); + + rbs_ast_type_param_t *param = rbs_ast_type_param_new(ALLOCATOR(), loc, name, variance, upper_bound, default_type, unchecked); + + rbs_node_list_append(*params, (rbs_node_t *) param); + + if (parser->next_token.type == pCOMMA) { + rbs_parser_advance(parser); + } + + if (parser->next_token.type == pRBRACKET) { + break; + } + } + + ADVANCE_ASSERT(parser, pRBRACKET); + rg->end = parser->current_token.range.end; + } else { + *rg = NULL_RANGE; + } + + return true; +} + +NODISCARD +static bool parser_pop_typevar_table(rbs_parser_t *parser) { + id_table *table; + + if (parser->vars) { + table = 
parser->vars; + parser->vars = table->next; + } else { + rbs_parser_set_error(parser, parser->current_token, false, "Cannot pop empty table"); + return false; + } + + if (parser->vars && RESET_TABLE_P(parser->vars)) { + table = parser->vars; + parser->vars = table->next; + } + + return true; +} + +/* + method_type ::= {} type_params + */ +// TODO: Should this be NODISCARD? +bool rbs_parse_method_type(rbs_parser_t *parser, rbs_method_type_t **method_type) { + rbs_parser_push_typevar_table(parser, false); + + rbs_range_t rg; + rg.start = parser->next_token.range.start; + + rbs_range_t params_range = NULL_RANGE; + rbs_node_list_t *type_params; + CHECK_PARSE(parse_type_params(parser, ¶ms_range, false, &type_params)); + + rbs_range_t type_range; + type_range.start = parser->next_token.range.start; + + parse_function_result *result = rbs_allocator_alloc(ALLOCATOR(), parse_function_result); + CHECK_PARSE(parse_function(parser, false, &result)); + + rg.end = parser->current_token.range.end; + type_range.end = rg.end; + + CHECK_PARSE(parser_pop_typevar_table(parser)); + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), rg); + rbs_loc_alloc_children(ALLOCATOR(), loc, 2); + rbs_loc_add_required_child(loc, INTERN("type"), type_range); + rbs_loc_add_optional_child(loc, INTERN("type_params"), params_range); + + *method_type = rbs_method_type_new(ALLOCATOR(), loc, type_params, result->function, result->block); + return true; +} + +/* + global_decl ::= {tGIDENT} `:` +*/ +NODISCARD +static bool parse_global_decl(rbs_parser_t *parser, rbs_node_list_t *annotations, rbs_ast_declarations_global_t **global) { + rbs_range_t decl_range; + decl_range.start = parser->current_token.range.start; + + rbs_ast_comment_t *comment = rbs_parser_get_comment(parser, decl_range.start.line); + + rbs_range_t name_range = parser->current_token.range; + rbs_location_t *symbolLoc = rbs_location_new(ALLOCATOR(), name_range); + + rbs_ast_symbol_t *type_name = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, 
&parser->constant_pool, INTERN_TOKEN(parser, parser->current_token)); + + ADVANCE_ASSERT(parser, pCOLON); + rbs_range_t colon_range = parser->current_token.range; + + rbs_node_t *type; + CHECK_PARSE(rbs_parse_type(parser, &type)); + decl_range.end = parser->current_token.range.end; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), decl_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 2); + rbs_loc_add_required_child(loc, INTERN("name"), name_range); + rbs_loc_add_required_child(loc, INTERN("colon"), colon_range); + + *global = rbs_ast_declarations_global_new(ALLOCATOR(), loc, type_name, type, comment, annotations); + return true; +} + +/* + const_decl ::= {const_name} `:` +*/ +NODISCARD +static bool parse_const_decl(rbs_parser_t *parser, rbs_node_list_t *annotations, rbs_ast_declarations_constant_t **constant) { + rbs_range_t decl_range; + + decl_range.start = parser->current_token.range.start; + rbs_ast_comment_t *comment = rbs_parser_get_comment(parser, decl_range.start.line); + + rbs_range_t name_range; + rbs_type_name_t *type_name = NULL; + CHECK_PARSE(parse_type_name(parser, CLASS_NAME, &name_range, &type_name)); + + ADVANCE_ASSERT(parser, pCOLON); + rbs_range_t colon_range = parser->current_token.range; + + rbs_node_t *type; + CHECK_PARSE(rbs_parse_type(parser, &type)); + + decl_range.end = parser->current_token.range.end; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), decl_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 2); + rbs_loc_add_required_child(loc, INTERN("name"), name_range); + rbs_loc_add_required_child(loc, INTERN("colon"), colon_range); + + *constant = rbs_ast_declarations_constant_new(ALLOCATOR(), loc, type_name, type, comment, annotations); + return true; +} + +/* + type_decl ::= {kTYPE} alias_name `=` +*/ +NODISCARD +static bool parse_type_decl(rbs_parser_t *parser, rbs_position_t comment_pos, rbs_node_list_t *annotations, rbs_ast_declarations_type_alias_t **typealias) { + rbs_parser_push_typevar_table(parser, true); + + 
rbs_range_t decl_range; + decl_range.start = parser->current_token.range.start; + comment_pos = rbs_nonnull_pos_or(comment_pos, decl_range.start); + + rbs_range_t keyword_range = parser->current_token.range; + + rbs_parser_advance(parser); + + rbs_range_t name_range; + rbs_type_name_t *type_name = NULL; + CHECK_PARSE(parse_type_name(parser, ALIAS_NAME, &name_range, &type_name)); + + rbs_range_t params_range; + rbs_node_list_t *type_params; + CHECK_PARSE(parse_type_params(parser, ¶ms_range, true, &type_params)); + + ADVANCE_ASSERT(parser, pEQ); + rbs_range_t eq_range = parser->current_token.range; + + rbs_node_t *type; + CHECK_PARSE(rbs_parse_type(parser, &type)); + + decl_range.end = parser->current_token.range.end; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), decl_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 4); + rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); + rbs_loc_add_required_child(loc, INTERN("name"), name_range); + rbs_loc_add_optional_child(loc, INTERN("type_params"), params_range); + rbs_loc_add_required_child(loc, INTERN("eq"), eq_range); + + CHECK_PARSE(parser_pop_typevar_table(parser)); + + rbs_ast_comment_t *comment = rbs_parser_get_comment(parser, comment_pos.line); + + *typealias = rbs_ast_declarations_type_alias_new(ALLOCATOR(), loc, type_name, type_params, type, annotations, comment); + return true; +} + +/* + annotation ::= {} +*/ +NODISCARD +static bool parse_annotation(rbs_parser_t *parser, rbs_ast_annotation_t **annotation) { + rbs_range_t rg = parser->current_token.range; + + size_t offset_bytes = + parser->rbs_lexer_t->encoding->char_width((const uint8_t *) "%", (size_t) 1) + + parser->rbs_lexer_t->encoding->char_width((const uint8_t *) "a", (size_t) 1); + + rbs_string_t str = rbs_string_new( + parser->rbs_lexer_t->string.start + rg.start.byte_pos + offset_bytes, + parser->rbs_lexer_t->string.end + ); + unsigned int open_char = rbs_utf8_string_to_codepoint(str); + + unsigned int close_char; + + switch 
(open_char) { + case '{': + close_char = '}'; + break; + case '(': + close_char = ')'; + break; + case '[': + close_char = ']'; + break; + case '<': + close_char = '>'; + break; + case '|': + close_char = '|'; + break; + default: + rbs_parser_set_error(parser, parser->current_token, false, "Unexpected error"); + return false; + } + + size_t open_bytes = parser->rbs_lexer_t->encoding->char_width((const uint8_t *) &open_char, (size_t) 1); + size_t close_bytes = parser->rbs_lexer_t->encoding->char_width((const uint8_t *) &close_char, (size_t) 1); + + rbs_string_t current_token = rbs_parser_peek_current_token(parser); + size_t total_offset = offset_bytes + open_bytes; + + rbs_string_t annotation_str = rbs_string_new( + current_token.start + total_offset, + current_token.end - close_bytes + ); + + rbs_string_t stripped_annotation_str = rbs_string_strip_whitespace(&annotation_str); + + *annotation = rbs_ast_annotation_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), rg), stripped_annotation_str); + return true; +} + +/* + annotations ::= {} annotation ... 
+ | {<>} +*/ +NODISCARD +static bool parse_annotations(rbs_parser_t *parser, rbs_node_list_t *annotations, rbs_position_t *annot_pos) { + *annot_pos = NullPosition; + + while (true) { + if (parser->next_token.type == tANNOTATION) { + rbs_parser_advance(parser); + + if (rbs_null_position_p((*annot_pos))) { + *annot_pos = parser->current_token.range.start; + } + + rbs_ast_annotation_t *annotation = NULL; + CHECK_PARSE(parse_annotation(parser, &annotation)); + rbs_node_list_append(annotations, (rbs_node_t *) annotation); + } else { + break; + } + } + + return true; +} + +/* + method_name ::= {} + | {} (IDENT | keyword)~<`?`> +*/ +NODISCARD +static bool parse_method_name(rbs_parser_t *parser, rbs_range_t *range, rbs_ast_symbol_t **symbol) { + rbs_parser_advance(parser); + + switch (parser->current_token.type) { + case tUIDENT: + case tLIDENT: + case tULIDENT: + case tULLIDENT: + KEYWORD_CASES + if (parser->next_token.type == pQUESTION && parser->current_token.range.end.byte_pos == parser->next_token.range.start.byte_pos) { + range->start = parser->current_token.range.start; + range->end = parser->next_token.range.end; + rbs_parser_advance(parser); + + rbs_constant_id_t constant_id = rbs_constant_pool_insert_shared_with_encoding( + &parser->constant_pool, + (const uint8_t *) parser->rbs_lexer_t->string.start + range->start.byte_pos, + range->end.byte_pos - range->start.byte_pos, + parser->rbs_lexer_t->encoding + ); + + rbs_location_t *symbolLoc = rbs_location_new(ALLOCATOR(), *range); + *symbol = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, constant_id); + } else { + *range = parser->current_token.range; + rbs_location_t *symbolLoc = rbs_location_new(ALLOCATOR(), *range); + *symbol = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, INTERN_TOKEN(parser, parser->current_token)); + } + return true; + + case tBANGIDENT: + case tEQIDENT: { + *range = parser->current_token.range; + rbs_location_t *symbolLoc = 
rbs_location_new(ALLOCATOR(), *range); + *symbol = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, INTERN_TOKEN(parser, parser->current_token)); + return true; + } + case tQIDENT: { + rbs_string_t string = rbs_parser_peek_current_token(parser); + rbs_string_t unquoted_str = rbs_unquote_string(ALLOCATOR(), string); + rbs_constant_id_t constant_id = rbs_constant_pool_insert_string(&parser->constant_pool, unquoted_str); + rbs_location_t *symbolLoc = rbs_location_current_token(parser); + *symbol = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, constant_id); + return true; + } + + case pBAR: + case pHAT: + case pAMP: + case pSTAR: + case pSTAR2: + case pLT: + case pAREF_OPR: + case tOPERATOR: { + *range = parser->current_token.range; + rbs_location_t *symbolLoc = rbs_location_new(ALLOCATOR(), *range); + *symbol = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, INTERN_TOKEN(parser, parser->current_token)); + return true; + } + + default: + rbs_parser_set_error(parser, parser->current_token, true, "unexpected token for method name"); + return false; + } +} + +typedef enum { + INSTANCE_KIND, + SINGLETON_KIND, + INSTANCE_SINGLETON_KIND +} InstanceSingletonKind; + +/* + instance_singleton_kind ::= {<>} + | {} kSELF <`.`> + | {} kSELF~`?` <`.`> + + @param allow_selfq `true` to accept `self?` kind. 
+*/
+static InstanceSingletonKind parse_instance_singleton_kind(rbs_parser_t *parser, bool allow_selfq, rbs_range_t *rg) {
+    // Without a leading `self` there is no kind prefix at all.
+    if (parser->next_token.type != kSELF) {
+        *rg = NULL_RANGE;
+        return INSTANCE_KIND;
+    }
+
+    const rbs_position_t self_start = parser->next_token.range.start;
+
+    // Decide from the lookahead tokens first, then consume the matching number of tokens.
+    InstanceSingletonKind kind = INSTANCE_KIND;
+    int tokens_to_consume = 0;
+
+    if (parser->next_token2.type == pDOT) {
+        // `self` `.`
+        kind = SINGLETON_KIND;
+        tokens_to_consume = 2;
+    } else {
+        // `self?` requires the `?` to start exactly where `self` ends.
+        const bool question_attached =
+            parser->next_token2.type == pQUESTION &&
+            parser->next_token.range.end.char_pos == parser->next_token2.range.start.char_pos;
+
+        if (question_attached && parser->next_token3.type == pDOT && allow_selfq) {
+            // `self?` `.`
+            kind = INSTANCE_SINGLETON_KIND;
+            tokens_to_consume = 3;
+        }
+    }
+
+    for (int consumed = 0; consumed < tokens_to_consume; consumed++) {
+        rbs_parser_advance(parser);
+    }
+
+    // The range always starts at `self`, even when nothing was consumed.
+    *rg = (rbs_range_t) {
+        .start = self_start,
+        .end = parser->current_token.range.end,
+    };
+    return kind;
+}
+
+/**
+ * def_member ::= {kDEF} method_name `:`
+ *              | {kPRIVATE} kDEF method_name `:`
+ *              | {kPUBLIC} kDEF method_name `:`
+ *
+ * method_types ::= {}
+ *                | {} <`...`>
+ *                | {} method_type `|`
+ *
+ * @param instance_only `true` to reject singleton method definition.
+ * @param accept_overload `true` to accept overloading (...) definition.
+ * */
+NODISCARD // parse_member_def: parses one `def` member, including all `|`-separated overloads.
+static bool parse_member_def(rbs_parser_t *parser, bool instance_only, bool accept_overload, rbs_position_t comment_pos, rbs_node_list_t *annotations, rbs_ast_members_method_definition_t **method_definition) {
+    rbs_range_t member_range;
+    member_range.start = parser->current_token.range.start;
+    comment_pos = rbs_nonnull_pos_or(comment_pos, member_range.start); // fall back to the member start
+
+    rbs_ast_comment_t *comment = rbs_parser_get_comment(parser, comment_pos.line);
+
+    rbs_range_t visibility_range;
+    rbs_keyword_t *visibility;
+    switch (parser->current_token.type) { // optional `private` / `public` prefix
+    case kPRIVATE: {
+        visibility_range = parser->current_token.range;
+        visibility = rbs_keyword_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), visibility_range), INTERN("private"));
+        member_range.start = visibility_range.start;
+        rbs_parser_advance(parser);
+        break;
+    }
+    case kPUBLIC: {
+        visibility_range = parser->current_token.range;
+        visibility = rbs_keyword_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), visibility_range), INTERN("public"));
+        member_range.start = visibility_range.start;
+        rbs_parser_advance(parser);
+        break;
+    }
+    default:
+        visibility_range = NULL_RANGE;
+        visibility = NULL;
+        break;
+    }
+
+    rbs_range_t keyword_range = parser->current_token.range; // the token after any visibility prefix
+
+    rbs_range_t kind_range;
+    InstanceSingletonKind kind;
+    if (instance_only) {
+        kind_range = NULL_RANGE;
+        kind = INSTANCE_KIND;
+    } else {
+        kind = parse_instance_singleton_kind(parser, visibility == NULL, &kind_range); // `self?` is only accepted without a visibility prefix
+    }
+
+    rbs_range_t name_range;
+    rbs_ast_symbol_t *name = NULL;
+    CHECK_PARSE(parse_method_name(parser, &name_range, &name));
+
+#define SELF_ID rbs_constant_pool_insert_constant(&parser->constant_pool, (const unsigned char *) "self?", strlen("self?")) /* NOTE(review): never #undef'd in the visible code, so the macro stays defined for the rest of the file */
+
+    if (parser->next_token.type == pDOT && name->constant_id == SELF_ID) { // `self?` was read as the method name, which happens when `self?` was not accepted as a kind above
+        rbs_parser_set_error(parser, parser->next_token, true, "`self?` method cannot have visibility");
+        return false;
+    } else {
+        ADVANCE_ASSERT(parser, pCOLON);
+    }
+
+    rbs_parser_push_typevar_table(parser, kind != INSTANCE_KIND);
+
+    rbs_node_list_t *overloads = rbs_node_list_new(ALLOCATOR());
+    bool overloading = false;
+    rbs_range_t overloading_range = NULL_RANGE;
+    bool loop = true;
+    while (loop) {
+        rbs_node_list_t *annotations = rbs_node_list_new(ALLOCATOR()); // NOTE: per-overload list; shadows the `annotations` parameter inside the loop
+        rbs_position_t overload_annot_pos = NullPosition; // written by parse_annotations() but not read afterwards
+
+        rbs_range_t overload_range;
+        overload_range.start = parser->current_token.range.start;
+
+        if (parser->next_token.type == tANNOTATION) {
+            CHECK_PARSE(parse_annotations(parser, annotations, &overload_annot_pos));
+        }
+
+        switch (parser->next_token.type) {
+        case pLPAREN:
+        case pARROW:
+        case pLBRACE:
+        case pLBRACKET:
+        case pQUESTION: {
+            rbs_method_type_t *method_type = NULL;
+            CHECK_PARSE(rbs_parse_method_type(parser, &method_type));
+
+            overload_range.end = parser->current_token.range.end;
+            rbs_location_t *loc = rbs_location_new(ALLOCATOR(), overload_range);
+            rbs_node_t *overload = (rbs_node_t *) rbs_ast_members_method_definition_overload_new(ALLOCATOR(), loc, annotations, (rbs_node_t *) method_type);
+            rbs_node_list_append(overloads, overload);
+            member_range.end = parser->current_token.range.end;
+            break;
+        }
+
+        case pDOT3:
+            if (accept_overload) {
+                overloading = true;
+                rbs_parser_advance(parser);
+                loop = false; // `...` ends the overload list
+                overloading_range = parser->current_token.range;
+                member_range.end = overloading_range.end;
+                break;
+            } else {
+                rbs_parser_set_error(parser, parser->next_token, true, "unexpected overloading method definition");
+                return false;
+            }
+
+        default:
+            rbs_parser_set_error(parser, parser->next_token, true, "unexpected token for method type");
+            return false;
+        }
+
+        if (parser->next_token.type == pBAR) {
+            rbs_parser_advance(parser); // NOTE(review): also runs after `...`, so a `|` following `...` is consumed before the loop exits
+        } else {
+            loop = false;
+        }
+    }
+
+    CHECK_PARSE(parser_pop_typevar_table(parser));
+
+    rbs_keyword_t *k;
+    switch (kind) { // translate the enum into the keyword stored on the node
+    case INSTANCE_KIND: {
+        k = rbs_keyword_new(ALLOCATOR(), rbs_location_current_token(parser), INTERN("instance"));
+        break;
+    }
+    case SINGLETON_KIND: {
+        k = rbs_keyword_new(ALLOCATOR(), rbs_location_current_token(parser), INTERN("singleton"));
+        break;
+    }
+    case INSTANCE_SINGLETON_KIND: {
+        k = rbs_keyword_new(ALLOCATOR(), rbs_location_current_token(parser), INTERN("singleton_instance"));
+        break;
+    }
+    default:
+        rbs_parser_set_error(parser, parser->current_token, false, "Unexpected error");
+        return false;
+    }
+
+    rbs_location_t *loc = rbs_location_new(ALLOCATOR(), member_range);
+    rbs_loc_alloc_children(ALLOCATOR(), loc, 5);
+    rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range);
+    rbs_loc_add_required_child(loc, INTERN("name"), name_range);
+    rbs_loc_add_optional_child(loc, INTERN("kind"), kind_range);
+    rbs_loc_add_optional_child(loc, INTERN("overloading"), overloading_range);
+    rbs_loc_add_optional_child(loc, INTERN("visibility"), visibility_range);
+
+    *method_definition = rbs_ast_members_method_definition_new(ALLOCATOR(), loc, name, k, overloads, annotations, comment, overloading, visibility); // `annotations` here is the function parameter again
+    return true;
+}
+
+/**
+ * class_instance_name ::= {} <class_name>
+ *                       | {} class_name `[` type args <`]`>
+ *
+ * Advances to the name token, parses the type name and, when `[` follows, the
+ * bracketed type arguments.
+ *
+ * @param kind kinds of type name accepted; passed through to parse_type_name()
+ * @param args list that receives the parsed type arguments
+ * @param name_range receives the range of the type name
+ * @param args_range receives the range from `[` to `]`, or NULL_RANGE without arguments
+ * @param name receives the parsed type name
+ * */
+NODISCARD
+static bool class_instance_name(rbs_parser_t *parser, TypeNameKind kind, rbs_node_list_t *args, rbs_range_t *name_range, rbs_range_t *args_range, rbs_type_name_t **name) {
+    rbs_parser_advance(parser);
+
+    rbs_type_name_t *type_name = NULL;
+    CHECK_PARSE(parse_type_name(parser, kind, name_range, &type_name));
+    *name = type_name;
+
+    if (parser->next_token.type == pLBRACKET) {
+        rbs_parser_advance(parser);
+        args_range->start = parser->current_token.range.start;
+        CHECK_PARSE(parse_type_list(parser, pRBRACKET, args));
+        ADVANCE_ASSERT(parser, pRBRACKET);
+        args_range->end = parser->current_token.range.end;
+    } else {
+        *args_range = NULL_RANGE;
+    }
+
+    return true;
+}
+
+/**
+ * mixin_member ::= {kINCLUDE}
+ *                | {kPREPEND}
+ *                | {kEXTEND}
+ *
+ * @param from_interface `true` when the member is in an interface.
+ * */
+NODISCARD
+static bool parse_mixin_member(rbs_parser_t *parser, bool from_interface, rbs_position_t comment_pos, rbs_node_list_t *annotations, rbs_node_t **mixin_member) {
+    // The current token is the mixin keyword; it decides the node type built at the end.
+    const enum RBSTokenType type = parser->current_token.type;
+    const rbs_range_t keyword_range = parser->current_token.range;
+
+    rbs_range_t member_range;
+    member_range.start = parser->current_token.range.start;
+    comment_pos = rbs_nonnull_pos_or(comment_pos, member_range.start);
+
+    if (type != kINCLUDE && type != kEXTEND && type != kPREPEND) {
+        rbs_parser_set_error(parser, parser->current_token, false, "Unexpected error");
+        return false;
+    }
+
+    // Only `extend` pushes a resetting type variable table.
+    const bool reset_typevar_scope = (type == kEXTEND);
+
+    if (from_interface && parser->current_token.type != kINCLUDE) {
+        rbs_parser_set_error(parser, parser->current_token, true, "unexpected mixin in interface declaration");
+        return false;
+    }
+
+    rbs_parser_push_typevar_table(parser, reset_typevar_scope);
+
+    rbs_node_list_t *args = rbs_node_list_new(ALLOCATOR());
+    rbs_type_name_t *name = NULL;
+    rbs_range_t name_range;
+    rbs_range_t args_range = NULL_RANGE;
+    // Interfaces may only mix in interfaces; elsewhere class names are accepted as well.
+    CHECK_PARSE(class_instance_name(
+        parser,
+        from_interface ? INTERFACE_NAME : (INTERFACE_NAME | CLASS_NAME),
+        args,
+        &name_range,
+        &args_range,
+        &name
+    ));
+
+    CHECK_PARSE(parser_pop_typevar_table(parser));
+
+    member_range.end = parser->current_token.range.end;
+
+    rbs_location_t *loc = rbs_location_new(ALLOCATOR(), member_range);
+    rbs_loc_alloc_children(ALLOCATOR(), loc, 3);
+    rbs_loc_add_required_child(loc, INTERN("name"), name_range);
+    rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range);
+    rbs_loc_add_optional_child(loc, INTERN("args"), args_range);
+
+    rbs_ast_comment_t *comment = rbs_parser_get_comment(parser, comment_pos.line);
+
+    if (type == kINCLUDE) {
+        *mixin_member = (rbs_node_t *) rbs_ast_members_include_new(ALLOCATOR(), loc, name, args, annotations, comment);
+    } else if (type == kEXTEND) {
+        *mixin_member = (rbs_node_t *) rbs_ast_members_extend_new(ALLOCATOR(), loc, name, args, annotations, comment);
+    } else if (type == kPREPEND) {
+        *mixin_member = (rbs_node_t *) rbs_ast_members_prepend_new(ALLOCATOR(), loc, name, args, annotations, comment);
+    } else {
+        rbs_parser_set_error(parser, parser->current_token, false, "Unexpected error");
+        return false;
+    }
+    return true;
+}
+
+/**
+ * @code
+ * alias_member ::= {kALIAS} method_name
+ *                | {kALIAS} kSELF `.` method_name kSELF `.`
+ * @endcode
+ *
+ * @param[in] instance_only `true` to reject `self.` alias.
+ * */
+NODISCARD // parse_alias_member: parses `alias new old` or, unless instance_only, `alias self.new self.old`.
+static bool parse_alias_member(rbs_parser_t *parser, bool instance_only, rbs_position_t comment_pos, rbs_node_list_t *annotations, rbs_ast_members_alias_t **alias_member) {
+    rbs_range_t member_range;
+    member_range.start = parser->current_token.range.start;
+    rbs_range_t keyword_range = parser->current_token.range;
+
+    comment_pos = rbs_nonnull_pos_or(comment_pos, member_range.start);
+    rbs_ast_comment_t *comment = rbs_parser_get_comment(parser, comment_pos.line);
+
+    rbs_keyword_t *kind;
+    rbs_ast_symbol_t *new_name, *old_name;
+    rbs_range_t new_kind_range, old_kind_range, new_name_range, old_name_range;
+
+    if (!instance_only && parser->next_token.type == kSELF) {
+        kind = rbs_keyword_new(ALLOCATOR(), rbs_location_current_token(parser), INTERN("singleton"));
+
+        new_kind_range.start = parser->next_token.range.start; // `self`
+        new_kind_range.end = parser->next_token2.range.end; // expected to be `.`, asserted just below
+        ADVANCE_ASSERT(parser, kSELF);
+        ADVANCE_ASSERT(parser, pDOT);
+        CHECK_PARSE(parse_method_name(parser, &new_name_range, &new_name));
+
+        old_kind_range.start = parser->next_token.range.start;
+        old_kind_range.end = parser->next_token2.range.end;
+        ADVANCE_ASSERT(parser, kSELF);
+        ADVANCE_ASSERT(parser, pDOT);
+        CHECK_PARSE(parse_method_name(parser, &old_name_range, &old_name));
+    } else {
+        kind = rbs_keyword_new(ALLOCATOR(), rbs_location_current_token(parser), INTERN("instance"));
+        CHECK_PARSE(parse_method_name(parser, &new_name_range, &new_name));
+        CHECK_PARSE(parse_method_name(parser, &old_name_range, &old_name));
+        new_kind_range = NULL_RANGE;
+        old_kind_range = NULL_RANGE;
+    }
+
+    member_range.end = parser->current_token.range.end;
+    rbs_location_t *loc = rbs_location_new(ALLOCATOR(), member_range);
+    rbs_loc_alloc_children(ALLOCATOR(), loc, 5);
+    rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range);
+    rbs_loc_add_required_child(loc, INTERN("new_name"), new_name_range);
+    rbs_loc_add_required_child(loc, INTERN("old_name"), old_name_range);
+    rbs_loc_add_optional_child(loc, INTERN("new_kind"), new_kind_range);
+    rbs_loc_add_optional_child(loc, INTERN("old_kind"), old_kind_range);
+
+    *alias_member = rbs_ast_members_alias_new(ALLOCATOR(), loc, new_name, old_name, kind, annotations, comment);
+    return true;
+}
+
+/*
+  variable_member ::= {tAIDENT} `:` <type>
+                    | {kSELF} `.` tAIDENT `:` <type>
+                    | {tA2IDENT} `:` <type>
+
+  Builds an instance variable, class instance variable or class variable member
+  respectively, depending on the current token. Annotations are rejected for all three.
+  The last two forms parse their type inside a type variable table pushed with `true`.
+*/
+NODISCARD
+static bool parse_variable_member(rbs_parser_t *parser, rbs_position_t comment_pos, rbs_node_list_t *annotations, rbs_node_t **variable_member) {
+    if (annotations->length > 0) {
+        rbs_parser_set_error(parser, parser->current_token, true, "annotation cannot be given to variable members");
+        return false;
+    }
+
+    rbs_range_t member_range;
+    member_range.start = parser->current_token.range.start;
+    comment_pos = rbs_nonnull_pos_or(comment_pos, member_range.start);
+    rbs_ast_comment_t *comment = rbs_parser_get_comment(parser, comment_pos.line);
+
+    switch (parser->current_token.type) {
+    case tAIDENT: { // builds an instance_variable member
+        rbs_range_t name_range = parser->current_token.range;
+        rbs_location_t *symbolLoc = rbs_location_new(ALLOCATOR(), name_range);
+        rbs_ast_symbol_t *name = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, INTERN_TOKEN(parser, parser->current_token));
+
+        ADVANCE_ASSERT(parser, pCOLON);
+        rbs_range_t colon_range = parser->current_token.range;
+
+        rbs_node_t *type;
+        CHECK_PARSE(rbs_parse_type(parser, &type));
+        member_range.end = parser->current_token.range.end;
+
+        rbs_location_t *loc = rbs_location_new(ALLOCATOR(), member_range);
+        rbs_loc_alloc_children(ALLOCATOR(), loc, 3);
+        rbs_loc_add_required_child(loc, INTERN("name"), name_range);
+        rbs_loc_add_required_child(loc, INTERN("colon"), colon_range);
+        rbs_loc_add_optional_child(loc, INTERN("kind"), NULL_RANGE);
+
+        *variable_member = (rbs_node_t *) rbs_ast_members_instance_variable_new(ALLOCATOR(), loc, name, type, comment);
+        return true;
+    }
+    case tA2IDENT: { // builds a class_variable member
+        rbs_range_t name_range = parser->current_token.range;
+        rbs_location_t *symbolLoc = rbs_location_new(ALLOCATOR(), name_range);
+        rbs_ast_symbol_t *name = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, INTERN_TOKEN(parser, parser->current_token));
+
+        ADVANCE_ASSERT(parser, pCOLON);
+        rbs_range_t colon_range = parser->current_token.range;
+
+        rbs_parser_push_typevar_table(parser, true);
+
+        rbs_node_t *type;
+        CHECK_PARSE(rbs_parse_type(parser, &type));
+
+        CHECK_PARSE(parser_pop_typevar_table(parser));
+
+        member_range.end = parser->current_token.range.end;
+
+        rbs_location_t *loc = rbs_location_new(ALLOCATOR(), member_range);
+        rbs_loc_alloc_children(ALLOCATOR(), loc, 3);
+        rbs_loc_add_required_child(loc, INTERN("name"), name_range);
+        rbs_loc_add_required_child(loc, INTERN("colon"), colon_range);
+        rbs_loc_add_optional_child(loc, INTERN("kind"), NULL_RANGE);
+
+        *variable_member = (rbs_node_t *) rbs_ast_members_class_variable_new(ALLOCATOR(), loc, name, type, comment);
+        return true;
+    }
+    case kSELF: { // `self` `.` tAIDENT: builds a class_instance_variable member
+        rbs_range_t kind_range = {
+            .start = parser->current_token.range.start,
+            .end = parser->next_token.range.end // expected to be `.`, asserted just below
+        };
+
+        ADVANCE_ASSERT(parser, pDOT);
+        if (parser->next_token.type == tAIDENT) {
+            rbs_parser_advance(parser);
+        } else {
+            rbs_parser_set_error(parser, parser->current_token, false, "Unexpected error");
+            return false;
+        }
+
+        rbs_range_t name_range = parser->current_token.range;
+        rbs_location_t *symbolLoc = rbs_location_new(ALLOCATOR(), name_range);
+        rbs_ast_symbol_t *name = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, INTERN_TOKEN(parser, parser->current_token));
+
+        ADVANCE_ASSERT(parser, pCOLON);
+        rbs_range_t colon_range = parser->current_token.range;
+
+        rbs_parser_push_typevar_table(parser, true);
+
+        rbs_node_t *type;
+        CHECK_PARSE(rbs_parse_type(parser, &type));
+
+        CHECK_PARSE(parser_pop_typevar_table(parser));
+
+        member_range.end = parser->current_token.range.end;
+
+        rbs_location_t *loc = rbs_location_new(ALLOCATOR(), member_range);
+        rbs_loc_alloc_children(ALLOCATOR(), loc, 3);
+        rbs_loc_add_required_child(loc, INTERN("name"), name_range);
+        rbs_loc_add_required_child(loc, INTERN("colon"), colon_range);
+        rbs_loc_add_optional_child(loc, INTERN("kind"), kind_range);
+
+        *variable_member = (rbs_node_t *) rbs_ast_members_class_instance_variable_new(ALLOCATOR(), loc, name, type, comment);
+        return true;
+    }
+    default:
+        rbs_parser_set_error(parser, parser->current_token, false, "Unexpected error");
+        return false;
+    }
+}
+
+/*
+  visibility_member ::= {<`public`>}
+                      | {<`private`>}
+
+  Builds a bare `public` / `private` member from the current token. Annotations are
+  rejected.
+*/
+NODISCARD
+static bool parse_visibility_member(rbs_parser_t *parser, rbs_node_list_t *annotations, rbs_node_t **visibility_member) {
+    if (annotations->length > 0) {
+        rbs_parser_set_error(parser, parser->current_token, true, "annotation cannot be given to visibility members");
+        return false;
+    }
+
+    rbs_location_t *location = rbs_location_current_token(parser);
+
+    switch (parser->current_token.type) {
+    case kPUBLIC: {
+        *visibility_member = (rbs_node_t *) rbs_ast_members_public_new(ALLOCATOR(), location);
+        return true;
+    }
+    case kPRIVATE: {
+        *visibility_member = (rbs_node_t *) rbs_ast_members_private_new(ALLOCATOR(), location);
+        return true;
+    }
+    default:
+        rbs_parser_set_error(parser, parser->current_token, false, "Unexpected error");
+        return false;
+    }
+}
+
+/*
+  attribute_member ::= {attr_keyword} attr_name attr_var `:`
+                     | {visibility} attr_keyword attr_name attr_var `:`
+                     | {attr_keyword} `self` `.` attr_name attr_var `:`
+                     | {visibility} attr_keyword `self` `.` attr_name attr_var `:`
+
+  attr_keyword ::= `attr_reader` | `attr_writer` | `attr_accessor`
+
+  visibility ::= `public` | `private`
+
+  attr_var ::= # empty
+             | `(` tAIDENT `)` # Ivar name
+             | `(` `)` # No variable
+*/
+NODISCARD
+static bool parse_attribute_member(rbs_parser_t *parser, rbs_position_t comment_pos, rbs_node_list_t *annotations, rbs_node_t **attribute_member) {
+    rbs_range_t member_range;
+
+    member_range.start =
parser->current_token.range.start; + comment_pos = rbs_nonnull_pos_or(comment_pos, member_range.start); + rbs_ast_comment_t *comment = rbs_parser_get_comment(parser, comment_pos.line); + + rbs_range_t visibility_range; + rbs_keyword_t *visibility; + switch (parser->current_token.type) { + case kPRIVATE: { + visibility_range = parser->current_token.range; + visibility = rbs_keyword_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), visibility_range), INTERN("private")); + rbs_parser_advance(parser); + break; + } + case kPUBLIC: { + visibility_range = parser->current_token.range; + visibility = rbs_keyword_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), visibility_range), INTERN("public")); + rbs_parser_advance(parser); + break; + } + default: + visibility = NULL; + visibility_range = NULL_RANGE; + break; + } + + enum RBSTokenType attr_type = parser->current_token.type; + rbs_range_t keyword_range = parser->current_token.range; + + rbs_range_t kind_range; + InstanceSingletonKind is_kind = parse_instance_singleton_kind(parser, false, &kind_range); + + rbs_keyword_t *kind = rbs_keyword_new( + ALLOCATOR(), + rbs_location_new(ALLOCATOR(), keyword_range), + INTERN(((is_kind == INSTANCE_KIND) ? 
"instance" : "singleton")) + ); + + rbs_range_t name_range; + rbs_ast_symbol_t *attr_name; + CHECK_PARSE(parse_method_name(parser, &name_range, &attr_name)); + + rbs_node_t *ivar_name; // rbs_ast_symbol_t, NULL or rbs_ast_bool_new(ALLOCATOR(), false) + rbs_range_t ivar_range, ivar_name_range; + if (parser->next_token.type == pLPAREN) { + ADVANCE_ASSERT(parser, pLPAREN); + ivar_range.start = parser->current_token.range.start; + + if (parser_advance_if(parser, tAIDENT)) { + rbs_location_t *symbolLoc = rbs_location_current_token(parser); + ivar_name = (rbs_node_t *) rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, INTERN_TOKEN(parser, parser->current_token)); + ivar_name_range = parser->current_token.range; + } else { + rbs_range_t false_range = { + .start = parser->current_token.range.start, + .end = parser->current_token.range.end + }; + ivar_name = (rbs_node_t *) rbs_ast_bool_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), false_range), false); + ivar_name_range = NULL_RANGE; + } + + ADVANCE_ASSERT(parser, pRPAREN); + ivar_range.end = parser->current_token.range.end; + } else { + ivar_range = NULL_RANGE; + ivar_name = NULL; + ivar_name_range = NULL_RANGE; + } + + ADVANCE_ASSERT(parser, pCOLON); + rbs_range_t colon_range = parser->current_token.range; + + rbs_parser_push_typevar_table(parser, is_kind == SINGLETON_KIND); + + rbs_node_t *type; + CHECK_PARSE(rbs_parse_type(parser, &type)); + + CHECK_PARSE(parser_pop_typevar_table(parser)); + + member_range.end = parser->current_token.range.end; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), member_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 7); + rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); + rbs_loc_add_required_child(loc, INTERN("name"), name_range); + rbs_loc_add_required_child(loc, INTERN("colon"), colon_range); + rbs_loc_add_optional_child(loc, INTERN("kind"), kind_range); + rbs_loc_add_optional_child(loc, INTERN("ivar"), ivar_range); + 
rbs_loc_add_optional_child(loc, INTERN("ivar_name"), ivar_name_range); + rbs_loc_add_optional_child(loc, INTERN("visibility"), visibility_range); + + switch (attr_type) { + case kATTRREADER: + *attribute_member = (rbs_node_t *) rbs_ast_members_attr_reader_new(ALLOCATOR(), loc, attr_name, type, ivar_name, kind, annotations, comment, visibility); + return true; + case kATTRWRITER: + *attribute_member = (rbs_node_t *) rbs_ast_members_attr_writer_new(ALLOCATOR(), loc, attr_name, type, ivar_name, kind, annotations, comment, visibility); + return true; + case kATTRACCESSOR: + *attribute_member = (rbs_node_t *) rbs_ast_members_attr_accessor_new(ALLOCATOR(), loc, attr_name, type, ivar_name, kind, annotations, comment, visibility); + return true; + default: + rbs_parser_set_error(parser, parser->current_token, false, "Unexpected error"); + return false; + } +} + +/* + interface_members ::= {} ... kEND + + interface_member ::= def_member (instance method only && no overloading) + | mixin_member (interface only) + | alias_member (instance only) +*/ +NODISCARD +static bool parse_interface_members(rbs_parser_t *parser, rbs_node_list_t **members) { + *members = rbs_node_list_new(ALLOCATOR()); + + while (parser->next_token.type != kEND) { + rbs_node_list_t *annotations = rbs_node_list_new(ALLOCATOR()); + rbs_position_t annot_pos = NullPosition; + + CHECK_PARSE(parse_annotations(parser, annotations, &annot_pos)); + rbs_parser_advance(parser); + + rbs_node_t *member; + switch (parser->current_token.type) { + case kDEF: { + rbs_ast_members_method_definition_t *method_definition = NULL; + CHECK_PARSE(parse_member_def(parser, true, true, annot_pos, annotations, &method_definition)); + member = (rbs_node_t *) method_definition; + break; + } + + case kINCLUDE: + case kEXTEND: + case kPREPEND: { + CHECK_PARSE(parse_mixin_member(parser, true, annot_pos, annotations, &member)); + break; + } + + case kALIAS: { + rbs_ast_members_alias_t *alias_member = NULL; + 
CHECK_PARSE(parse_alias_member(parser, true, annot_pos, annotations, &alias_member)); + member = (rbs_node_t *) alias_member; + break; + } + + default: + rbs_parser_set_error(parser, parser->current_token, true, "unexpected token for interface declaration member"); + return false; + } + + rbs_node_list_append(*members, member); + } + + return true; +} + +/* + interface_decl ::= {`interface`} interface_name module_type_params interface_members +*/ +NODISCARD +static bool parse_interface_decl(rbs_parser_t *parser, rbs_position_t comment_pos, rbs_node_list_t *annotations, rbs_ast_declarations_interface_t **interface_decl) { + rbs_parser_push_typevar_table(parser, true); + + rbs_range_t member_range; + member_range.start = parser->current_token.range.start; + comment_pos = rbs_nonnull_pos_or(comment_pos, member_range.start); + + rbs_range_t keyword_range = parser->current_token.range; + + rbs_parser_advance(parser); + + rbs_range_t name_range; + rbs_type_name_t *name = NULL; + CHECK_PARSE(parse_type_name(parser, INTERFACE_NAME, &name_range, &name)); + + rbs_range_t type_params_range; + rbs_node_list_t *type_params; + CHECK_PARSE(parse_type_params(parser, &type_params_range, true, &type_params)); + + rbs_node_list_t *members = NULL; + CHECK_PARSE(parse_interface_members(parser, &members)); + + ADVANCE_ASSERT(parser, kEND); + rbs_range_t end_range = parser->current_token.range; + member_range.end = end_range.end; + + CHECK_PARSE(parser_pop_typevar_table(parser)); + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), member_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 4); + rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); + rbs_loc_add_required_child(loc, INTERN("name"), name_range); + rbs_loc_add_required_child(loc, INTERN("end"), end_range); + rbs_loc_add_optional_child(loc, INTERN("type_params"), type_params_range); + + rbs_ast_comment_t *comment = rbs_parser_get_comment(parser, comment_pos.line); + + *interface_decl = 
rbs_ast_declarations_interface_new(ALLOCATOR(), loc, name, type_params, members, annotations, comment); + return true; +} + +/* + module_self_types ::= {`:`} module_self_type `,` ... `,` + + module_self_type ::= + | module_name `[` type_list <`]`> +*/ +NODISCARD +static bool parse_module_self_types(rbs_parser_t *parser, rbs_node_list_t *array) { + while (true) { + rbs_parser_advance(parser); + + rbs_range_t self_range; + self_range.start = parser->current_token.range.start; + + rbs_range_t name_range; + rbs_type_name_t *module_name = NULL; + CHECK_PARSE(parse_type_name(parser, CLASS_NAME | INTERFACE_NAME, &name_range, &module_name)); + self_range.end = name_range.end; + + rbs_node_list_t *args = rbs_node_list_new(ALLOCATOR()); + rbs_range_t args_range = NULL_RANGE; + if (parser->next_token.type == pLBRACKET) { + rbs_parser_advance(parser); + args_range.start = parser->current_token.range.start; + CHECK_PARSE(parse_type_list(parser, pRBRACKET, args)); + rbs_parser_advance(parser); + self_range.end = args_range.end = parser->current_token.range.end; + } + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), self_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 2); + rbs_loc_add_required_child(loc, INTERN("name"), name_range); + rbs_loc_add_optional_child(loc, INTERN("args"), args_range); + + rbs_ast_declarations_module_self_t *self_type = rbs_ast_declarations_module_self_new(ALLOCATOR(), loc, module_name, args); + rbs_node_list_append(array, (rbs_node_t *) self_type); + + if (parser->next_token.type == pCOMMA) { + rbs_parser_advance(parser); + } else { + break; + } + } + + return true; +} + +NODISCARD +static bool parse_nested_decl(rbs_parser_t *parser, const char *nested_in, rbs_position_t annot_pos, rbs_node_list_t *annotations, rbs_node_t **decl); + +/* + module_members ::= {} ... 
kEND + + module_member ::= def_member + | variable_member + | mixin_member + | alias_member + | attribute_member + | `public` + | `private` +*/ +NODISCARD +static bool parse_module_members(rbs_parser_t *parser, rbs_node_list_t **members) { + *members = rbs_node_list_new(ALLOCATOR()); + + while (parser->next_token.type != kEND) { + rbs_node_list_t *annotations = rbs_node_list_new(ALLOCATOR()); + rbs_position_t annot_pos; + CHECK_PARSE(parse_annotations(parser, annotations, &annot_pos)); + + rbs_parser_advance(parser); + + rbs_node_t *member; + switch (parser->current_token.type) { + case kDEF: { + rbs_ast_members_method_definition_t *method_definition; + CHECK_PARSE(parse_member_def(parser, false, true, annot_pos, annotations, &method_definition)); + member = (rbs_node_t *) method_definition; + break; + } + + case kINCLUDE: + case kEXTEND: + case kPREPEND: { + CHECK_PARSE(parse_mixin_member(parser, false, annot_pos, annotations, &member)); + break; + } + case kALIAS: { + rbs_ast_members_alias_t *alias_member = NULL; + CHECK_PARSE(parse_alias_member(parser, false, annot_pos, annotations, &alias_member)); + member = (rbs_node_t *) alias_member; + break; + } + case tAIDENT: + case tA2IDENT: + case kSELF: { + CHECK_PARSE(parse_variable_member(parser, annot_pos, annotations, &member)); + break; + } + + case kATTRREADER: + case kATTRWRITER: + case kATTRACCESSOR: { + CHECK_PARSE(parse_attribute_member(parser, annot_pos, annotations, &member)); + break; + } + + case kPUBLIC: + case kPRIVATE: + if (parser->next_token.range.start.line == parser->current_token.range.start.line) { + switch (parser->next_token.type) { + case kDEF: { + rbs_ast_members_method_definition_t *method_definition = NULL; + CHECK_PARSE(parse_member_def(parser, false, true, annot_pos, annotations, &method_definition)); + member = (rbs_node_t *) method_definition; + break; + } + case kATTRREADER: + case kATTRWRITER: + case kATTRACCESSOR: { + CHECK_PARSE(parse_attribute_member(parser, annot_pos, 
annotations, &member)); + break; + } + default: + rbs_parser_set_error(parser, parser->next_token, true, "method or attribute definition is expected after visibility modifier"); + return false; + } + } else { + CHECK_PARSE(parse_visibility_member(parser, annotations, &member)); + } + break; + + default: + CHECK_PARSE(parse_nested_decl(parser, "module", annot_pos, annotations, &member)); + break; + } + + rbs_node_list_append(*members, member); + } + + return true; +} + +/* + module_decl ::= {module_name} module_type_params module_members + | {module_name} module_name module_type_params `:` module_self_types module_members +*/ +NODISCARD +static bool parse_module_decl0(rbs_parser_t *parser, rbs_range_t keyword_range, rbs_type_name_t *module_name, rbs_range_t name_range, rbs_ast_comment_t *comment, rbs_node_list_t *annotations, rbs_ast_declarations_module_t **module_decl) { + rbs_parser_push_typevar_table(parser, true); + + rbs_range_t decl_range; + decl_range.start = keyword_range.start; + + rbs_range_t type_params_range; + rbs_node_list_t *type_params; + CHECK_PARSE(parse_type_params(parser, &type_params_range, true, &type_params)); + + rbs_node_list_t *self_types = rbs_node_list_new(ALLOCATOR()); + rbs_range_t colon_range; + rbs_range_t self_types_range; + if (parser->next_token.type == pCOLON) { + rbs_parser_advance(parser); + colon_range = parser->current_token.range; + self_types_range.start = parser->next_token.range.start; + CHECK_PARSE(parse_module_self_types(parser, self_types)); + self_types_range.end = parser->current_token.range.end; + } else { + colon_range = NULL_RANGE; + self_types_range = NULL_RANGE; + } + + rbs_node_list_t *members = NULL; + CHECK_PARSE(parse_module_members(parser, &members)); + + ADVANCE_ASSERT(parser, kEND); + rbs_range_t end_range = parser->current_token.range; + decl_range.end = parser->current_token.range.end; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), decl_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 6); + 
rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); + rbs_loc_add_required_child(loc, INTERN("name"), name_range); + rbs_loc_add_required_child(loc, INTERN("end"), end_range); + rbs_loc_add_optional_child(loc, INTERN("type_params"), type_params_range); + rbs_loc_add_optional_child(loc, INTERN("colon"), colon_range); + rbs_loc_add_optional_child(loc, INTERN("self_types"), self_types_range); + + CHECK_PARSE(parser_pop_typevar_table(parser)); + + *module_decl = rbs_ast_declarations_module_new(ALLOCATOR(), loc, module_name, type_params, self_types, members, annotations, comment); + return true; +} + +/* + module_decl ::= {`module`} module_name `=` old_module_name + | {`module`} module_name module_decl0 + +*/ +NODISCARD +static bool parse_module_decl(rbs_parser_t *parser, rbs_position_t comment_pos, rbs_node_list_t *annotations, rbs_node_t **module_decl) { + rbs_range_t keyword_range = parser->current_token.range; + + comment_pos = rbs_nonnull_pos_or(comment_pos, parser->current_token.range.start); + rbs_ast_comment_t *comment = rbs_parser_get_comment(parser, comment_pos.line); + + rbs_parser_advance(parser); + + rbs_range_t module_name_range; + rbs_type_name_t *module_name; + CHECK_PARSE(parse_type_name(parser, CLASS_NAME, &module_name_range, &module_name)); + + if (parser->next_token.type == pEQ) { + rbs_range_t eq_range = parser->next_token.range; + rbs_parser_advance(parser); + rbs_parser_advance(parser); + + rbs_range_t old_name_range; + rbs_type_name_t *old_name = NULL; + CHECK_PARSE(parse_type_name(parser, CLASS_NAME, &old_name_range, &old_name)); + + rbs_range_t decl_range = { + .start = keyword_range.start, + .end = old_name_range.end + }; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), decl_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 4); + rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); + rbs_loc_add_required_child(loc, INTERN("new_name"), module_name_range); + rbs_loc_add_required_child(loc, INTERN("eq"), 
eq_range); + rbs_loc_add_optional_child(loc, INTERN("old_name"), old_name_range); + + *module_decl = (rbs_node_t *) rbs_ast_declarations_module_alias_new(ALLOCATOR(), loc, module_name, old_name, comment, annotations); + } else { + rbs_ast_declarations_module_t *module_decl0 = NULL; + CHECK_PARSE(parse_module_decl0(parser, keyword_range, module_name, module_name_range, comment, annotations, &module_decl0)); + *module_decl = (rbs_node_t *) module_decl0; + } + + return true; +} + +/* + class_decl_super ::= {} `<` + | {<>} +*/ +NODISCARD +static bool parse_class_decl_super(rbs_parser_t *parser, rbs_range_t *lt_range, rbs_ast_declarations_class_super_t **super) { + if (parser_advance_if(parser, pLT)) { + *lt_range = parser->current_token.range; + + rbs_range_t super_range; + super_range.start = parser->next_token.range.start; + + rbs_node_list_t *args = rbs_node_list_new(ALLOCATOR()); + rbs_type_name_t *name = NULL; + rbs_range_t name_range, args_range; + CHECK_PARSE(class_instance_name(parser, CLASS_NAME, args, &name_range, &args_range, &name)); + + super_range.end = parser->current_token.range.end; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), super_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 2); + rbs_loc_add_required_child(loc, INTERN("name"), name_range); + rbs_loc_add_optional_child(loc, INTERN("args"), args_range); + + *super = rbs_ast_declarations_class_super_new(ALLOCATOR(), loc, name, args); + } else { + *lt_range = NULL_RANGE; + } + + return true; +} + +/* + class_decl ::= {class_name} type_params class_decl_super class_members <`end`> +*/ +NODISCARD +static bool parse_class_decl0(rbs_parser_t *parser, rbs_range_t keyword_range, rbs_type_name_t *name, rbs_range_t name_range, rbs_ast_comment_t *comment, rbs_node_list_t *annotations, rbs_ast_declarations_class_t **class_decl) { + rbs_parser_push_typevar_table(parser, true); + + rbs_range_t decl_range; + decl_range.start = keyword_range.start; + + rbs_range_t type_params_range; + rbs_node_list_t 
*type_params; + CHECK_PARSE(parse_type_params(parser, &type_params_range, true, &type_params)); + + rbs_range_t lt_range; + rbs_ast_declarations_class_super_t *super = NULL; + CHECK_PARSE(parse_class_decl_super(parser, <_range, &super)); + + rbs_node_list_t *members = NULL; + CHECK_PARSE(parse_module_members(parser, &members)); + + ADVANCE_ASSERT(parser, kEND); + + rbs_range_t end_range = parser->current_token.range; + + decl_range.end = end_range.end; + + CHECK_PARSE(parser_pop_typevar_table(parser)); + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), decl_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 5); + rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); + rbs_loc_add_required_child(loc, INTERN("name"), name_range); + rbs_loc_add_required_child(loc, INTERN("end"), end_range); + rbs_loc_add_optional_child(loc, INTERN("type_params"), type_params_range); + rbs_loc_add_optional_child(loc, INTERN("lt"), lt_range); + + *class_decl = rbs_ast_declarations_class_new(ALLOCATOR(), loc, name, type_params, super, members, annotations, comment); + return true; +} + +/* + class_decl ::= {`class`} class_name `=` + | {`class`} class_name +*/ +NODISCARD +static bool parse_class_decl(rbs_parser_t *parser, rbs_position_t comment_pos, rbs_node_list_t *annotations, rbs_node_t **class_decl) { + rbs_range_t keyword_range = parser->current_token.range; + + comment_pos = rbs_nonnull_pos_or(comment_pos, parser->current_token.range.start); + rbs_ast_comment_t *comment = rbs_parser_get_comment(parser, comment_pos.line); + + rbs_parser_advance(parser); + rbs_range_t class_name_range; + rbs_type_name_t *class_name = NULL; + CHECK_PARSE(parse_type_name(parser, CLASS_NAME, &class_name_range, &class_name)); + + if (parser->next_token.type == pEQ) { + rbs_range_t eq_range = parser->next_token.range; + rbs_parser_advance(parser); + rbs_parser_advance(parser); + + rbs_range_t old_name_range; + rbs_type_name_t *old_name = NULL; + CHECK_PARSE(parse_type_name(parser, 
CLASS_NAME, &old_name_range, &old_name)); + + rbs_range_t decl_range = { + .start = keyword_range.start, + .end = old_name_range.end, + }; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), decl_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 4); + rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); + rbs_loc_add_required_child(loc, INTERN("new_name"), class_name_range); + rbs_loc_add_required_child(loc, INTERN("eq"), eq_range); + rbs_loc_add_optional_child(loc, INTERN("old_name"), old_name_range); + + *class_decl = (rbs_node_t *) rbs_ast_declarations_class_alias_new(ALLOCATOR(), loc, class_name, old_name, comment, annotations); + } else { + rbs_ast_declarations_class_t *class_decl0 = NULL; + CHECK_PARSE(parse_class_decl0(parser, keyword_range, class_name, class_name_range, comment, annotations, &class_decl0)); + *class_decl = (rbs_node_t *) class_decl0; + } + + return true; +} + +/* + nested_decl ::= {} + | {} + | {} + | {} + | {} +*/ +NODISCARD +static bool parse_nested_decl(rbs_parser_t *parser, const char *nested_in, rbs_position_t annot_pos, rbs_node_list_t *annotations, rbs_node_t **decl) { + rbs_parser_push_typevar_table(parser, true); + + switch (parser->current_token.type) { + case tUIDENT: + case pCOLON2: { + rbs_ast_declarations_constant_t *constant = NULL; + CHECK_PARSE(parse_const_decl(parser, annotations, &constant)); + *decl = (rbs_node_t *) constant; + break; + } + case tGIDENT: { + rbs_ast_declarations_global_t *global = NULL; + CHECK_PARSE(parse_global_decl(parser, annotations, &global)); + *decl = (rbs_node_t *) global; + break; + } + case kTYPE: { + rbs_ast_declarations_type_alias_t *typealias = NULL; + CHECK_PARSE(parse_type_decl(parser, annot_pos, annotations, &typealias)); + *decl = (rbs_node_t *) typealias; + break; + } + case kINTERFACE: { + rbs_ast_declarations_interface_t *interface_decl = NULL; + CHECK_PARSE(parse_interface_decl(parser, annot_pos, annotations, &interface_decl)); + *decl = (rbs_node_t *) 
interface_decl; + break; + } + case kMODULE: { + rbs_node_t *module_decl = NULL; + CHECK_PARSE(parse_module_decl(parser, annot_pos, annotations, &module_decl)); + *decl = module_decl; + break; + } + case kCLASS: { + rbs_node_t *class_decl = NULL; + CHECK_PARSE(parse_class_decl(parser, annot_pos, annotations, &class_decl)); + *decl = class_decl; + break; + } + default: + rbs_parser_set_error(parser, parser->current_token, true, "unexpected token for class/module declaration member"); + return false; + } + + CHECK_PARSE(parser_pop_typevar_table(parser)); + + return true; +} + +NODISCARD +static bool parse_decl(rbs_parser_t *parser, rbs_node_t **decl) { + rbs_node_list_t *annotations = rbs_node_list_new(ALLOCATOR()); + rbs_position_t annot_pos = NullPosition; + + CHECK_PARSE(parse_annotations(parser, annotations, &annot_pos)); + rbs_parser_advance(parser); + + switch (parser->current_token.type) { + case tUIDENT: + case pCOLON2: { + rbs_ast_declarations_constant_t *constant = NULL; + CHECK_PARSE(parse_const_decl(parser, annotations, &constant)); + *decl = (rbs_node_t *) constant; + return true; + } + case tGIDENT: { + rbs_ast_declarations_global_t *global = NULL; + CHECK_PARSE(parse_global_decl(parser, annotations, &global)); + *decl = (rbs_node_t *) global; + return true; + } + case kTYPE: { + rbs_ast_declarations_type_alias_t *typealias = NULL; + CHECK_PARSE(parse_type_decl(parser, annot_pos, annotations, &typealias)); + *decl = (rbs_node_t *) typealias; + return true; + } + case kINTERFACE: { + rbs_ast_declarations_interface_t *interface_decl = NULL; + CHECK_PARSE(parse_interface_decl(parser, annot_pos, annotations, &interface_decl)); + *decl = (rbs_node_t *) interface_decl; + return true; + } + case kMODULE: { + rbs_node_t *module_decl = NULL; + CHECK_PARSE(parse_module_decl(parser, annot_pos, annotations, &module_decl)); + *decl = module_decl; + return true; + } + case kCLASS: { + rbs_node_t *class_decl = NULL; + CHECK_PARSE(parse_class_decl(parser, annot_pos, 
annotations, &class_decl)); + *decl = class_decl; + return true; + } + default: + rbs_parser_set_error(parser, parser->current_token, true, "cannot start a declaration"); + return false; + } +} + +/* + namespace ::= {} (`::`)? (`tUIDENT` `::`)* `tUIDENT` <`::`> + | {} <> (empty -- returns empty namespace) +*/ +NODISCARD +static bool parse_namespace(rbs_parser_t *parser, rbs_range_t *rg, rbs_namespace_t **namespace) { + bool is_absolute = false; + + if (parser->next_token.type == pCOLON2) { + *rg = (rbs_range_t) { + .start = parser->next_token.range.start, + .end = parser->next_token.range.end, + }; + is_absolute = true; + + rbs_parser_advance(parser); + } + + rbs_node_list_t *path = rbs_node_list_new(ALLOCATOR()); + + while (true) { + if (parser->next_token.type == tUIDENT && parser->next_token2.type == pCOLON2) { + rbs_location_t *symbolLoc = rbs_location_new(ALLOCATOR(), parser->next_token.range); + rbs_ast_symbol_t *symbol = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, INTERN_TOKEN(parser, parser->next_token)); + rbs_node_list_append(path, (rbs_node_t *) symbol); + if (rbs_null_position_p(rg->start)) { + rg->start = parser->next_token.range.start; + } + rg->end = parser->next_token2.range.end; + rbs_parser_advance(parser); + rbs_parser_advance(parser); + } else { + break; + } + } + + *namespace = rbs_namespace_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), *rg), path, is_absolute); + return true; +} + +/* + use_clauses ::= {} use_clause `,` ... 
`,` + + use_clause ::= {} namespace + | {} namespace tUIDENT `as` + | {} namespace +*/ +NODISCARD +static bool parse_use_clauses(rbs_parser_t *parser, rbs_node_list_t *clauses) { + while (true) { + rbs_range_t namespace_range = NULL_RANGE; + rbs_namespace_t *namespace = NULL; + CHECK_PARSE(parse_namespace(parser, &namespace_range, &namespace)); + + switch (parser->next_token.type) { + case tLIDENT: + case tULIDENT: + case tUIDENT: { + rbs_parser_advance(parser); + + enum RBSTokenType ident_type = parser->current_token.type; + + rbs_range_t type_name_range = rbs_null_range_p(namespace_range) ? parser->current_token.range : (rbs_range_t) { .start = namespace_range.start, .end = parser->current_token.range.end }; + + rbs_location_t *symbolLoc = rbs_location_current_token(parser); + rbs_ast_symbol_t *symbol = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, INTERN_TOKEN(parser, parser->current_token)); + rbs_type_name_t *type_name = rbs_type_name_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), type_name_range), namespace, symbol); + + rbs_range_t keyword_range = NULL_RANGE; + rbs_range_t new_name_range = NULL_RANGE; + rbs_ast_symbol_t *new_name = NULL; + rbs_range_t clause_range = type_name_range; + if (parser->next_token.type == kAS) { + rbs_parser_advance(parser); + keyword_range = parser->current_token.range; + + if (ident_type == tUIDENT) ADVANCE_ASSERT(parser, tUIDENT); + if (ident_type == tLIDENT) ADVANCE_ASSERT(parser, tLIDENT); + if (ident_type == tULIDENT) ADVANCE_ASSERT(parser, tULIDENT); + + rbs_location_t *symbolLoc = rbs_location_new(ALLOCATOR(), new_name_range); + new_name = rbs_ast_symbol_new(ALLOCATOR(), symbolLoc, &parser->constant_pool, INTERN_TOKEN(parser, parser->current_token)); + new_name_range = parser->current_token.range; + clause_range.end = new_name_range.end; + } + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), clause_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 3); + rbs_loc_add_required_child(loc, 
INTERN("type_name"), type_name_range); + rbs_loc_add_optional_child(loc, INTERN("keyword"), keyword_range); + rbs_loc_add_optional_child(loc, INTERN("new_name"), new_name_range); + + rbs_ast_directives_use_single_clause_t *clause = rbs_ast_directives_use_single_clause_new(ALLOCATOR(), loc, type_name, new_name); + rbs_node_list_append(clauses, (rbs_node_t *) clause); + + break; + } + case pSTAR: { + rbs_range_t clause_range = namespace_range; + rbs_parser_advance(parser); + + rbs_range_t star_range = parser->current_token.range; + clause_range.end = star_range.end; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), clause_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 2); + rbs_loc_add_required_child(loc, INTERN("namespace"), namespace_range); + rbs_loc_add_required_child(loc, INTERN("star"), star_range); + + rbs_ast_directives_use_wildcard_clause_t *clause = rbs_ast_directives_use_wildcard_clause_new(ALLOCATOR(), loc, namespace); + rbs_node_list_append(clauses, (rbs_node_t *) clause); + + break; + } + default: + rbs_parser_set_error(parser, parser->next_token, true, "use clause is expected"); + return false; + } + + if (parser->next_token.type == pCOMMA) { + rbs_parser_advance(parser); + } else { + break; + } + } + + return true; +} + +/* + use_directive ::= {} `use` + */ +NODISCARD +static bool parse_use_directive(rbs_parser_t *parser, rbs_ast_directives_use_t **use_directive) { + if (parser->next_token.type == kUSE) { + rbs_parser_advance(parser); + + rbs_range_t keyword_range = parser->current_token.range; + + rbs_node_list_t *clauses = rbs_node_list_new(ALLOCATOR()); + CHECK_PARSE(parse_use_clauses(parser, clauses)); + + rbs_range_t directive_range = keyword_range; + directive_range.end = parser->current_token.range.end; + + rbs_location_t *loc = rbs_location_new(ALLOCATOR(), directive_range); + rbs_loc_alloc_children(ALLOCATOR(), loc, 1); + rbs_loc_add_required_child(loc, INTERN("keyword"), keyword_range); + + *use_directive = 
rbs_ast_directives_use_new(ALLOCATOR(), loc, clauses); + } + + return true; +} + +static rbs_ast_comment_t *parse_comment_lines(rbs_parser_t *parser, rbs_comment_t *com) { + size_t hash_bytes = parser->rbs_lexer_t->encoding->char_width((const uint8_t *) "#", (size_t) 1); + size_t space_bytes = parser->rbs_lexer_t->encoding->char_width((const uint8_t *) " ", (size_t) 1); + + rbs_buffer_t rbs_buffer; + rbs_buffer_init(ALLOCATOR(), &rbs_buffer); + + for (size_t i = 0; i < com->line_tokens_count; i++) { + rbs_token_t tok = com->line_tokens[i]; + + const char *comment_start = parser->rbs_lexer_t->string.start + tok.range.start.byte_pos + hash_bytes; + size_t comment_bytes = RBS_RANGE_BYTES(tok.range) - hash_bytes; + + rbs_string_t str = rbs_string_new( + comment_start, + parser->rbs_lexer_t->string.end + ); + unsigned char c = rbs_utf8_string_to_codepoint(str); + + if (c == ' ') { + comment_start += space_bytes; + comment_bytes -= space_bytes; + } + + rbs_buffer_append_string(ALLOCATOR(), &rbs_buffer, comment_start, comment_bytes); + rbs_buffer_append_cstr(ALLOCATOR(), &rbs_buffer, "\n"); + } + + return rbs_ast_comment_new( + ALLOCATOR(), + rbs_location_new(ALLOCATOR(), (rbs_range_t) { .start = com->start, .end = com->end }), + rbs_buffer_to_string(&rbs_buffer) + ); +} + +static rbs_comment_t *comment_get_comment(rbs_comment_t *com, int line) { + if (com == NULL) { + return NULL; + } + + if (com->end.line < line) { + return NULL; + } + + if (com->end.line == line) { + return com; + } + + return comment_get_comment(com->next_comment, line); +} + +static void comment_insert_new_line(rbs_allocator_t *allocator, rbs_comment_t *com, rbs_token_t comment_token) { + if (com->line_tokens_count == com->line_tokens_capacity) { + size_t old_size = com->line_tokens_capacity; + size_t new_size = old_size * 2; + com->line_tokens_capacity = new_size; + + com->line_tokens = rbs_allocator_realloc( + allocator, + com->line_tokens, + sizeof(rbs_token_t) * old_size, + sizeof(rbs_token_t) * 
new_size, + rbs_token_t + ); + } + + com->line_tokens[com->line_tokens_count++] = comment_token; + com->end = comment_token.range.end; +} + +static rbs_comment_t *alloc_comment(rbs_allocator_t *allocator, rbs_token_t comment_token, rbs_comment_t *last_comment) { + rbs_comment_t *new_comment = rbs_allocator_alloc(allocator, rbs_comment_t); + + size_t initial_line_capacity = 10; + + rbs_token_t *tokens = rbs_allocator_calloc(allocator, initial_line_capacity, rbs_token_t); + tokens[0] = comment_token; + + *new_comment = (rbs_comment_t) { + .start = comment_token.range.start, + .end = comment_token.range.end, + + .line_tokens_capacity = initial_line_capacity, + .line_tokens_count = 1, + .line_tokens = tokens, + + .next_comment = last_comment, + }; + + return new_comment; +} + +/** + * Insert new comment line token. + * */ +static void insert_comment_line(rbs_parser_t *parser, rbs_token_t tok) { + int prev_line = tok.range.start.line - 1; + + rbs_comment_t *com = comment_get_comment(parser->last_comment, prev_line); + + if (com) { + comment_insert_new_line(ALLOCATOR(), com, tok); + } else { + parser->last_comment = alloc_comment(ALLOCATOR(), tok, parser->last_comment); + } +} + +bool rbs_parse_signature(rbs_parser_t *parser, rbs_signature_t **signature) { + rbs_range_t signature_range; + signature_range.start = parser->current_token.range.start; + + rbs_node_list_t *dirs = rbs_node_list_new(ALLOCATOR()); + rbs_node_list_t *decls = rbs_node_list_new(ALLOCATOR()); + + while (parser->next_token.type == kUSE) { + rbs_ast_directives_use_t *use_node; + CHECK_PARSE(parse_use_directive(parser, &use_node)); + + if (use_node == NULL) { + rbs_node_list_append(dirs, NULL); + } else { + rbs_node_list_append(dirs, (rbs_node_t *) use_node); + } + } + + while (parser->next_token.type != pEOF) { + rbs_node_t *decl = NULL; + CHECK_PARSE(parse_decl(parser, &decl)); + rbs_node_list_append(decls, decl); + } + + signature_range.end = parser->current_token.range.end; + *signature = 
rbs_signature_new(ALLOCATOR(), rbs_location_new(ALLOCATOR(), signature_range), dirs, decls); + return true; +} + +id_table *alloc_empty_table(rbs_allocator_t *allocator) { + id_table *table = rbs_allocator_alloc(allocator, id_table); + + *table = (id_table) { + .size = 10, + .count = 0, + .ids = rbs_allocator_calloc(allocator, 10, rbs_constant_id_t), + .next = NULL, + }; + + return table; +} + +id_table *alloc_reset_table(rbs_allocator_t *allocator) { + id_table *table = rbs_allocator_alloc(allocator, id_table); + + *table = (id_table) { + .size = 0, + .count = 0, + .ids = NULL, + .next = NULL, + }; + + return table; +} + +void rbs_parser_push_typevar_table(rbs_parser_t *parser, bool reset) { + if (reset) { + id_table *table = alloc_reset_table(ALLOCATOR()); + table->next = parser->vars; + parser->vars = table; + } + + id_table *table = alloc_empty_table(ALLOCATOR()); + table->next = parser->vars; + parser->vars = table; +} + +NODISCARD +bool rbs_parser_insert_typevar(rbs_parser_t *parser, rbs_constant_id_t id) { + id_table *table = parser->vars; + + if (RESET_TABLE_P(table)) { + rbs_parser_set_error(parser, parser->current_token, false, "Cannot insert to reset table"); + return false; + } + + if (table->size == table->count) { + // expand + rbs_constant_id_t *ptr = table->ids; + table->size += 10; + table->ids = rbs_allocator_calloc(ALLOCATOR(), table->size, rbs_constant_id_t); + memcpy(table->ids, ptr, sizeof(rbs_constant_id_t) * table->count); + } + + table->ids[table->count++] = id; + + return true; +} + +void rbs_parser_print(rbs_parser_t *parser) { + printf(" current_token = %s (%d...%d)\n", rbs_token_type_str(parser->current_token.type), parser->current_token.range.start.char_pos, parser->current_token.range.end.char_pos); + printf(" next_token = %s (%d...%d)\n", rbs_token_type_str(parser->next_token.type), parser->next_token.range.start.char_pos, parser->next_token.range.end.char_pos); + printf(" next_token2 = %s (%d...%d)\n", 
rbs_token_type_str(parser->next_token2.type), parser->next_token2.range.start.char_pos, parser->next_token2.range.end.char_pos); + printf(" next_token3 = %s (%d...%d)\n", rbs_token_type_str(parser->next_token3.type), parser->next_token3.range.start.char_pos, parser->next_token3.range.end.char_pos); +} + +void rbs_parser_advance(rbs_parser_t *parser) { + parser->current_token = parser->next_token; + parser->next_token = parser->next_token2; + parser->next_token2 = parser->next_token3; + + while (true) { + if (parser->next_token3.type == pEOF) { + break; + } + + parser->next_token3 = rbs_lexer_next_token(parser->rbs_lexer_t); + + if (parser->next_token3.type == tCOMMENT) { + // skip + } else if (parser->next_token3.type == tLINECOMMENT) { + insert_comment_line(parser, parser->next_token3); + } else if (parser->next_token3.type == tTRIVIA) { + //skip + } else { + break; + } + } +} + +void rbs_print_token(rbs_token_t tok) { + printf( + "%s char=%d...%d\n", + rbs_token_type_str(tok.type), + tok.range.start.char_pos, + tok.range.end.char_pos + ); +} + +void rbs_print_lexer(rbs_lexer_t *lexer) { + printf("Lexer: (range = %d...%d, encoding = %s\n", lexer->start_pos, lexer->end_pos, lexer->encoding->name); + printf(" start = { char_pos = %d, byte_pos = %d }\n", lexer->start.char_pos, lexer->start.byte_pos); + printf(" current = { char_pos = %d, byte_pos = %d }\n", lexer->current.char_pos, lexer->current.byte_pos); + printf(" character = { code_point = %d (%c), bytes = %zu }\n", lexer->current_code_point, lexer->current_code_point < 256 ? lexer->current_code_point : '?', lexer->current_character_bytes); + printf(" first_token_of_line = %s\n", lexer->first_token_of_line ? 
"true" : "false"); +} + +rbs_ast_comment_t *rbs_parser_get_comment(rbs_parser_t *parser, int subject_line) { + int comment_line = subject_line - 1; + + rbs_comment_t *com = comment_get_comment(parser->last_comment, comment_line); + + if (com) { + return parse_comment_lines(parser, com); + } else { + return NULL; + } +} + +rbs_lexer_t *rbs_lexer_new(rbs_allocator_t *allocator, rbs_string_t string, const rbs_encoding_t *encoding, int start_pos, int end_pos) { + rbs_lexer_t *lexer = rbs_allocator_alloc(allocator, rbs_lexer_t); + + rbs_position_t start_position = (rbs_position_t) { + .byte_pos = 0, + .char_pos = 0, + .line = 1, + .column = 0, + }; + + *lexer = (rbs_lexer_t) { + .string = string, + .start_pos = start_pos, + .end_pos = end_pos, + .current = start_position, + .start = { 0 }, + .first_token_of_line = true, + .current_character_bytes = 0, + .current_code_point = '\0', + .encoding = encoding, + }; + + unsigned int codepoint; + size_t bytes; + + if (rbs_next_char(lexer, &codepoint, &bytes)) { + lexer->current_code_point = codepoint; + lexer->current_character_bytes = bytes; + } else { + lexer->current_code_point = '\0'; + lexer->current_character_bytes = 1; + } + + if (start_pos > 0) { + rbs_skipn(lexer, start_pos); + } + + lexer->start = lexer->current; + + return lexer; +} + +rbs_parser_t *rbs_parser_new(rbs_string_t string, const rbs_encoding_t *encoding, int start_pos, int end_pos) { + rbs_allocator_t *allocator = rbs_allocator_init(); + + rbs_lexer_t *lexer = rbs_lexer_new(allocator, string, encoding, start_pos, end_pos); + rbs_parser_t *parser = rbs_allocator_alloc(allocator, rbs_parser_t); + + *parser = (rbs_parser_t) { + .rbs_lexer_t = lexer, + + .current_token = NullToken, + .next_token = NullToken, + .next_token2 = NullToken, + .next_token3 = NullToken, + + .vars = NULL, + .last_comment = NULL, + + .constant_pool = { 0 }, + .allocator = allocator, + .error = NULL, + }; + + // The parser's constant pool is mainly used for storing the names of type 
variables, which usually aren't many. + // Below are some statistics gathered from the current test suite. We can see that 56% of parsers never add to their + // constant pool at all. The initial capacity needs to be a power of 2. Picking 2 means that we won't need to realloc + // in 85% of cases. + // + // TODO: recalculate these statistics based on a real world codebase, rather than the test suite. + // + // | Size | Count | Cumulative | % Coverage | + // |------|-------|------------|------------| + // | 0 | 7,862 | 7,862 | 56% | + // | 1 | 3,196 | 11,058 | 79% | + // | 2 | 778 | 12,719 | 85% | + // | 3 | 883 | 11,941 | 91% | + // | 4 | 478 | 13,197 | 95% | + // | 5 | 316 | 13,513 | 97% | + // | 6 | 288 | 13,801 | 99% | + // | 7 | 144 | 13,945 | 100% | + const size_t initial_pool_capacity = 2; + rbs_constant_pool_init(&parser->constant_pool, initial_pool_capacity); + + rbs_parser_advance(parser); + rbs_parser_advance(parser); + rbs_parser_advance(parser); + + return parser; +} + +void rbs_parser_free(rbs_parser_t *parser) { + rbs_constant_pool_free(&parser->constant_pool); + rbs_allocator_free(ALLOCATOR()); +} + +void rbs_parser_set_error(rbs_parser_t *parser, rbs_token_t tok, bool syntax_error, const char *fmt, ...) 
{ + if (parser->error) { + return; + } + + va_list args; + + va_start(args, fmt); + int length = vsnprintf(NULL, 0, fmt, args); + va_end(args); + + char *message = rbs_allocator_alloc_many(ALLOCATOR(), length + 1, char); + + va_start(args, fmt); + vsnprintf(message, length + 1, fmt, args); + va_end(args); + + parser->error = rbs_allocator_alloc(ALLOCATOR(), rbs_error_t); + parser->error->token = tok; + parser->error->message = message; + parser->error->syntax_error = syntax_error; +} diff --git a/src/ruby_objs.c b/src/ruby_objs.c deleted file mode 100644 index be3e717f8..000000000 --- a/src/ruby_objs.c +++ /dev/null @@ -1,799 +0,0 @@ -/*----------------------------------------------------------------------------*/ -/* This file is generated by the templates/template.rb script and should not */ -/* be modified manually. */ -/* To change the template see */ -/* templates/src/ruby_objs.c.erb */ -/*----------------------------------------------------------------------------*/ - -#include "rbs_extension.h" - -#ifdef RB_PASS_KEYWORDS - // Ruby 2.7 or later - #define CLASS_NEW_INSTANCE(klass, argc, argv)\ - rb_class_new_instance_kw(argc, argv, klass, RB_PASS_KEYWORDS) -#else - // Ruby 2.6 - #define CLASS_NEW_INSTANCE(receiver, argc, argv)\ - rb_class_new_instance(argc, argv, receiver) -#endif - -VALUE rbs_ast_annotation(VALUE string, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("string")), string); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_AST_Annotation, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_comment(VALUE string, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("string")), string); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_AST_Comment, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_decl_class(VALUE name, VALUE type_params, 
VALUE super_class, VALUE members, VALUE annotations, VALUE location, VALUE comment) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type_params")), type_params); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("super_class")), super_class); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("members")), members); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - - return CLASS_NEW_INSTANCE( - RBS_AST_Declarations_Class, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_decl_class_super(VALUE name, VALUE args, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("args")), args); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_AST_Declarations_Class_Super, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_decl_class_alias(VALUE new_name, VALUE old_name, VALUE location, VALUE comment, VALUE annotations) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("new_name")), new_name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("old_name")), old_name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - - return CLASS_NEW_INSTANCE( - RBS_AST_Declarations_ClassAlias, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_decl_constant(VALUE name, VALUE type, VALUE location, VALUE comment, VALUE annotations) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type")), 
type); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - - return CLASS_NEW_INSTANCE( - RBS_AST_Declarations_Constant, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_decl_global(VALUE name, VALUE type, VALUE location, VALUE comment, VALUE annotations) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type")), type); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - - return CLASS_NEW_INSTANCE( - RBS_AST_Declarations_Global, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_decl_interface(VALUE name, VALUE type_params, VALUE members, VALUE annotations, VALUE location, VALUE comment) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type_params")), type_params); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("members")), members); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - - return CLASS_NEW_INSTANCE( - RBS_AST_Declarations_Interface, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_decl_module(VALUE name, VALUE type_params, VALUE self_types, VALUE members, VALUE annotations, VALUE location, VALUE comment) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type_params")), type_params); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("self_types")), self_types); - 
rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("members")), members); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - - return CLASS_NEW_INSTANCE( - RBS_AST_Declarations_Module, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_decl_module_self(VALUE name, VALUE args, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("args")), args); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_AST_Declarations_Module_Self, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_decl_module_alias(VALUE new_name, VALUE old_name, VALUE location, VALUE comment, VALUE annotations) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("new_name")), new_name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("old_name")), old_name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - - return CLASS_NEW_INSTANCE( - RBS_AST_Declarations_ModuleAlias, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_decl_type_alias(VALUE name, VALUE type_params, VALUE type, VALUE annotations, VALUE location, VALUE comment) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type_params")), type_params); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type")), type); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - - return 
CLASS_NEW_INSTANCE( - RBS_AST_Declarations_TypeAlias, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_directives_use(VALUE clauses, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("clauses")), clauses); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_AST_Directives_Use, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_directives_use_single_clause(VALUE type_name, VALUE new_name, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type_name")), type_name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("new_name")), new_name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_AST_Directives_Use_SingleClause, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_directives_use_wildcard_clause(VALUE namespace, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("namespace")), namespace); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_AST_Directives_Use_WildcardClause, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_members_alias(VALUE new_name, VALUE old_name, VALUE kind, VALUE annotations, VALUE location, VALUE comment) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("new_name")), new_name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("old_name")), old_name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("kind")), kind); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - - return CLASS_NEW_INSTANCE( - RBS_AST_Members_Alias, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_members_attr_accessor(VALUE name, VALUE type, VALUE ivar_name, VALUE kind, VALUE 
annotations, VALUE location, VALUE comment, VALUE visibility) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type")), type); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("ivar_name")), ivar_name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("kind")), kind); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("visibility")), visibility); - - return CLASS_NEW_INSTANCE( - RBS_AST_Members_AttrAccessor, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_members_attr_reader(VALUE name, VALUE type, VALUE ivar_name, VALUE kind, VALUE annotations, VALUE location, VALUE comment, VALUE visibility) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type")), type); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("ivar_name")), ivar_name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("kind")), kind); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("visibility")), visibility); - - return CLASS_NEW_INSTANCE( - RBS_AST_Members_AttrReader, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_members_attr_writer(VALUE name, VALUE type, VALUE ivar_name, VALUE kind, VALUE annotations, VALUE location, VALUE comment, VALUE visibility) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type")), type); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("ivar_name")), ivar_name); - 
rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("kind")), kind); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("visibility")), visibility); - - return CLASS_NEW_INSTANCE( - RBS_AST_Members_AttrWriter, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_members_class_instance_variable(VALUE name, VALUE type, VALUE location, VALUE comment) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type")), type); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - - return CLASS_NEW_INSTANCE( - RBS_AST_Members_ClassInstanceVariable, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_members_class_variable(VALUE name, VALUE type, VALUE location, VALUE comment) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type")), type); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - - return CLASS_NEW_INSTANCE( - RBS_AST_Members_ClassVariable, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_members_extend(VALUE name, VALUE args, VALUE annotations, VALUE location, VALUE comment) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("args")), args); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - - return CLASS_NEW_INSTANCE( - RBS_AST_Members_Extend, - 1, 
- &_init_kwargs - ); -} - -VALUE rbs_ast_members_include(VALUE name, VALUE args, VALUE annotations, VALUE location, VALUE comment) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("args")), args); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - - return CLASS_NEW_INSTANCE( - RBS_AST_Members_Include, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_members_instance_variable(VALUE name, VALUE type, VALUE location, VALUE comment) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type")), type); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - - return CLASS_NEW_INSTANCE( - RBS_AST_Members_InstanceVariable, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_members_method_definition(VALUE name, VALUE kind, VALUE overloads, VALUE annotations, VALUE location, VALUE comment, VALUE overloading, VALUE visibility) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("kind")), kind); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("overloads")), overloads); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("overloading")), overloading); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("visibility")), visibility); - - return CLASS_NEW_INSTANCE( - RBS_AST_Members_MethodDefinition, - 1, - &_init_kwargs - ); -} - -VALUE 
rbs_ast_members_method_definition_overload(VALUE annotations, VALUE method_type) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("method_type")), method_type); - - return CLASS_NEW_INSTANCE( - RBS_AST_Members_MethodDefinition_Overload, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_members_prepend(VALUE name, VALUE args, VALUE annotations, VALUE location, VALUE comment) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("args")), args); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("annotations")), annotations); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("comment")), comment); - - return CLASS_NEW_INSTANCE( - RBS_AST_Members_Prepend, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_members_private(VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_AST_Members_Private, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_members_public(VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_AST_Members_Public, - 1, - &_init_kwargs - ); -} - -VALUE rbs_ast_type_param(VALUE name, VALUE variance, VALUE upper_bound, VALUE default_type, VALUE unchecked, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("variance")), variance); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("upper_bound")), upper_bound); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("default_type")), default_type); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("unchecked")), unchecked); - 
rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_AST_TypeParam, - 1, - &_init_kwargs - ); -} - -VALUE rbs_method_type(VALUE type_params, VALUE type, VALUE block, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type_params")), type_params); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type")), type); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("block")), block); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_MethodType, - 1, - &_init_kwargs - ); -} - -VALUE rbs_namespace(VALUE path, VALUE absolute) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("path")), path); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("absolute")), absolute); - - return CLASS_NEW_INSTANCE( - RBS_Namespace, - 1, - &_init_kwargs - ); -} - -VALUE rbs_type_name(VALUE namespace, VALUE name) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("namespace")), namespace); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - - return CLASS_NEW_INSTANCE( - RBS_TypeName, - 1, - &_init_kwargs - ); -} - -VALUE rbs_alias(VALUE name, VALUE args, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("args")), args); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Alias, - 1, - &_init_kwargs - ); -} - -VALUE rbs_bases_any(VALUE todo, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("todo")), todo); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Bases_Any, - 1, - &_init_kwargs - ); -} - -VALUE rbs_bases_bool(VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - 
rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Bases_Bool, - 1, - &_init_kwargs - ); -} - -VALUE rbs_bases_bottom(VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Bases_Bottom, - 1, - &_init_kwargs - ); -} - -VALUE rbs_bases_class(VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Bases_Class, - 1, - &_init_kwargs - ); -} - -VALUE rbs_bases_instance(VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Bases_Instance, - 1, - &_init_kwargs - ); -} - -VALUE rbs_bases_nil(VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Bases_Nil, - 1, - &_init_kwargs - ); -} - -VALUE rbs_bases_self(VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Bases_Self, - 1, - &_init_kwargs - ); -} - -VALUE rbs_bases_top(VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Bases_Top, - 1, - &_init_kwargs - ); -} - -VALUE rbs_bases_void(VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Bases_Void, - 1, - &_init_kwargs - ); -} - -VALUE rbs_block(VALUE type, VALUE required, VALUE self_type) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type")), type); - 
rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("required")), required); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("self_type")), self_type); - - return CLASS_NEW_INSTANCE( - RBS_Types_Block, - 1, - &_init_kwargs - ); -} - -VALUE rbs_class_instance(VALUE name, VALUE args, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("args")), args); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_ClassInstance, - 1, - &_init_kwargs - ); -} - -VALUE rbs_class_singleton(VALUE name, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_ClassSingleton, - 1, - &_init_kwargs - ); -} - -VALUE rbs_function(VALUE required_positionals, VALUE optional_positionals, VALUE rest_positionals, VALUE trailing_positionals, VALUE required_keywords, VALUE optional_keywords, VALUE rest_keywords, VALUE return_type) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("required_positionals")), required_positionals); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("optional_positionals")), optional_positionals); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("rest_positionals")), rest_positionals); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("trailing_positionals")), trailing_positionals); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("required_keywords")), required_keywords); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("optional_keywords")), optional_keywords); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("rest_keywords")), rest_keywords); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("return_type")), return_type); - - return CLASS_NEW_INSTANCE( - RBS_Types_Function, - 1, - &_init_kwargs - ); -} - -VALUE 
rbs_function_param(VALUE type, VALUE name, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type")), type); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Function_Param, - 1, - &_init_kwargs - ); -} - -VALUE rbs_interface(VALUE name, VALUE args, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("args")), args); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Interface, - 1, - &_init_kwargs - ); -} - -VALUE rbs_intersection(VALUE types, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("types")), types); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Intersection, - 1, - &_init_kwargs - ); -} - -VALUE rbs_literal(VALUE literal, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("literal")), literal); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Literal, - 1, - &_init_kwargs - ); -} - -VALUE rbs_optional(VALUE type, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type")), type); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Optional, - 1, - &_init_kwargs - ); -} - -VALUE rbs_proc(VALUE type, VALUE block, VALUE location, VALUE self_type) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("type")), type); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("block")), block); - rb_hash_aset(_init_kwargs, 
ID2SYM(rb_intern("location")), location); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("self_type")), self_type); - - return CLASS_NEW_INSTANCE( - RBS_Types_Proc, - 1, - &_init_kwargs - ); -} - -VALUE rbs_record(VALUE all_fields, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("all_fields")), all_fields); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Record, - 1, - &_init_kwargs - ); -} - -VALUE rbs_tuple(VALUE types, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("types")), types); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Tuple, - 1, - &_init_kwargs - ); -} - -VALUE rbs_union(VALUE types, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("types")), types); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Union, - 1, - &_init_kwargs - ); -} - -VALUE rbs_untyped_function(VALUE return_type) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("return_type")), return_type); - - return CLASS_NEW_INSTANCE( - RBS_Types_UntypedFunction, - 1, - &_init_kwargs - ); -} - -VALUE rbs_variable(VALUE name, VALUE location) { - VALUE _init_kwargs = rb_hash_new(); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("name")), name); - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("location")), location); - - return CLASS_NEW_INSTANCE( - RBS_Types_Variable, - 1, - &_init_kwargs - ); -} - diff --git a/src/string.c b/src/string.c new file mode 100644 index 000000000..cc7de5e98 --- /dev/null +++ b/src/string.c @@ -0,0 +1,90 @@ +#include "rbs/string.h" +#include "rbs/defines.h" + +#include +#include +#include +#include + +unsigned int rbs_utf8_string_to_codepoint(const rbs_string_t string) { + unsigned 
int codepoint = 0; + int remaining_bytes = 0; + + const char *s = string.start; + const char *end = string.end; + + if (s >= end) return 0; // End of string + + if (RBS_LIKELY((*s & 0x80) == 0)) { + // Single byte character (0xxxxxxx) + return *s; + } else if ((*s & 0xE0) == 0xC0) { + // Two byte character (110xxxxx 10xxxxxx) + codepoint = *s & 0x1F; + remaining_bytes = 1; + } else if ((*s & 0xF0) == 0xE0) { + // Three byte character (1110xxxx 10xxxxxx 10xxxxxx) + codepoint = *s & 0x0F; + remaining_bytes = 2; + } else if ((*s & 0xF8) == 0xF0) { + // Four byte character (11110xxx 10xxxxxx 10xxxxxx 10xxxxxx) + codepoint = *s & 0x07; + remaining_bytes = 3; + } else { + // Invalid UTF-8 sequence + return 0xFFFD; // Unicode replacement character + } + + s++; + while (remaining_bytes > 0 && s < end) { + if ((*s & 0xC0) != 0x80) { + // Invalid continuation byte + return 0xFFFD; + } + codepoint = (codepoint << 6) | (*s & 0x3F); + s++; + remaining_bytes--; + } + + if (remaining_bytes > 0) { + // Incomplete sequence + return 0xFFFD; + } + + return codepoint; +} + +// Builds a string view over the bytes in [start, end). Nothing is copied. +rbs_string_t rbs_string_new(const char *start, const char *end) { + return (rbs_string_t) { + .start = start, + .end = end, + }; +} + +// Returns a view of `self` with leading and trailing whitespace (as classified by isspace()) excluded. +// The result points into the same bytes; an empty or all-whitespace input yields an empty view. +rbs_string_t rbs_string_strip_whitespace(rbs_string_t *self) { + const char *new_start = self->start; + while (new_start < self->end && isspace((unsigned char) *new_start)) { // Bounds check first, so `*self->end` is never read + new_start++; + } + + if (new_start == self->end) { // Handle empty string case + return rbs_string_new(new_start, new_start); + } + + const char *new_end = self->end - 1; + while (new_start < new_end && isspace((unsigned char) *new_end)) { // Cast: isspace() is undefined for negative char values + new_end--; + } + + return rbs_string_new(new_start, new_end + 1); +} + +// Returns the length of the string in bytes. +size_t rbs_string_len(const rbs_string_t self) { + return self.end - self.start; +} + +// Returns true when both strings hold the same bytes. +bool rbs_string_equal(const rbs_string_t lhs, const rbs_string_t rhs) { + if (lhs.start == rhs.start && lhs.end == rhs.end) return true; + if (rbs_string_len(lhs) != rbs_string_len(rhs)) return false; // Lengths match from here on, so compare raw bytes; unlike strncmp(), memcmp() does not stop at an embedded NUL + return memcmp(lhs.start, rhs.start,
rbs_string_len(lhs)) == 0; +} diff --git a/src/util/rbs_allocator.c b/src/util/rbs_allocator.c new file mode 100644 index 000000000..a9ec8b7f0 --- /dev/null +++ b/src/util/rbs_allocator.c @@ -0,0 +1,152 @@ +/** + * @file rbs_allocator.c + * + * A simple arena allocator that can be freed all at once. + * + * This allocator maintains a linked list of pages, which come in two flavours: + * 1. Small allocation pages, which are the same size as the system page size. + * 2. Large allocation pages, which are the exact size requested, for sizes greater than the small page size. + * + * Small allocations are always placed in the unused space at the end of the "head" page. If there isn't enough room, a new + * page is allocated, and the small allocation is placed at its start. This approach wastes that unused slack at the + * end of the previous page, but it means that allocations are instant and never scan the linked list to find a gap. + * + * This allocator doesn't support freeing individual allocations. Only the whole arena can be freed at once at the end. + */ + +#include "rbs/util/rbs_allocator.h" +#include "rbs/util/rbs_assert.h" + +#include +#include // for memset() +#include +#include + +#ifdef _WIN32 +#include +#else +#include +#include +#include +#include +#endif + +typedef struct rbs_allocator_page { + // The next page in the linked list, or NULL if this is the last page. + // This is usually the previously allocated page, except that large pages are spliced in directly after the head page. + struct rbs_allocator_page *next; + + // The size of the payload in bytes. + size_t size; + + // The offset of the next available byte.
+ size_t used; +} rbs_allocator_page_t; + +// Returns the operating system's memory page size in bytes. +static size_t get_system_page_size(void) { +#ifdef _WIN32 + SYSTEM_INFO si; + GetSystemInfo(&si); + return si.dwPageSize; +#else + long sz = sysconf(_SC_PAGESIZE); + if (sz <= 0) return 4096; // Fallback to the common 4KB page size + return (size_t) sz; +#endif +} + +// Allocates a page header followed by `payload_size` payload bytes. +// Terminates the process via rbs_assert() if malloc() fails. The caller links the page into the list. +static rbs_allocator_page_t *rbs_allocator_page_new(size_t payload_size) { + const size_t page_header_size = sizeof(rbs_allocator_page_t); + + rbs_allocator_page_t *page = malloc(page_header_size + payload_size); + rbs_assert(page != NULL, "Failed to allocate an allocator page. payload size: %zu", payload_size); + + page->next = NULL; + page->size = payload_size; + page->used = 0; + + return page; +} + +// Creates an allocator that starts with a single empty small page. Release it with rbs_allocator_free(). +rbs_allocator_t *rbs_allocator_init(void) { + rbs_allocator_t *allocator = malloc(sizeof(rbs_allocator_t)); + rbs_assert(allocator != NULL, "Failed to allocate the allocator"); + + const size_t system_page_size = get_system_page_size(); + + allocator->default_page_payload_size = system_page_size - sizeof(rbs_allocator_page_t); + + allocator->page = rbs_allocator_page_new(allocator->default_page_payload_size); + allocator->page->next = NULL; + + return allocator; +} + +// Frees every page in the list and then the allocator itself. Passing NULL is a no-op. +void rbs_allocator_free(rbs_allocator_t *allocator) { + if (allocator == NULL) return; + + rbs_allocator_page_t *page = allocator->page; + while (page) { + rbs_allocator_page_t *next = page->next; + free(page); + page = next; + } + free(allocator); +} + +// Allocates `new_size` bytes from `allocator`, aligned to an `alignment`-byte boundary. +// Copies the first min(`old_size`, `new_size`) bytes from `ptr` to the new allocation (nothing if `ptr` is NULL). +// It always reallocates the memory in new space and thus wastes the old space. +void *rbs_allocator_realloc_impl(rbs_allocator_t *allocator, void *ptr, size_t old_size, size_t new_size, size_t alignment) { + void *p = rbs_allocator_malloc_impl(allocator, new_size, alignment); + + // Never copy more than the new allocation can hold, and never hand memcpy() a NULL source. + const size_t copy_size = old_size < new_size ? old_size : new_size; + if (ptr != NULL && copy_size > 0) memcpy(p, ptr, copy_size); + + return p; +} + +// Allocates `size` bytes from `allocator`, aligned to an `alignment`-byte boundary. +void *rbs_allocator_malloc_impl(rbs_allocator_t *allocator, size_t size, size_t alignment) { + rbs_assert(size % alignment == 0, "size must be a multiple of the alignment.
size: %zu, alignment: %zu", size, alignment); + + // `alignment - 1` is the worst-case padding needed to align the start of a fresh page's payload. + if (allocator->default_page_payload_size < size + alignment - 1) { // Big allocation, give it its own page. + rbs_allocator_page_t *new_page = rbs_allocator_page_new(size + alignment - 1); + + // This simple allocator can only put small allocations into the head page. + // Naively prepending this large allocation page to the head of the allocator before the previous head page + // would waste the remaining space in the head page. + // So instead, we'll splice in the large page *after* the head page. + // + // +-------+ +-----------+ +-----------+ + // | arena | | head page | | new_page | + // |-------| |-----------+ |-----------+ + // | *page |--->| size | +--->| size | +---> ... previous tail + // +-------+ | offset | | | offset | | + // | *next ----+---+ | *next ----+---+ + // | ... | | ... | + // +-----------+ +-----------+ + // + new_page->next = allocator->page->next; + allocator->page->next = new_page; + + uintptr_t pointer = (uintptr_t) new_page + sizeof(rbs_allocator_page_t); + pointer += (alignment - pointer % alignment) % alignment; // Round up to the requested alignment. + return (void *) pointer; + } + + rbs_allocator_page_t *page = allocator->page; + + // The next free byte may be misaligned after an earlier allocation with a smaller alignment (e.g. a char buffer), + // so work out how many padding bytes are needed to reach the next `alignment`-byte boundary. + uintptr_t pointer = (uintptr_t) page + sizeof(rbs_allocator_page_t) + page->used; + size_t padding = (alignment - pointer % alignment) % alignment; + + if (page->used + padding + size > page->size) { + // Not enough space. Allocate a new small page and prepend it to the allocator's linked list. + rbs_allocator_page_t *new_page = rbs_allocator_page_new(allocator->default_page_payload_size); + new_page->next = allocator->page; + allocator->page = new_page; + page = new_page; + + // Recompute for the fresh page. The size check at the top guarantees that padding + size fits. + pointer = (uintptr_t) page + sizeof(rbs_allocator_page_t); + padding = (alignment - pointer % alignment) % alignment; + } + + pointer += padding; + page->used += padding + size; + return (void *) pointer; +} + +// Note: This will eagerly fill with zeroes, unlike `calloc()` which can map in a page to be zeroed lazily. +// It's assumed that callers to this function will immediately write to the allocated memory, anyway.
+void *rbs_allocator_calloc_impl(rbs_allocator_t *allocator, size_t count, size_t size, size_t alignment) { + void *p = rbs_allocator_malloc_many_impl(allocator, count, size, alignment); // Also rejects a `count * size` that would overflow + memset(p, 0, count * size); + return p; +} + +// Similar to `rbs_allocator_malloc_impl()`, but allocates `count` instances of `size` bytes, aligned to an `alignment`-byte boundary. +// Terminates the process via rbs_assert() if `count * size` does not fit in a size_t. +void *rbs_allocator_malloc_many_impl(rbs_allocator_t *allocator, size_t count, size_t size, size_t alignment) { + // Without this check the product would wrap silently and yield an allocation smaller than the caller expects. + rbs_assert(size == 0 || count <= ((size_t) -1) / size, "Allocation size overflows size_t. count: %zu, size: %zu", count, size); + return rbs_allocator_malloc_impl(allocator, count * size, alignment); +} diff --git a/src/util/rbs_assert.c b/src/util/rbs_assert.c new file mode 100644 index 000000000..63f17b09a --- /dev/null +++ b/src/util/rbs_assert.c @@ -0,0 +1,19 @@ +#include "rbs/util/rbs_assert.h" + +#include +#include +#include +#include + +void rbs_assert(bool condition, const char *fmt, ...) { + if (condition) { + return; + } + + va_list args; + va_start(args, fmt); + vfprintf(stderr, fmt, args); + va_end(args); + fprintf(stderr, "\n"); + exit(EXIT_FAILURE); +} diff --git a/src/util/rbs_buffer.c b/src/util/rbs_buffer.c new file mode 100644 index 000000000..71421b975 --- /dev/null +++ b/src/util/rbs_buffer.c @@ -0,0 +1,54 @@ +#include "rbs/util/rbs_buffer.h" +#include "rbs/util/rbs_assert.h" + +bool rbs_buffer_init(rbs_allocator_t *allocator, rbs_buffer_t *buffer) { + size_t capacity = RBS_BUFFER_DEFAULT_CAPACITY; + + buffer->length = 0; + buffer->capacity = capacity; + + buffer->value = rbs_allocator_calloc(allocator, capacity, char); + return buffer->value != NULL; +} + +char *rbs_buffer_value(const rbs_buffer_t *buffer) { + return buffer->value; +} + +size_t rbs_buffer_length(const rbs_buffer_t *buffer) { + return buffer->length; +} + +void rbs_buffer_append_string(rbs_allocator_t *allocator, rbs_buffer_t *buffer, const char *source, size_t length) { + size_t next_length = buffer->length + length; + + if (next_length > buffer->capacity) { + size_t old_capacity = buffer->capacity; + + rbs_assert(old_capacity !=
0, "Precondition: capacity must be at least 1. Got %zu", old_capacity); + + size_t new_capacity = buffer->capacity * 2; + + while (next_length > new_capacity) { + new_capacity *= 2; + } + + char *new_value = rbs_allocator_realloc(allocator, buffer->value, old_capacity, new_capacity, char); + rbs_assert(new_value != NULL, "Failed to append to buffer. Old capacity: %zu, new capacity: %zu", old_capacity, new_capacity); + + buffer->value = new_value; + buffer->capacity = new_capacity; + } + + size_t cursor = buffer->length; + buffer->length = next_length; + memcpy(buffer->value + cursor, source, length); +} + +void rbs_buffer_append_cstr(rbs_allocator_t *allocator, rbs_buffer_t *buffer, const char *value) { + rbs_buffer_append_string(allocator, buffer, value, strlen(value)); +} + +rbs_string_t rbs_buffer_to_string(rbs_buffer_t *buffer) { + return rbs_string_new(buffer->value, buffer->value + buffer->length); +} diff --git a/src/util/rbs_constant_pool.c b/src/util/rbs_constant_pool.c index 96f9246f3..27ef9cd7e 100644 --- a/src/util/rbs_constant_pool.c +++ b/src/util/rbs_constant_pool.c @@ -1,75 +1,5 @@ #include "rbs/util/rbs_constant_pool.h" - -/** - * Initialize a list of constant ids. - */ -void -rbs_constant_id_list_init(rbs_constant_id_list_t *list) { - list->ids = NULL; - list->size = 0; - list->capacity = 0; -} - -/** - * Initialize a list of constant ids with a given capacity. - */ -void -rbs_constant_id_list_init_capacity(rbs_constant_id_list_t *list, size_t capacity) { - list->ids = calloc(capacity, sizeof(rbs_constant_id_t)); - if (list->ids == NULL) abort(); - - list->size = 0; - list->capacity = capacity; -} - -/** - * Append a constant id to a list of constant ids. Returns false if any - * potential reallocations fail. - */ -bool -rbs_constant_id_list_append(rbs_constant_id_list_t *list, rbs_constant_id_t id) { - if (list->size >= list->capacity) { - list->capacity = list->capacity == 0 ? 
8 : list->capacity * 2; - list->ids = (rbs_constant_id_t *) realloc(list->ids, sizeof(rbs_constant_id_t) * list->capacity); - if (list->ids == NULL) return false; - } - - list->ids[list->size++] = id; - return true; -} - -/** - * Insert a constant id into a list of constant ids at the specified index. - */ -void -rbs_constant_id_list_insert(rbs_constant_id_list_t *list, size_t index, rbs_constant_id_t id) { - assert(index < list->capacity); - assert(list->ids[index] == RBS_CONSTANT_ID_UNSET); - - list->ids[index] = id; - list->size++; -} - -/** - * Checks if the current constant id list includes the given constant id. - */ -bool -rbs_constant_id_list_includes(rbs_constant_id_list_t *list, rbs_constant_id_t id) { - for (size_t index = 0; index < list->size; index++) { - if (list->ids[index] == id) return true; - } - return false; -} - -/** - * Free the memory associated with a list of constant ids. - */ -void -rbs_constant_id_list_free(rbs_constant_id_list_t *list) { - if (list->ids != NULL) { - free(list->ids); - } -} +#include "rbs/util/rbs_assert.h" /** * A relatively simple hash function (djb2) that is used to hash strings. We are @@ -107,19 +37,16 @@ next_power_of_two(uint32_t v) { return v; } -#ifndef NDEBUG -static bool -is_power_of_two(uint32_t size) { +static bool is_power_of_two(uint32_t size) { return (size & (size - 1)) == 0; } -#endif /** * Resize a constant pool to a given capacity. */ static inline bool rbs_constant_pool_resize(rbs_constant_pool_t *pool) { - assert(is_power_of_two(pool->capacity)); + rbs_assert(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. 
Got %i", pool->capacity); uint32_t next_capacity = pool->capacity * 2; if (next_capacity < pool->capacity) return false; @@ -131,7 +58,7 @@ rbs_constant_pool_resize(rbs_constant_pool_t *pool) { if (next == NULL) return false; rbs_constant_pool_bucket_t *next_buckets = next; - rbs_constant_t *next_constants = (void *)(((char *) next) + next_capacity * sizeof(rbs_constant_pool_bucket_t)); + rbs_constant_t *next_constants = (void *) (((char *) next) + next_capacity * sizeof(rbs_constant_pool_bucket_t)); // For each bucket in the current constant pool, find the index in the // next constant pool, and insert it. @@ -175,8 +102,7 @@ rbs_constant_pool_t *RBS_GLOBAL_CONSTANT_POOL = &RBS_GLOBAL_CONSTANT_POOL_STORAG /** * Initialize a new constant pool with a given capacity. */ -bool -rbs_constant_pool_init(rbs_constant_pool_t *pool, uint32_t capacity) { +bool rbs_constant_pool_init(rbs_constant_pool_t *pool, uint32_t capacity) { const uint32_t maximum = (~((uint32_t) 0)); if (capacity >= ((maximum / 2) + 1)) return false; @@ -186,7 +112,7 @@ rbs_constant_pool_init(rbs_constant_pool_t *pool, uint32_t capacity) { if (memory == NULL) return false; pool->buckets = memory; - pool->constants = (void *)(((char *)memory) + capacity * sizeof(rbs_constant_pool_bucket_t)); + pool->constants = (void *) (((char *) memory) + capacity * sizeof(rbs_constant_pool_bucket_t)); pool->size = 0; pool->capacity = capacity; return true; @@ -197,7 +123,7 @@ rbs_constant_pool_init(rbs_constant_pool_t *pool, uint32_t capacity) { */ rbs_constant_t * rbs_constant_pool_id_to_constant(const rbs_constant_pool_t *pool, rbs_constant_id_t constant_id) { - assert(constant_id != RBS_CONSTANT_ID_UNSET && constant_id <= pool->size); + rbs_assert(constant_id != RBS_CONSTANT_ID_UNSET && constant_id <= pool->size, "constant_id is not valid. 
Got %i, pool->size: %i", constant_id, pool->size); return &pool->constants[constant_id - 1]; } @@ -207,7 +133,7 @@ rbs_constant_pool_id_to_constant(const rbs_constant_pool_t *pool, rbs_constant_i */ rbs_constant_id_t rbs_constant_pool_find(const rbs_constant_pool_t *pool, const uint8_t *start, size_t length) { - assert(is_power_of_two(pool->capacity)); + rbs_assert(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity); const uint32_t mask = pool->capacity - 1; uint32_t hash = rbs_constant_pool_hash(start, length); @@ -235,7 +161,7 @@ rbs_constant_pool_insert(rbs_constant_pool_t *pool, const uint8_t *start, size_t if (!rbs_constant_pool_resize(pool)) return RBS_CONSTANT_ID_UNSET; } - assert(is_power_of_two(pool->capacity)); + rbs_assert(is_power_of_two(pool->capacity), "pool->capacity is not a power of two. Got %i", pool->capacity); const uint32_t mask = pool->capacity - 1; uint32_t hash = rbs_constant_pool_hash(start, length); @@ -276,7 +202,7 @@ rbs_constant_pool_insert(rbs_constant_pool_t *pool, const uint8_t *start, size_t // IDs are allocated starting at 1, since the value 0 denotes a non-existent // constant. uint32_t id = ++pool->size; - assert(pool->size < ((uint32_t) (1 << 30))); + rbs_assert(pool->size < ((uint32_t) (1 << 30)), "pool->size is too large. Got %i", pool->size); *bucket = (rbs_constant_pool_bucket_t) { .id = (unsigned int) (id & 0x3fffffff), @@ -301,6 +227,11 @@ rbs_constant_pool_insert_shared(rbs_constant_pool_t *pool, const uint8_t *start, return rbs_constant_pool_insert(pool, start, length, RBS_CONSTANT_POOL_BUCKET_DEFAULT); } +rbs_constant_id_t +rbs_constant_pool_insert_shared_with_encoding(rbs_constant_pool_t *pool, const uint8_t *start, size_t length, const rbs_encoding_t *encoding) { + return rbs_constant_pool_insert_shared(pool, start, length); +} + /** * Insert a constant into a constant pool from memory that is now owned by the * constant pool. 
Returns the id of the constant, or RBS_CONSTANT_ID_UNSET if any @@ -324,8 +255,7 @@ rbs_constant_pool_insert_constant(rbs_constant_pool_t *pool, const uint8_t *star /** * Free the memory associated with a constant pool. */ -void -rbs_constant_pool_free(rbs_constant_pool_t *pool) { +void rbs_constant_pool_free(rbs_constant_pool_t *pool) { // For each constant in the current constant pool, free the contents if the // contents are owned. for (uint32_t index = 0; index < pool->capacity; index++) { diff --git a/src/util/rbs_encoding.c b/src/util/rbs_encoding.c new file mode 100644 index 000000000..b8c5e58a5 --- /dev/null +++ b/src/util/rbs_encoding.c @@ -0,0 +1,21314 @@ +#include "rbs/util/rbs_encoding.h" +#include "rbs/util/rbs_assert.h" + +#include + +#if defined(__GNUC__) +#define RBS_ATTRIBUTE_UNUSED __attribute__((unused)) +#else +#define RBS_ATTRIBUTE_UNUSED +#endif + +typedef uint32_t rbs_unicode_codepoint_t; + +#define UNICODE_ALPHA_CODEPOINTS_LENGTH 1450 +static const rbs_unicode_codepoint_t unicode_alpha_codepoints[UNICODE_ALPHA_CODEPOINTS_LENGTH] = { + 0x100, + 0x2C1, + 0x2C6, + 0x2D1, + 0x2E0, + 0x2E4, + 0x2EC, + 0x2EC, + 0x2EE, + 0x2EE, + 0x345, + 0x345, + 0x370, + 0x374, + 0x376, + 0x377, + 0x37A, + 0x37D, + 0x37F, + 0x37F, + 0x386, + 0x386, + 0x388, + 0x38A, + 0x38C, + 0x38C, + 0x38E, + 0x3A1, + 0x3A3, + 0x3F5, + 0x3F7, + 0x481, + 0x48A, + 0x52F, + 0x531, + 0x556, + 0x559, + 0x559, + 0x560, + 0x588, + 0x5B0, + 0x5BD, + 0x5BF, + 0x5BF, + 0x5C1, + 0x5C2, + 0x5C4, + 0x5C5, + 0x5C7, + 0x5C7, + 0x5D0, + 0x5EA, + 0x5EF, + 0x5F2, + 0x610, + 0x61A, + 0x620, + 0x657, + 0x659, + 0x65F, + 0x66E, + 0x6D3, + 0x6D5, + 0x6DC, + 0x6E1, + 0x6E8, + 0x6ED, + 0x6EF, + 0x6FA, + 0x6FC, + 0x6FF, + 0x6FF, + 0x710, + 0x73F, + 0x74D, + 0x7B1, + 0x7CA, + 0x7EA, + 0x7F4, + 0x7F5, + 0x7FA, + 0x7FA, + 0x800, + 0x817, + 0x81A, + 0x82C, + 0x840, + 0x858, + 0x860, + 0x86A, + 0x870, + 0x887, + 0x889, + 0x88E, + 0x8A0, + 0x8C9, + 0x8D4, + 0x8DF, + 0x8E3, + 0x8E9, + 0x8F0, + 0x93B, + 0x93D, 
+ 0x94C, + 0x94E, + 0x950, + 0x955, + 0x963, + 0x971, + 0x983, + 0x985, + 0x98C, + 0x98F, + 0x990, + 0x993, + 0x9A8, + 0x9AA, + 0x9B0, + 0x9B2, + 0x9B2, + 0x9B6, + 0x9B9, + 0x9BD, + 0x9C4, + 0x9C7, + 0x9C8, + 0x9CB, + 0x9CC, + 0x9CE, + 0x9CE, + 0x9D7, + 0x9D7, + 0x9DC, + 0x9DD, + 0x9DF, + 0x9E3, + 0x9F0, + 0x9F1, + 0x9FC, + 0x9FC, + 0xA01, + 0xA03, + 0xA05, + 0xA0A, + 0xA0F, + 0xA10, + 0xA13, + 0xA28, + 0xA2A, + 0xA30, + 0xA32, + 0xA33, + 0xA35, + 0xA36, + 0xA38, + 0xA39, + 0xA3E, + 0xA42, + 0xA47, + 0xA48, + 0xA4B, + 0xA4C, + 0xA51, + 0xA51, + 0xA59, + 0xA5C, + 0xA5E, + 0xA5E, + 0xA70, + 0xA75, + 0xA81, + 0xA83, + 0xA85, + 0xA8D, + 0xA8F, + 0xA91, + 0xA93, + 0xAA8, + 0xAAA, + 0xAB0, + 0xAB2, + 0xAB3, + 0xAB5, + 0xAB9, + 0xABD, + 0xAC5, + 0xAC7, + 0xAC9, + 0xACB, + 0xACC, + 0xAD0, + 0xAD0, + 0xAE0, + 0xAE3, + 0xAF9, + 0xAFC, + 0xB01, + 0xB03, + 0xB05, + 0xB0C, + 0xB0F, + 0xB10, + 0xB13, + 0xB28, + 0xB2A, + 0xB30, + 0xB32, + 0xB33, + 0xB35, + 0xB39, + 0xB3D, + 0xB44, + 0xB47, + 0xB48, + 0xB4B, + 0xB4C, + 0xB56, + 0xB57, + 0xB5C, + 0xB5D, + 0xB5F, + 0xB63, + 0xB71, + 0xB71, + 0xB82, + 0xB83, + 0xB85, + 0xB8A, + 0xB8E, + 0xB90, + 0xB92, + 0xB95, + 0xB99, + 0xB9A, + 0xB9C, + 0xB9C, + 0xB9E, + 0xB9F, + 0xBA3, + 0xBA4, + 0xBA8, + 0xBAA, + 0xBAE, + 0xBB9, + 0xBBE, + 0xBC2, + 0xBC6, + 0xBC8, + 0xBCA, + 0xBCC, + 0xBD0, + 0xBD0, + 0xBD7, + 0xBD7, + 0xC00, + 0xC0C, + 0xC0E, + 0xC10, + 0xC12, + 0xC28, + 0xC2A, + 0xC39, + 0xC3D, + 0xC44, + 0xC46, + 0xC48, + 0xC4A, + 0xC4C, + 0xC55, + 0xC56, + 0xC58, + 0xC5A, + 0xC5D, + 0xC5D, + 0xC60, + 0xC63, + 0xC80, + 0xC83, + 0xC85, + 0xC8C, + 0xC8E, + 0xC90, + 0xC92, + 0xCA8, + 0xCAA, + 0xCB3, + 0xCB5, + 0xCB9, + 0xCBD, + 0xCC4, + 0xCC6, + 0xCC8, + 0xCCA, + 0xCCC, + 0xCD5, + 0xCD6, + 0xCDD, + 0xCDE, + 0xCE0, + 0xCE3, + 0xCF1, + 0xCF3, + 0xD00, + 0xD0C, + 0xD0E, + 0xD10, + 0xD12, + 0xD3A, + 0xD3D, + 0xD44, + 0xD46, + 0xD48, + 0xD4A, + 0xD4C, + 0xD4E, + 0xD4E, + 0xD54, + 0xD57, + 0xD5F, + 0xD63, + 0xD7A, + 0xD7F, + 0xD81, + 0xD83, + 0xD85, + 
0xD96, + 0xD9A, + 0xDB1, + 0xDB3, + 0xDBB, + 0xDBD, + 0xDBD, + 0xDC0, + 0xDC6, + 0xDCF, + 0xDD4, + 0xDD6, + 0xDD6, + 0xDD8, + 0xDDF, + 0xDF2, + 0xDF3, + 0xE01, + 0xE3A, + 0xE40, + 0xE46, + 0xE4D, + 0xE4D, + 0xE81, + 0xE82, + 0xE84, + 0xE84, + 0xE86, + 0xE8A, + 0xE8C, + 0xEA3, + 0xEA5, + 0xEA5, + 0xEA7, + 0xEB9, + 0xEBB, + 0xEBD, + 0xEC0, + 0xEC4, + 0xEC6, + 0xEC6, + 0xECD, + 0xECD, + 0xEDC, + 0xEDF, + 0xF00, + 0xF00, + 0xF40, + 0xF47, + 0xF49, + 0xF6C, + 0xF71, + 0xF83, + 0xF88, + 0xF97, + 0xF99, + 0xFBC, + 0x1000, + 0x1036, + 0x1038, + 0x1038, + 0x103B, + 0x103F, + 0x1050, + 0x108F, + 0x109A, + 0x109D, + 0x10A0, + 0x10C5, + 0x10C7, + 0x10C7, + 0x10CD, + 0x10CD, + 0x10D0, + 0x10FA, + 0x10FC, + 0x1248, + 0x124A, + 0x124D, + 0x1250, + 0x1256, + 0x1258, + 0x1258, + 0x125A, + 0x125D, + 0x1260, + 0x1288, + 0x128A, + 0x128D, + 0x1290, + 0x12B0, + 0x12B2, + 0x12B5, + 0x12B8, + 0x12BE, + 0x12C0, + 0x12C0, + 0x12C2, + 0x12C5, + 0x12C8, + 0x12D6, + 0x12D8, + 0x1310, + 0x1312, + 0x1315, + 0x1318, + 0x135A, + 0x1380, + 0x138F, + 0x13A0, + 0x13F5, + 0x13F8, + 0x13FD, + 0x1401, + 0x166C, + 0x166F, + 0x167F, + 0x1681, + 0x169A, + 0x16A0, + 0x16EA, + 0x16EE, + 0x16F8, + 0x1700, + 0x1713, + 0x171F, + 0x1733, + 0x1740, + 0x1753, + 0x1760, + 0x176C, + 0x176E, + 0x1770, + 0x1772, + 0x1773, + 0x1780, + 0x17B3, + 0x17B6, + 0x17C8, + 0x17D7, + 0x17D7, + 0x17DC, + 0x17DC, + 0x1820, + 0x1878, + 0x1880, + 0x18AA, + 0x18B0, + 0x18F5, + 0x1900, + 0x191E, + 0x1920, + 0x192B, + 0x1930, + 0x1938, + 0x1950, + 0x196D, + 0x1970, + 0x1974, + 0x1980, + 0x19AB, + 0x19B0, + 0x19C9, + 0x1A00, + 0x1A1B, + 0x1A20, + 0x1A5E, + 0x1A61, + 0x1A74, + 0x1AA7, + 0x1AA7, + 0x1ABF, + 0x1AC0, + 0x1ACC, + 0x1ACE, + 0x1B00, + 0x1B33, + 0x1B35, + 0x1B43, + 0x1B45, + 0x1B4C, + 0x1B80, + 0x1BA9, + 0x1BAC, + 0x1BAF, + 0x1BBA, + 0x1BE5, + 0x1BE7, + 0x1BF1, + 0x1C00, + 0x1C36, + 0x1C4D, + 0x1C4F, + 0x1C5A, + 0x1C7D, + 0x1C80, + 0x1C88, + 0x1C90, + 0x1CBA, + 0x1CBD, + 0x1CBF, + 0x1CE9, + 0x1CEC, + 0x1CEE, + 0x1CF3, + 
0x1CF5, + 0x1CF6, + 0x1CFA, + 0x1CFA, + 0x1D00, + 0x1DBF, + 0x1DE7, + 0x1DF4, + 0x1E00, + 0x1F15, + 0x1F18, + 0x1F1D, + 0x1F20, + 0x1F45, + 0x1F48, + 0x1F4D, + 0x1F50, + 0x1F57, + 0x1F59, + 0x1F59, + 0x1F5B, + 0x1F5B, + 0x1F5D, + 0x1F5D, + 0x1F5F, + 0x1F7D, + 0x1F80, + 0x1FB4, + 0x1FB6, + 0x1FBC, + 0x1FBE, + 0x1FBE, + 0x1FC2, + 0x1FC4, + 0x1FC6, + 0x1FCC, + 0x1FD0, + 0x1FD3, + 0x1FD6, + 0x1FDB, + 0x1FE0, + 0x1FEC, + 0x1FF2, + 0x1FF4, + 0x1FF6, + 0x1FFC, + 0x2071, + 0x2071, + 0x207F, + 0x207F, + 0x2090, + 0x209C, + 0x2102, + 0x2102, + 0x2107, + 0x2107, + 0x210A, + 0x2113, + 0x2115, + 0x2115, + 0x2119, + 0x211D, + 0x2124, + 0x2124, + 0x2126, + 0x2126, + 0x2128, + 0x2128, + 0x212A, + 0x212D, + 0x212F, + 0x2139, + 0x213C, + 0x213F, + 0x2145, + 0x2149, + 0x214E, + 0x214E, + 0x2160, + 0x2188, + 0x24B6, + 0x24E9, + 0x2C00, + 0x2CE4, + 0x2CEB, + 0x2CEE, + 0x2CF2, + 0x2CF3, + 0x2D00, + 0x2D25, + 0x2D27, + 0x2D27, + 0x2D2D, + 0x2D2D, + 0x2D30, + 0x2D67, + 0x2D6F, + 0x2D6F, + 0x2D80, + 0x2D96, + 0x2DA0, + 0x2DA6, + 0x2DA8, + 0x2DAE, + 0x2DB0, + 0x2DB6, + 0x2DB8, + 0x2DBE, + 0x2DC0, + 0x2DC6, + 0x2DC8, + 0x2DCE, + 0x2DD0, + 0x2DD6, + 0x2DD8, + 0x2DDE, + 0x2DE0, + 0x2DFF, + 0x2E2F, + 0x2E2F, + 0x3005, + 0x3007, + 0x3021, + 0x3029, + 0x3031, + 0x3035, + 0x3038, + 0x303C, + 0x3041, + 0x3096, + 0x309D, + 0x309F, + 0x30A1, + 0x30FA, + 0x30FC, + 0x30FF, + 0x3105, + 0x312F, + 0x3131, + 0x318E, + 0x31A0, + 0x31BF, + 0x31F0, + 0x31FF, + 0x3400, + 0x4DBF, + 0x4E00, + 0xA48C, + 0xA4D0, + 0xA4FD, + 0xA500, + 0xA60C, + 0xA610, + 0xA61F, + 0xA62A, + 0xA62B, + 0xA640, + 0xA66E, + 0xA674, + 0xA67B, + 0xA67F, + 0xA6EF, + 0xA717, + 0xA71F, + 0xA722, + 0xA788, + 0xA78B, + 0xA7CA, + 0xA7D0, + 0xA7D1, + 0xA7D3, + 0xA7D3, + 0xA7D5, + 0xA7D9, + 0xA7F2, + 0xA805, + 0xA807, + 0xA827, + 0xA840, + 0xA873, + 0xA880, + 0xA8C3, + 0xA8C5, + 0xA8C5, + 0xA8F2, + 0xA8F7, + 0xA8FB, + 0xA8FB, + 0xA8FD, + 0xA8FF, + 0xA90A, + 0xA92A, + 0xA930, + 0xA952, + 0xA960, + 0xA97C, + 0xA980, + 0xA9B2, + 0xA9B4, + 0xA9BF, + 
0xA9CF, + 0xA9CF, + 0xA9E0, + 0xA9EF, + 0xA9FA, + 0xA9FE, + 0xAA00, + 0xAA36, + 0xAA40, + 0xAA4D, + 0xAA60, + 0xAA76, + 0xAA7A, + 0xAABE, + 0xAAC0, + 0xAAC0, + 0xAAC2, + 0xAAC2, + 0xAADB, + 0xAADD, + 0xAAE0, + 0xAAEF, + 0xAAF2, + 0xAAF5, + 0xAB01, + 0xAB06, + 0xAB09, + 0xAB0E, + 0xAB11, + 0xAB16, + 0xAB20, + 0xAB26, + 0xAB28, + 0xAB2E, + 0xAB30, + 0xAB5A, + 0xAB5C, + 0xAB69, + 0xAB70, + 0xABEA, + 0xAC00, + 0xD7A3, + 0xD7B0, + 0xD7C6, + 0xD7CB, + 0xD7FB, + 0xF900, + 0xFA6D, + 0xFA70, + 0xFAD9, + 0xFB00, + 0xFB06, + 0xFB13, + 0xFB17, + 0xFB1D, + 0xFB28, + 0xFB2A, + 0xFB36, + 0xFB38, + 0xFB3C, + 0xFB3E, + 0xFB3E, + 0xFB40, + 0xFB41, + 0xFB43, + 0xFB44, + 0xFB46, + 0xFBB1, + 0xFBD3, + 0xFD3D, + 0xFD50, + 0xFD8F, + 0xFD92, + 0xFDC7, + 0xFDF0, + 0xFDFB, + 0xFE70, + 0xFE74, + 0xFE76, + 0xFEFC, + 0xFF21, + 0xFF3A, + 0xFF41, + 0xFF5A, + 0xFF66, + 0xFFBE, + 0xFFC2, + 0xFFC7, + 0xFFCA, + 0xFFCF, + 0xFFD2, + 0xFFD7, + 0xFFDA, + 0xFFDC, + 0x10000, + 0x1000B, + 0x1000D, + 0x10026, + 0x10028, + 0x1003A, + 0x1003C, + 0x1003D, + 0x1003F, + 0x1004D, + 0x10050, + 0x1005D, + 0x10080, + 0x100FA, + 0x10140, + 0x10174, + 0x10280, + 0x1029C, + 0x102A0, + 0x102D0, + 0x10300, + 0x1031F, + 0x1032D, + 0x1034A, + 0x10350, + 0x1037A, + 0x10380, + 0x1039D, + 0x103A0, + 0x103C3, + 0x103C8, + 0x103CF, + 0x103D1, + 0x103D5, + 0x10400, + 0x1049D, + 0x104B0, + 0x104D3, + 0x104D8, + 0x104FB, + 0x10500, + 0x10527, + 0x10530, + 0x10563, + 0x10570, + 0x1057A, + 0x1057C, + 0x1058A, + 0x1058C, + 0x10592, + 0x10594, + 0x10595, + 0x10597, + 0x105A1, + 0x105A3, + 0x105B1, + 0x105B3, + 0x105B9, + 0x105BB, + 0x105BC, + 0x10600, + 0x10736, + 0x10740, + 0x10755, + 0x10760, + 0x10767, + 0x10780, + 0x10785, + 0x10787, + 0x107B0, + 0x107B2, + 0x107BA, + 0x10800, + 0x10805, + 0x10808, + 0x10808, + 0x1080A, + 0x10835, + 0x10837, + 0x10838, + 0x1083C, + 0x1083C, + 0x1083F, + 0x10855, + 0x10860, + 0x10876, + 0x10880, + 0x1089E, + 0x108E0, + 0x108F2, + 0x108F4, + 0x108F5, + 0x10900, + 0x10915, + 0x10920, + 0x10939, + 
0x10980, + 0x109B7, + 0x109BE, + 0x109BF, + 0x10A00, + 0x10A03, + 0x10A05, + 0x10A06, + 0x10A0C, + 0x10A13, + 0x10A15, + 0x10A17, + 0x10A19, + 0x10A35, + 0x10A60, + 0x10A7C, + 0x10A80, + 0x10A9C, + 0x10AC0, + 0x10AC7, + 0x10AC9, + 0x10AE4, + 0x10B00, + 0x10B35, + 0x10B40, + 0x10B55, + 0x10B60, + 0x10B72, + 0x10B80, + 0x10B91, + 0x10C00, + 0x10C48, + 0x10C80, + 0x10CB2, + 0x10CC0, + 0x10CF2, + 0x10D00, + 0x10D27, + 0x10E80, + 0x10EA9, + 0x10EAB, + 0x10EAC, + 0x10EB0, + 0x10EB1, + 0x10F00, + 0x10F1C, + 0x10F27, + 0x10F27, + 0x10F30, + 0x10F45, + 0x10F70, + 0x10F81, + 0x10FB0, + 0x10FC4, + 0x10FE0, + 0x10FF6, + 0x11000, + 0x11045, + 0x11071, + 0x11075, + 0x11080, + 0x110B8, + 0x110C2, + 0x110C2, + 0x110D0, + 0x110E8, + 0x11100, + 0x11132, + 0x11144, + 0x11147, + 0x11150, + 0x11172, + 0x11176, + 0x11176, + 0x11180, + 0x111BF, + 0x111C1, + 0x111C4, + 0x111CE, + 0x111CF, + 0x111DA, + 0x111DA, + 0x111DC, + 0x111DC, + 0x11200, + 0x11211, + 0x11213, + 0x11234, + 0x11237, + 0x11237, + 0x1123E, + 0x11241, + 0x11280, + 0x11286, + 0x11288, + 0x11288, + 0x1128A, + 0x1128D, + 0x1128F, + 0x1129D, + 0x1129F, + 0x112A8, + 0x112B0, + 0x112E8, + 0x11300, + 0x11303, + 0x11305, + 0x1130C, + 0x1130F, + 0x11310, + 0x11313, + 0x11328, + 0x1132A, + 0x11330, + 0x11332, + 0x11333, + 0x11335, + 0x11339, + 0x1133D, + 0x11344, + 0x11347, + 0x11348, + 0x1134B, + 0x1134C, + 0x11350, + 0x11350, + 0x11357, + 0x11357, + 0x1135D, + 0x11363, + 0x11400, + 0x11441, + 0x11443, + 0x11445, + 0x11447, + 0x1144A, + 0x1145F, + 0x11461, + 0x11480, + 0x114C1, + 0x114C4, + 0x114C5, + 0x114C7, + 0x114C7, + 0x11580, + 0x115B5, + 0x115B8, + 0x115BE, + 0x115D8, + 0x115DD, + 0x11600, + 0x1163E, + 0x11640, + 0x11640, + 0x11644, + 0x11644, + 0x11680, + 0x116B5, + 0x116B8, + 0x116B8, + 0x11700, + 0x1171A, + 0x1171D, + 0x1172A, + 0x11740, + 0x11746, + 0x11800, + 0x11838, + 0x118A0, + 0x118DF, + 0x118FF, + 0x11906, + 0x11909, + 0x11909, + 0x1190C, + 0x11913, + 0x11915, + 0x11916, + 0x11918, + 0x11935, + 0x11937, + 0x11938, 
+ 0x1193B, + 0x1193C, + 0x1193F, + 0x11942, + 0x119A0, + 0x119A7, + 0x119AA, + 0x119D7, + 0x119DA, + 0x119DF, + 0x119E1, + 0x119E1, + 0x119E3, + 0x119E4, + 0x11A00, + 0x11A32, + 0x11A35, + 0x11A3E, + 0x11A50, + 0x11A97, + 0x11A9D, + 0x11A9D, + 0x11AB0, + 0x11AF8, + 0x11C00, + 0x11C08, + 0x11C0A, + 0x11C36, + 0x11C38, + 0x11C3E, + 0x11C40, + 0x11C40, + 0x11C72, + 0x11C8F, + 0x11C92, + 0x11CA7, + 0x11CA9, + 0x11CB6, + 0x11D00, + 0x11D06, + 0x11D08, + 0x11D09, + 0x11D0B, + 0x11D36, + 0x11D3A, + 0x11D3A, + 0x11D3C, + 0x11D3D, + 0x11D3F, + 0x11D41, + 0x11D43, + 0x11D43, + 0x11D46, + 0x11D47, + 0x11D60, + 0x11D65, + 0x11D67, + 0x11D68, + 0x11D6A, + 0x11D8E, + 0x11D90, + 0x11D91, + 0x11D93, + 0x11D96, + 0x11D98, + 0x11D98, + 0x11EE0, + 0x11EF6, + 0x11F00, + 0x11F10, + 0x11F12, + 0x11F3A, + 0x11F3E, + 0x11F40, + 0x11FB0, + 0x11FB0, + 0x12000, + 0x12399, + 0x12400, + 0x1246E, + 0x12480, + 0x12543, + 0x12F90, + 0x12FF0, + 0x13000, + 0x1342F, + 0x13441, + 0x13446, + 0x14400, + 0x14646, + 0x16800, + 0x16A38, + 0x16A40, + 0x16A5E, + 0x16A70, + 0x16ABE, + 0x16AD0, + 0x16AED, + 0x16B00, + 0x16B2F, + 0x16B40, + 0x16B43, + 0x16B63, + 0x16B77, + 0x16B7D, + 0x16B8F, + 0x16E40, + 0x16E7F, + 0x16F00, + 0x16F4A, + 0x16F4F, + 0x16F87, + 0x16F8F, + 0x16F9F, + 0x16FE0, + 0x16FE1, + 0x16FE3, + 0x16FE3, + 0x16FF0, + 0x16FF1, + 0x17000, + 0x187F7, + 0x18800, + 0x18CD5, + 0x18D00, + 0x18D08, + 0x1AFF0, + 0x1AFF3, + 0x1AFF5, + 0x1AFFB, + 0x1AFFD, + 0x1AFFE, + 0x1B000, + 0x1B122, + 0x1B132, + 0x1B132, + 0x1B150, + 0x1B152, + 0x1B155, + 0x1B155, + 0x1B164, + 0x1B167, + 0x1B170, + 0x1B2FB, + 0x1BC00, + 0x1BC6A, + 0x1BC70, + 0x1BC7C, + 0x1BC80, + 0x1BC88, + 0x1BC90, + 0x1BC99, + 0x1BC9E, + 0x1BC9E, + 0x1D400, + 0x1D454, + 0x1D456, + 0x1D49C, + 0x1D49E, + 0x1D49F, + 0x1D4A2, + 0x1D4A2, + 0x1D4A5, + 0x1D4A6, + 0x1D4A9, + 0x1D4AC, + 0x1D4AE, + 0x1D4B9, + 0x1D4BB, + 0x1D4BB, + 0x1D4BD, + 0x1D4C3, + 0x1D4C5, + 0x1D505, + 0x1D507, + 0x1D50A, + 0x1D50D, + 0x1D514, + 0x1D516, + 0x1D51C, + 0x1D51E, + 
0x1D539, + 0x1D53B, + 0x1D53E, + 0x1D540, + 0x1D544, + 0x1D546, + 0x1D546, + 0x1D54A, + 0x1D550, + 0x1D552, + 0x1D6A5, + 0x1D6A8, + 0x1D6C0, + 0x1D6C2, + 0x1D6DA, + 0x1D6DC, + 0x1D6FA, + 0x1D6FC, + 0x1D714, + 0x1D716, + 0x1D734, + 0x1D736, + 0x1D74E, + 0x1D750, + 0x1D76E, + 0x1D770, + 0x1D788, + 0x1D78A, + 0x1D7A8, + 0x1D7AA, + 0x1D7C2, + 0x1D7C4, + 0x1D7CB, + 0x1DF00, + 0x1DF1E, + 0x1DF25, + 0x1DF2A, + 0x1E000, + 0x1E006, + 0x1E008, + 0x1E018, + 0x1E01B, + 0x1E021, + 0x1E023, + 0x1E024, + 0x1E026, + 0x1E02A, + 0x1E030, + 0x1E06D, + 0x1E08F, + 0x1E08F, + 0x1E100, + 0x1E12C, + 0x1E137, + 0x1E13D, + 0x1E14E, + 0x1E14E, + 0x1E290, + 0x1E2AD, + 0x1E2C0, + 0x1E2EB, + 0x1E4D0, + 0x1E4EB, + 0x1E7E0, + 0x1E7E6, + 0x1E7E8, + 0x1E7EB, + 0x1E7ED, + 0x1E7EE, + 0x1E7F0, + 0x1E7FE, + 0x1E800, + 0x1E8C4, + 0x1E900, + 0x1E943, + 0x1E947, + 0x1E947, + 0x1E94B, + 0x1E94B, + 0x1EE00, + 0x1EE03, + 0x1EE05, + 0x1EE1F, + 0x1EE21, + 0x1EE22, + 0x1EE24, + 0x1EE24, + 0x1EE27, + 0x1EE27, + 0x1EE29, + 0x1EE32, + 0x1EE34, + 0x1EE37, + 0x1EE39, + 0x1EE39, + 0x1EE3B, + 0x1EE3B, + 0x1EE42, + 0x1EE42, + 0x1EE47, + 0x1EE47, + 0x1EE49, + 0x1EE49, + 0x1EE4B, + 0x1EE4B, + 0x1EE4D, + 0x1EE4F, + 0x1EE51, + 0x1EE52, + 0x1EE54, + 0x1EE54, + 0x1EE57, + 0x1EE57, + 0x1EE59, + 0x1EE59, + 0x1EE5B, + 0x1EE5B, + 0x1EE5D, + 0x1EE5D, + 0x1EE5F, + 0x1EE5F, + 0x1EE61, + 0x1EE62, + 0x1EE64, + 0x1EE64, + 0x1EE67, + 0x1EE6A, + 0x1EE6C, + 0x1EE72, + 0x1EE74, + 0x1EE77, + 0x1EE79, + 0x1EE7C, + 0x1EE7E, + 0x1EE7E, + 0x1EE80, + 0x1EE89, + 0x1EE8B, + 0x1EE9B, + 0x1EEA1, + 0x1EEA3, + 0x1EEA5, + 0x1EEA9, + 0x1EEAB, + 0x1EEBB, + 0x1F130, + 0x1F149, + 0x1F150, + 0x1F169, + 0x1F170, + 0x1F189, + 0x20000, + 0x2A6DF, + 0x2A700, + 0x2B739, + 0x2B740, + 0x2B81D, + 0x2B820, + 0x2CEA1, + 0x2CEB0, + 0x2EBE0, + 0x2F800, + 0x2FA1D, + 0x30000, + 0x3134A, + 0x31350, + 0x323AF, +}; + +#define UNICODE_ALNUM_CODEPOINTS_LENGTH 1528 +static const rbs_unicode_codepoint_t unicode_alnum_codepoints[UNICODE_ALNUM_CODEPOINTS_LENGTH] = { + 0x100, + 
0x2C1, + 0x2C6, + 0x2D1, + 0x2E0, + 0x2E4, + 0x2EC, + 0x2EC, + 0x2EE, + 0x2EE, + 0x345, + 0x345, + 0x370, + 0x374, + 0x376, + 0x377, + 0x37A, + 0x37D, + 0x37F, + 0x37F, + 0x386, + 0x386, + 0x388, + 0x38A, + 0x38C, + 0x38C, + 0x38E, + 0x3A1, + 0x3A3, + 0x3F5, + 0x3F7, + 0x481, + 0x48A, + 0x52F, + 0x531, + 0x556, + 0x559, + 0x559, + 0x560, + 0x588, + 0x5B0, + 0x5BD, + 0x5BF, + 0x5BF, + 0x5C1, + 0x5C2, + 0x5C4, + 0x5C5, + 0x5C7, + 0x5C7, + 0x5D0, + 0x5EA, + 0x5EF, + 0x5F2, + 0x610, + 0x61A, + 0x620, + 0x657, + 0x659, + 0x669, + 0x66E, + 0x6D3, + 0x6D5, + 0x6DC, + 0x6E1, + 0x6E8, + 0x6ED, + 0x6FC, + 0x6FF, + 0x6FF, + 0x710, + 0x73F, + 0x74D, + 0x7B1, + 0x7C0, + 0x7EA, + 0x7F4, + 0x7F5, + 0x7FA, + 0x7FA, + 0x800, + 0x817, + 0x81A, + 0x82C, + 0x840, + 0x858, + 0x860, + 0x86A, + 0x870, + 0x887, + 0x889, + 0x88E, + 0x8A0, + 0x8C9, + 0x8D4, + 0x8DF, + 0x8E3, + 0x8E9, + 0x8F0, + 0x93B, + 0x93D, + 0x94C, + 0x94E, + 0x950, + 0x955, + 0x963, + 0x966, + 0x96F, + 0x971, + 0x983, + 0x985, + 0x98C, + 0x98F, + 0x990, + 0x993, + 0x9A8, + 0x9AA, + 0x9B0, + 0x9B2, + 0x9B2, + 0x9B6, + 0x9B9, + 0x9BD, + 0x9C4, + 0x9C7, + 0x9C8, + 0x9CB, + 0x9CC, + 0x9CE, + 0x9CE, + 0x9D7, + 0x9D7, + 0x9DC, + 0x9DD, + 0x9DF, + 0x9E3, + 0x9E6, + 0x9F1, + 0x9FC, + 0x9FC, + 0xA01, + 0xA03, + 0xA05, + 0xA0A, + 0xA0F, + 0xA10, + 0xA13, + 0xA28, + 0xA2A, + 0xA30, + 0xA32, + 0xA33, + 0xA35, + 0xA36, + 0xA38, + 0xA39, + 0xA3E, + 0xA42, + 0xA47, + 0xA48, + 0xA4B, + 0xA4C, + 0xA51, + 0xA51, + 0xA59, + 0xA5C, + 0xA5E, + 0xA5E, + 0xA66, + 0xA75, + 0xA81, + 0xA83, + 0xA85, + 0xA8D, + 0xA8F, + 0xA91, + 0xA93, + 0xAA8, + 0xAAA, + 0xAB0, + 0xAB2, + 0xAB3, + 0xAB5, + 0xAB9, + 0xABD, + 0xAC5, + 0xAC7, + 0xAC9, + 0xACB, + 0xACC, + 0xAD0, + 0xAD0, + 0xAE0, + 0xAE3, + 0xAE6, + 0xAEF, + 0xAF9, + 0xAFC, + 0xB01, + 0xB03, + 0xB05, + 0xB0C, + 0xB0F, + 0xB10, + 0xB13, + 0xB28, + 0xB2A, + 0xB30, + 0xB32, + 0xB33, + 0xB35, + 0xB39, + 0xB3D, + 0xB44, + 0xB47, + 0xB48, + 0xB4B, + 0xB4C, + 0xB56, + 0xB57, + 0xB5C, + 0xB5D, + 0xB5F, + 
0xB63, + 0xB66, + 0xB6F, + 0xB71, + 0xB71, + 0xB82, + 0xB83, + 0xB85, + 0xB8A, + 0xB8E, + 0xB90, + 0xB92, + 0xB95, + 0xB99, + 0xB9A, + 0xB9C, + 0xB9C, + 0xB9E, + 0xB9F, + 0xBA3, + 0xBA4, + 0xBA8, + 0xBAA, + 0xBAE, + 0xBB9, + 0xBBE, + 0xBC2, + 0xBC6, + 0xBC8, + 0xBCA, + 0xBCC, + 0xBD0, + 0xBD0, + 0xBD7, + 0xBD7, + 0xBE6, + 0xBEF, + 0xC00, + 0xC0C, + 0xC0E, + 0xC10, + 0xC12, + 0xC28, + 0xC2A, + 0xC39, + 0xC3D, + 0xC44, + 0xC46, + 0xC48, + 0xC4A, + 0xC4C, + 0xC55, + 0xC56, + 0xC58, + 0xC5A, + 0xC5D, + 0xC5D, + 0xC60, + 0xC63, + 0xC66, + 0xC6F, + 0xC80, + 0xC83, + 0xC85, + 0xC8C, + 0xC8E, + 0xC90, + 0xC92, + 0xCA8, + 0xCAA, + 0xCB3, + 0xCB5, + 0xCB9, + 0xCBD, + 0xCC4, + 0xCC6, + 0xCC8, + 0xCCA, + 0xCCC, + 0xCD5, + 0xCD6, + 0xCDD, + 0xCDE, + 0xCE0, + 0xCE3, + 0xCE6, + 0xCEF, + 0xCF1, + 0xCF3, + 0xD00, + 0xD0C, + 0xD0E, + 0xD10, + 0xD12, + 0xD3A, + 0xD3D, + 0xD44, + 0xD46, + 0xD48, + 0xD4A, + 0xD4C, + 0xD4E, + 0xD4E, + 0xD54, + 0xD57, + 0xD5F, + 0xD63, + 0xD66, + 0xD6F, + 0xD7A, + 0xD7F, + 0xD81, + 0xD83, + 0xD85, + 0xD96, + 0xD9A, + 0xDB1, + 0xDB3, + 0xDBB, + 0xDBD, + 0xDBD, + 0xDC0, + 0xDC6, + 0xDCF, + 0xDD4, + 0xDD6, + 0xDD6, + 0xDD8, + 0xDDF, + 0xDE6, + 0xDEF, + 0xDF2, + 0xDF3, + 0xE01, + 0xE3A, + 0xE40, + 0xE46, + 0xE4D, + 0xE4D, + 0xE50, + 0xE59, + 0xE81, + 0xE82, + 0xE84, + 0xE84, + 0xE86, + 0xE8A, + 0xE8C, + 0xEA3, + 0xEA5, + 0xEA5, + 0xEA7, + 0xEB9, + 0xEBB, + 0xEBD, + 0xEC0, + 0xEC4, + 0xEC6, + 0xEC6, + 0xECD, + 0xECD, + 0xED0, + 0xED9, + 0xEDC, + 0xEDF, + 0xF00, + 0xF00, + 0xF20, + 0xF29, + 0xF40, + 0xF47, + 0xF49, + 0xF6C, + 0xF71, + 0xF83, + 0xF88, + 0xF97, + 0xF99, + 0xFBC, + 0x1000, + 0x1036, + 0x1038, + 0x1038, + 0x103B, + 0x1049, + 0x1050, + 0x109D, + 0x10A0, + 0x10C5, + 0x10C7, + 0x10C7, + 0x10CD, + 0x10CD, + 0x10D0, + 0x10FA, + 0x10FC, + 0x1248, + 0x124A, + 0x124D, + 0x1250, + 0x1256, + 0x1258, + 0x1258, + 0x125A, + 0x125D, + 0x1260, + 0x1288, + 0x128A, + 0x128D, + 0x1290, + 0x12B0, + 0x12B2, + 0x12B5, + 0x12B8, + 0x12BE, + 0x12C0, + 0x12C0, + 0x12C2, 
+ 0x12C5, + 0x12C8, + 0x12D6, + 0x12D8, + 0x1310, + 0x1312, + 0x1315, + 0x1318, + 0x135A, + 0x1380, + 0x138F, + 0x13A0, + 0x13F5, + 0x13F8, + 0x13FD, + 0x1401, + 0x166C, + 0x166F, + 0x167F, + 0x1681, + 0x169A, + 0x16A0, + 0x16EA, + 0x16EE, + 0x16F8, + 0x1700, + 0x1713, + 0x171F, + 0x1733, + 0x1740, + 0x1753, + 0x1760, + 0x176C, + 0x176E, + 0x1770, + 0x1772, + 0x1773, + 0x1780, + 0x17B3, + 0x17B6, + 0x17C8, + 0x17D7, + 0x17D7, + 0x17DC, + 0x17DC, + 0x17E0, + 0x17E9, + 0x1810, + 0x1819, + 0x1820, + 0x1878, + 0x1880, + 0x18AA, + 0x18B0, + 0x18F5, + 0x1900, + 0x191E, + 0x1920, + 0x192B, + 0x1930, + 0x1938, + 0x1946, + 0x196D, + 0x1970, + 0x1974, + 0x1980, + 0x19AB, + 0x19B0, + 0x19C9, + 0x19D0, + 0x19D9, + 0x1A00, + 0x1A1B, + 0x1A20, + 0x1A5E, + 0x1A61, + 0x1A74, + 0x1A80, + 0x1A89, + 0x1A90, + 0x1A99, + 0x1AA7, + 0x1AA7, + 0x1ABF, + 0x1AC0, + 0x1ACC, + 0x1ACE, + 0x1B00, + 0x1B33, + 0x1B35, + 0x1B43, + 0x1B45, + 0x1B4C, + 0x1B50, + 0x1B59, + 0x1B80, + 0x1BA9, + 0x1BAC, + 0x1BE5, + 0x1BE7, + 0x1BF1, + 0x1C00, + 0x1C36, + 0x1C40, + 0x1C49, + 0x1C4D, + 0x1C7D, + 0x1C80, + 0x1C88, + 0x1C90, + 0x1CBA, + 0x1CBD, + 0x1CBF, + 0x1CE9, + 0x1CEC, + 0x1CEE, + 0x1CF3, + 0x1CF5, + 0x1CF6, + 0x1CFA, + 0x1CFA, + 0x1D00, + 0x1DBF, + 0x1DE7, + 0x1DF4, + 0x1E00, + 0x1F15, + 0x1F18, + 0x1F1D, + 0x1F20, + 0x1F45, + 0x1F48, + 0x1F4D, + 0x1F50, + 0x1F57, + 0x1F59, + 0x1F59, + 0x1F5B, + 0x1F5B, + 0x1F5D, + 0x1F5D, + 0x1F5F, + 0x1F7D, + 0x1F80, + 0x1FB4, + 0x1FB6, + 0x1FBC, + 0x1FBE, + 0x1FBE, + 0x1FC2, + 0x1FC4, + 0x1FC6, + 0x1FCC, + 0x1FD0, + 0x1FD3, + 0x1FD6, + 0x1FDB, + 0x1FE0, + 0x1FEC, + 0x1FF2, + 0x1FF4, + 0x1FF6, + 0x1FFC, + 0x2071, + 0x2071, + 0x207F, + 0x207F, + 0x2090, + 0x209C, + 0x2102, + 0x2102, + 0x2107, + 0x2107, + 0x210A, + 0x2113, + 0x2115, + 0x2115, + 0x2119, + 0x211D, + 0x2124, + 0x2124, + 0x2126, + 0x2126, + 0x2128, + 0x2128, + 0x212A, + 0x212D, + 0x212F, + 0x2139, + 0x213C, + 0x213F, + 0x2145, + 0x2149, + 0x214E, + 0x214E, + 0x2160, + 0x2188, + 0x24B6, + 0x24E9, + 0x2C00, 
+ 0x2CE4, + 0x2CEB, + 0x2CEE, + 0x2CF2, + 0x2CF3, + 0x2D00, + 0x2D25, + 0x2D27, + 0x2D27, + 0x2D2D, + 0x2D2D, + 0x2D30, + 0x2D67, + 0x2D6F, + 0x2D6F, + 0x2D80, + 0x2D96, + 0x2DA0, + 0x2DA6, + 0x2DA8, + 0x2DAE, + 0x2DB0, + 0x2DB6, + 0x2DB8, + 0x2DBE, + 0x2DC0, + 0x2DC6, + 0x2DC8, + 0x2DCE, + 0x2DD0, + 0x2DD6, + 0x2DD8, + 0x2DDE, + 0x2DE0, + 0x2DFF, + 0x2E2F, + 0x2E2F, + 0x3005, + 0x3007, + 0x3021, + 0x3029, + 0x3031, + 0x3035, + 0x3038, + 0x303C, + 0x3041, + 0x3096, + 0x309D, + 0x309F, + 0x30A1, + 0x30FA, + 0x30FC, + 0x30FF, + 0x3105, + 0x312F, + 0x3131, + 0x318E, + 0x31A0, + 0x31BF, + 0x31F0, + 0x31FF, + 0x3400, + 0x4DBF, + 0x4E00, + 0xA48C, + 0xA4D0, + 0xA4FD, + 0xA500, + 0xA60C, + 0xA610, + 0xA62B, + 0xA640, + 0xA66E, + 0xA674, + 0xA67B, + 0xA67F, + 0xA6EF, + 0xA717, + 0xA71F, + 0xA722, + 0xA788, + 0xA78B, + 0xA7CA, + 0xA7D0, + 0xA7D1, + 0xA7D3, + 0xA7D3, + 0xA7D5, + 0xA7D9, + 0xA7F2, + 0xA805, + 0xA807, + 0xA827, + 0xA840, + 0xA873, + 0xA880, + 0xA8C3, + 0xA8C5, + 0xA8C5, + 0xA8D0, + 0xA8D9, + 0xA8F2, + 0xA8F7, + 0xA8FB, + 0xA8FB, + 0xA8FD, + 0xA92A, + 0xA930, + 0xA952, + 0xA960, + 0xA97C, + 0xA980, + 0xA9B2, + 0xA9B4, + 0xA9BF, + 0xA9CF, + 0xA9D9, + 0xA9E0, + 0xA9FE, + 0xAA00, + 0xAA36, + 0xAA40, + 0xAA4D, + 0xAA50, + 0xAA59, + 0xAA60, + 0xAA76, + 0xAA7A, + 0xAABE, + 0xAAC0, + 0xAAC0, + 0xAAC2, + 0xAAC2, + 0xAADB, + 0xAADD, + 0xAAE0, + 0xAAEF, + 0xAAF2, + 0xAAF5, + 0xAB01, + 0xAB06, + 0xAB09, + 0xAB0E, + 0xAB11, + 0xAB16, + 0xAB20, + 0xAB26, + 0xAB28, + 0xAB2E, + 0xAB30, + 0xAB5A, + 0xAB5C, + 0xAB69, + 0xAB70, + 0xABEA, + 0xABF0, + 0xABF9, + 0xAC00, + 0xD7A3, + 0xD7B0, + 0xD7C6, + 0xD7CB, + 0xD7FB, + 0xF900, + 0xFA6D, + 0xFA70, + 0xFAD9, + 0xFB00, + 0xFB06, + 0xFB13, + 0xFB17, + 0xFB1D, + 0xFB28, + 0xFB2A, + 0xFB36, + 0xFB38, + 0xFB3C, + 0xFB3E, + 0xFB3E, + 0xFB40, + 0xFB41, + 0xFB43, + 0xFB44, + 0xFB46, + 0xFBB1, + 0xFBD3, + 0xFD3D, + 0xFD50, + 0xFD8F, + 0xFD92, + 0xFDC7, + 0xFDF0, + 0xFDFB, + 0xFE70, + 0xFE74, + 0xFE76, + 0xFEFC, + 0xFF10, + 0xFF19, + 0xFF21, 
+ 0xFF3A, + 0xFF41, + 0xFF5A, + 0xFF66, + 0xFFBE, + 0xFFC2, + 0xFFC7, + 0xFFCA, + 0xFFCF, + 0xFFD2, + 0xFFD7, + 0xFFDA, + 0xFFDC, + 0x10000, + 0x1000B, + 0x1000D, + 0x10026, + 0x10028, + 0x1003A, + 0x1003C, + 0x1003D, + 0x1003F, + 0x1004D, + 0x10050, + 0x1005D, + 0x10080, + 0x100FA, + 0x10140, + 0x10174, + 0x10280, + 0x1029C, + 0x102A0, + 0x102D0, + 0x10300, + 0x1031F, + 0x1032D, + 0x1034A, + 0x10350, + 0x1037A, + 0x10380, + 0x1039D, + 0x103A0, + 0x103C3, + 0x103C8, + 0x103CF, + 0x103D1, + 0x103D5, + 0x10400, + 0x1049D, + 0x104A0, + 0x104A9, + 0x104B0, + 0x104D3, + 0x104D8, + 0x104FB, + 0x10500, + 0x10527, + 0x10530, + 0x10563, + 0x10570, + 0x1057A, + 0x1057C, + 0x1058A, + 0x1058C, + 0x10592, + 0x10594, + 0x10595, + 0x10597, + 0x105A1, + 0x105A3, + 0x105B1, + 0x105B3, + 0x105B9, + 0x105BB, + 0x105BC, + 0x10600, + 0x10736, + 0x10740, + 0x10755, + 0x10760, + 0x10767, + 0x10780, + 0x10785, + 0x10787, + 0x107B0, + 0x107B2, + 0x107BA, + 0x10800, + 0x10805, + 0x10808, + 0x10808, + 0x1080A, + 0x10835, + 0x10837, + 0x10838, + 0x1083C, + 0x1083C, + 0x1083F, + 0x10855, + 0x10860, + 0x10876, + 0x10880, + 0x1089E, + 0x108E0, + 0x108F2, + 0x108F4, + 0x108F5, + 0x10900, + 0x10915, + 0x10920, + 0x10939, + 0x10980, + 0x109B7, + 0x109BE, + 0x109BF, + 0x10A00, + 0x10A03, + 0x10A05, + 0x10A06, + 0x10A0C, + 0x10A13, + 0x10A15, + 0x10A17, + 0x10A19, + 0x10A35, + 0x10A60, + 0x10A7C, + 0x10A80, + 0x10A9C, + 0x10AC0, + 0x10AC7, + 0x10AC9, + 0x10AE4, + 0x10B00, + 0x10B35, + 0x10B40, + 0x10B55, + 0x10B60, + 0x10B72, + 0x10B80, + 0x10B91, + 0x10C00, + 0x10C48, + 0x10C80, + 0x10CB2, + 0x10CC0, + 0x10CF2, + 0x10D00, + 0x10D27, + 0x10D30, + 0x10D39, + 0x10E80, + 0x10EA9, + 0x10EAB, + 0x10EAC, + 0x10EB0, + 0x10EB1, + 0x10F00, + 0x10F1C, + 0x10F27, + 0x10F27, + 0x10F30, + 0x10F45, + 0x10F70, + 0x10F81, + 0x10FB0, + 0x10FC4, + 0x10FE0, + 0x10FF6, + 0x11000, + 0x11045, + 0x11066, + 0x1106F, + 0x11071, + 0x11075, + 0x11080, + 0x110B8, + 0x110C2, + 0x110C2, + 0x110D0, + 0x110E8, + 0x110F0, + 0x110F9, 
+ 0x11100, + 0x11132, + 0x11136, + 0x1113F, + 0x11144, + 0x11147, + 0x11150, + 0x11172, + 0x11176, + 0x11176, + 0x11180, + 0x111BF, + 0x111C1, + 0x111C4, + 0x111CE, + 0x111DA, + 0x111DC, + 0x111DC, + 0x11200, + 0x11211, + 0x11213, + 0x11234, + 0x11237, + 0x11237, + 0x1123E, + 0x11241, + 0x11280, + 0x11286, + 0x11288, + 0x11288, + 0x1128A, + 0x1128D, + 0x1128F, + 0x1129D, + 0x1129F, + 0x112A8, + 0x112B0, + 0x112E8, + 0x112F0, + 0x112F9, + 0x11300, + 0x11303, + 0x11305, + 0x1130C, + 0x1130F, + 0x11310, + 0x11313, + 0x11328, + 0x1132A, + 0x11330, + 0x11332, + 0x11333, + 0x11335, + 0x11339, + 0x1133D, + 0x11344, + 0x11347, + 0x11348, + 0x1134B, + 0x1134C, + 0x11350, + 0x11350, + 0x11357, + 0x11357, + 0x1135D, + 0x11363, + 0x11400, + 0x11441, + 0x11443, + 0x11445, + 0x11447, + 0x1144A, + 0x11450, + 0x11459, + 0x1145F, + 0x11461, + 0x11480, + 0x114C1, + 0x114C4, + 0x114C5, + 0x114C7, + 0x114C7, + 0x114D0, + 0x114D9, + 0x11580, + 0x115B5, + 0x115B8, + 0x115BE, + 0x115D8, + 0x115DD, + 0x11600, + 0x1163E, + 0x11640, + 0x11640, + 0x11644, + 0x11644, + 0x11650, + 0x11659, + 0x11680, + 0x116B5, + 0x116B8, + 0x116B8, + 0x116C0, + 0x116C9, + 0x11700, + 0x1171A, + 0x1171D, + 0x1172A, + 0x11730, + 0x11739, + 0x11740, + 0x11746, + 0x11800, + 0x11838, + 0x118A0, + 0x118E9, + 0x118FF, + 0x11906, + 0x11909, + 0x11909, + 0x1190C, + 0x11913, + 0x11915, + 0x11916, + 0x11918, + 0x11935, + 0x11937, + 0x11938, + 0x1193B, + 0x1193C, + 0x1193F, + 0x11942, + 0x11950, + 0x11959, + 0x119A0, + 0x119A7, + 0x119AA, + 0x119D7, + 0x119DA, + 0x119DF, + 0x119E1, + 0x119E1, + 0x119E3, + 0x119E4, + 0x11A00, + 0x11A32, + 0x11A35, + 0x11A3E, + 0x11A50, + 0x11A97, + 0x11A9D, + 0x11A9D, + 0x11AB0, + 0x11AF8, + 0x11C00, + 0x11C08, + 0x11C0A, + 0x11C36, + 0x11C38, + 0x11C3E, + 0x11C40, + 0x11C40, + 0x11C50, + 0x11C59, + 0x11C72, + 0x11C8F, + 0x11C92, + 0x11CA7, + 0x11CA9, + 0x11CB6, + 0x11D00, + 0x11D06, + 0x11D08, + 0x11D09, + 0x11D0B, + 0x11D36, + 0x11D3A, + 0x11D3A, + 0x11D3C, + 0x11D3D, + 0x11D3F, + 
0x11D41, + 0x11D43, + 0x11D43, + 0x11D46, + 0x11D47, + 0x11D50, + 0x11D59, + 0x11D60, + 0x11D65, + 0x11D67, + 0x11D68, + 0x11D6A, + 0x11D8E, + 0x11D90, + 0x11D91, + 0x11D93, + 0x11D96, + 0x11D98, + 0x11D98, + 0x11DA0, + 0x11DA9, + 0x11EE0, + 0x11EF6, + 0x11F00, + 0x11F10, + 0x11F12, + 0x11F3A, + 0x11F3E, + 0x11F40, + 0x11F50, + 0x11F59, + 0x11FB0, + 0x11FB0, + 0x12000, + 0x12399, + 0x12400, + 0x1246E, + 0x12480, + 0x12543, + 0x12F90, + 0x12FF0, + 0x13000, + 0x1342F, + 0x13441, + 0x13446, + 0x14400, + 0x14646, + 0x16800, + 0x16A38, + 0x16A40, + 0x16A5E, + 0x16A60, + 0x16A69, + 0x16A70, + 0x16ABE, + 0x16AC0, + 0x16AC9, + 0x16AD0, + 0x16AED, + 0x16B00, + 0x16B2F, + 0x16B40, + 0x16B43, + 0x16B50, + 0x16B59, + 0x16B63, + 0x16B77, + 0x16B7D, + 0x16B8F, + 0x16E40, + 0x16E7F, + 0x16F00, + 0x16F4A, + 0x16F4F, + 0x16F87, + 0x16F8F, + 0x16F9F, + 0x16FE0, + 0x16FE1, + 0x16FE3, + 0x16FE3, + 0x16FF0, + 0x16FF1, + 0x17000, + 0x187F7, + 0x18800, + 0x18CD5, + 0x18D00, + 0x18D08, + 0x1AFF0, + 0x1AFF3, + 0x1AFF5, + 0x1AFFB, + 0x1AFFD, + 0x1AFFE, + 0x1B000, + 0x1B122, + 0x1B132, + 0x1B132, + 0x1B150, + 0x1B152, + 0x1B155, + 0x1B155, + 0x1B164, + 0x1B167, + 0x1B170, + 0x1B2FB, + 0x1BC00, + 0x1BC6A, + 0x1BC70, + 0x1BC7C, + 0x1BC80, + 0x1BC88, + 0x1BC90, + 0x1BC99, + 0x1BC9E, + 0x1BC9E, + 0x1D400, + 0x1D454, + 0x1D456, + 0x1D49C, + 0x1D49E, + 0x1D49F, + 0x1D4A2, + 0x1D4A2, + 0x1D4A5, + 0x1D4A6, + 0x1D4A9, + 0x1D4AC, + 0x1D4AE, + 0x1D4B9, + 0x1D4BB, + 0x1D4BB, + 0x1D4BD, + 0x1D4C3, + 0x1D4C5, + 0x1D505, + 0x1D507, + 0x1D50A, + 0x1D50D, + 0x1D514, + 0x1D516, + 0x1D51C, + 0x1D51E, + 0x1D539, + 0x1D53B, + 0x1D53E, + 0x1D540, + 0x1D544, + 0x1D546, + 0x1D546, + 0x1D54A, + 0x1D550, + 0x1D552, + 0x1D6A5, + 0x1D6A8, + 0x1D6C0, + 0x1D6C2, + 0x1D6DA, + 0x1D6DC, + 0x1D6FA, + 0x1D6FC, + 0x1D714, + 0x1D716, + 0x1D734, + 0x1D736, + 0x1D74E, + 0x1D750, + 0x1D76E, + 0x1D770, + 0x1D788, + 0x1D78A, + 0x1D7A8, + 0x1D7AA, + 0x1D7C2, + 0x1D7C4, + 0x1D7CB, + 0x1D7CE, + 0x1D7FF, + 0x1DF00, + 0x1DF1E, + 0x1DF25, 
+ 0x1DF2A, + 0x1E000, + 0x1E006, + 0x1E008, + 0x1E018, + 0x1E01B, + 0x1E021, + 0x1E023, + 0x1E024, + 0x1E026, + 0x1E02A, + 0x1E030, + 0x1E06D, + 0x1E08F, + 0x1E08F, + 0x1E100, + 0x1E12C, + 0x1E137, + 0x1E13D, + 0x1E140, + 0x1E149, + 0x1E14E, + 0x1E14E, + 0x1E290, + 0x1E2AD, + 0x1E2C0, + 0x1E2EB, + 0x1E2F0, + 0x1E2F9, + 0x1E4D0, + 0x1E4EB, + 0x1E4F0, + 0x1E4F9, + 0x1E7E0, + 0x1E7E6, + 0x1E7E8, + 0x1E7EB, + 0x1E7ED, + 0x1E7EE, + 0x1E7F0, + 0x1E7FE, + 0x1E800, + 0x1E8C4, + 0x1E900, + 0x1E943, + 0x1E947, + 0x1E947, + 0x1E94B, + 0x1E94B, + 0x1E950, + 0x1E959, + 0x1EE00, + 0x1EE03, + 0x1EE05, + 0x1EE1F, + 0x1EE21, + 0x1EE22, + 0x1EE24, + 0x1EE24, + 0x1EE27, + 0x1EE27, + 0x1EE29, + 0x1EE32, + 0x1EE34, + 0x1EE37, + 0x1EE39, + 0x1EE39, + 0x1EE3B, + 0x1EE3B, + 0x1EE42, + 0x1EE42, + 0x1EE47, + 0x1EE47, + 0x1EE49, + 0x1EE49, + 0x1EE4B, + 0x1EE4B, + 0x1EE4D, + 0x1EE4F, + 0x1EE51, + 0x1EE52, + 0x1EE54, + 0x1EE54, + 0x1EE57, + 0x1EE57, + 0x1EE59, + 0x1EE59, + 0x1EE5B, + 0x1EE5B, + 0x1EE5D, + 0x1EE5D, + 0x1EE5F, + 0x1EE5F, + 0x1EE61, + 0x1EE62, + 0x1EE64, + 0x1EE64, + 0x1EE67, + 0x1EE6A, + 0x1EE6C, + 0x1EE72, + 0x1EE74, + 0x1EE77, + 0x1EE79, + 0x1EE7C, + 0x1EE7E, + 0x1EE7E, + 0x1EE80, + 0x1EE89, + 0x1EE8B, + 0x1EE9B, + 0x1EEA1, + 0x1EEA3, + 0x1EEA5, + 0x1EEA9, + 0x1EEAB, + 0x1EEBB, + 0x1F130, + 0x1F149, + 0x1F150, + 0x1F169, + 0x1F170, + 0x1F189, + 0x1FBF0, + 0x1FBF9, + 0x20000, + 0x2A6DF, + 0x2A700, + 0x2B739, + 0x2B740, + 0x2B81D, + 0x2B820, + 0x2CEA1, + 0x2CEB0, + 0x2EBE0, + 0x2F800, + 0x2FA1D, + 0x30000, + 0x3134A, + 0x31350, + 0x323AF, +}; + +#define UNICODE_ISUPPER_CODEPOINTS_LENGTH 1302 +static const rbs_unicode_codepoint_t unicode_isupper_codepoints[UNICODE_ISUPPER_CODEPOINTS_LENGTH] = { + 0x100, + 0x100, + 0x102, + 0x102, + 0x104, + 0x104, + 0x106, + 0x106, + 0x108, + 0x108, + 0x10A, + 0x10A, + 0x10C, + 0x10C, + 0x10E, + 0x10E, + 0x110, + 0x110, + 0x112, + 0x112, + 0x114, + 0x114, + 0x116, + 0x116, + 0x118, + 0x118, + 0x11A, + 0x11A, + 0x11C, + 0x11C, + 0x11E, + 0x11E, + 
0x120, + 0x120, + 0x122, + 0x122, + 0x124, + 0x124, + 0x126, + 0x126, + 0x128, + 0x128, + 0x12A, + 0x12A, + 0x12C, + 0x12C, + 0x12E, + 0x12E, + 0x130, + 0x130, + 0x132, + 0x132, + 0x134, + 0x134, + 0x136, + 0x136, + 0x139, + 0x139, + 0x13B, + 0x13B, + 0x13D, + 0x13D, + 0x13F, + 0x13F, + 0x141, + 0x141, + 0x143, + 0x143, + 0x145, + 0x145, + 0x147, + 0x147, + 0x14A, + 0x14A, + 0x14C, + 0x14C, + 0x14E, + 0x14E, + 0x150, + 0x150, + 0x152, + 0x152, + 0x154, + 0x154, + 0x156, + 0x156, + 0x158, + 0x158, + 0x15A, + 0x15A, + 0x15C, + 0x15C, + 0x15E, + 0x15E, + 0x160, + 0x160, + 0x162, + 0x162, + 0x164, + 0x164, + 0x166, + 0x166, + 0x168, + 0x168, + 0x16A, + 0x16A, + 0x16C, + 0x16C, + 0x16E, + 0x16E, + 0x170, + 0x170, + 0x172, + 0x172, + 0x174, + 0x174, + 0x176, + 0x176, + 0x178, + 0x179, + 0x17B, + 0x17B, + 0x17D, + 0x17D, + 0x181, + 0x182, + 0x184, + 0x184, + 0x186, + 0x187, + 0x189, + 0x18B, + 0x18E, + 0x191, + 0x193, + 0x194, + 0x196, + 0x198, + 0x19C, + 0x19D, + 0x19F, + 0x1A0, + 0x1A2, + 0x1A2, + 0x1A4, + 0x1A4, + 0x1A6, + 0x1A7, + 0x1A9, + 0x1A9, + 0x1AC, + 0x1AC, + 0x1AE, + 0x1AF, + 0x1B1, + 0x1B3, + 0x1B5, + 0x1B5, + 0x1B7, + 0x1B8, + 0x1BC, + 0x1BC, + 0x1C4, + 0x1C5, + 0x1C7, + 0x1C8, + 0x1CA, + 0x1CB, + 0x1CD, + 0x1CD, + 0x1CF, + 0x1CF, + 0x1D1, + 0x1D1, + 0x1D3, + 0x1D3, + 0x1D5, + 0x1D5, + 0x1D7, + 0x1D7, + 0x1D9, + 0x1D9, + 0x1DB, + 0x1DB, + 0x1DE, + 0x1DE, + 0x1E0, + 0x1E0, + 0x1E2, + 0x1E2, + 0x1E4, + 0x1E4, + 0x1E6, + 0x1E6, + 0x1E8, + 0x1E8, + 0x1EA, + 0x1EA, + 0x1EC, + 0x1EC, + 0x1EE, + 0x1EE, + 0x1F1, + 0x1F2, + 0x1F4, + 0x1F4, + 0x1F6, + 0x1F8, + 0x1FA, + 0x1FA, + 0x1FC, + 0x1FC, + 0x1FE, + 0x1FE, + 0x200, + 0x200, + 0x202, + 0x202, + 0x204, + 0x204, + 0x206, + 0x206, + 0x208, + 0x208, + 0x20A, + 0x20A, + 0x20C, + 0x20C, + 0x20E, + 0x20E, + 0x210, + 0x210, + 0x212, + 0x212, + 0x214, + 0x214, + 0x216, + 0x216, + 0x218, + 0x218, + 0x21A, + 0x21A, + 0x21C, + 0x21C, + 0x21E, + 0x21E, + 0x220, + 0x220, + 0x222, + 0x222, + 0x224, + 0x224, + 0x226, + 0x226, + 
0x228, + 0x228, + 0x22A, + 0x22A, + 0x22C, + 0x22C, + 0x22E, + 0x22E, + 0x230, + 0x230, + 0x232, + 0x232, + 0x23A, + 0x23B, + 0x23D, + 0x23E, + 0x241, + 0x241, + 0x243, + 0x246, + 0x248, + 0x248, + 0x24A, + 0x24A, + 0x24C, + 0x24C, + 0x24E, + 0x24E, + 0x370, + 0x370, + 0x372, + 0x372, + 0x376, + 0x376, + 0x37F, + 0x37F, + 0x386, + 0x386, + 0x388, + 0x38A, + 0x38C, + 0x38C, + 0x38E, + 0x38F, + 0x391, + 0x3A1, + 0x3A3, + 0x3AB, + 0x3CF, + 0x3CF, + 0x3D2, + 0x3D4, + 0x3D8, + 0x3D8, + 0x3DA, + 0x3DA, + 0x3DC, + 0x3DC, + 0x3DE, + 0x3DE, + 0x3E0, + 0x3E0, + 0x3E2, + 0x3E2, + 0x3E4, + 0x3E4, + 0x3E6, + 0x3E6, + 0x3E8, + 0x3E8, + 0x3EA, + 0x3EA, + 0x3EC, + 0x3EC, + 0x3EE, + 0x3EE, + 0x3F4, + 0x3F4, + 0x3F7, + 0x3F7, + 0x3F9, + 0x3FA, + 0x3FD, + 0x42F, + 0x460, + 0x460, + 0x462, + 0x462, + 0x464, + 0x464, + 0x466, + 0x466, + 0x468, + 0x468, + 0x46A, + 0x46A, + 0x46C, + 0x46C, + 0x46E, + 0x46E, + 0x470, + 0x470, + 0x472, + 0x472, + 0x474, + 0x474, + 0x476, + 0x476, + 0x478, + 0x478, + 0x47A, + 0x47A, + 0x47C, + 0x47C, + 0x47E, + 0x47E, + 0x480, + 0x480, + 0x48A, + 0x48A, + 0x48C, + 0x48C, + 0x48E, + 0x48E, + 0x490, + 0x490, + 0x492, + 0x492, + 0x494, + 0x494, + 0x496, + 0x496, + 0x498, + 0x498, + 0x49A, + 0x49A, + 0x49C, + 0x49C, + 0x49E, + 0x49E, + 0x4A0, + 0x4A0, + 0x4A2, + 0x4A2, + 0x4A4, + 0x4A4, + 0x4A6, + 0x4A6, + 0x4A8, + 0x4A8, + 0x4AA, + 0x4AA, + 0x4AC, + 0x4AC, + 0x4AE, + 0x4AE, + 0x4B0, + 0x4B0, + 0x4B2, + 0x4B2, + 0x4B4, + 0x4B4, + 0x4B6, + 0x4B6, + 0x4B8, + 0x4B8, + 0x4BA, + 0x4BA, + 0x4BC, + 0x4BC, + 0x4BE, + 0x4BE, + 0x4C0, + 0x4C1, + 0x4C3, + 0x4C3, + 0x4C5, + 0x4C5, + 0x4C7, + 0x4C7, + 0x4C9, + 0x4C9, + 0x4CB, + 0x4CB, + 0x4CD, + 0x4CD, + 0x4D0, + 0x4D0, + 0x4D2, + 0x4D2, + 0x4D4, + 0x4D4, + 0x4D6, + 0x4D6, + 0x4D8, + 0x4D8, + 0x4DA, + 0x4DA, + 0x4DC, + 0x4DC, + 0x4DE, + 0x4DE, + 0x4E0, + 0x4E0, + 0x4E2, + 0x4E2, + 0x4E4, + 0x4E4, + 0x4E6, + 0x4E6, + 0x4E8, + 0x4E8, + 0x4EA, + 0x4EA, + 0x4EC, + 0x4EC, + 0x4EE, + 0x4EE, + 0x4F0, + 0x4F0, + 0x4F2, + 0x4F2, + 
0x4F4, + 0x4F4, + 0x4F6, + 0x4F6, + 0x4F8, + 0x4F8, + 0x4FA, + 0x4FA, + 0x4FC, + 0x4FC, + 0x4FE, + 0x4FE, + 0x500, + 0x500, + 0x502, + 0x502, + 0x504, + 0x504, + 0x506, + 0x506, + 0x508, + 0x508, + 0x50A, + 0x50A, + 0x50C, + 0x50C, + 0x50E, + 0x50E, + 0x510, + 0x510, + 0x512, + 0x512, + 0x514, + 0x514, + 0x516, + 0x516, + 0x518, + 0x518, + 0x51A, + 0x51A, + 0x51C, + 0x51C, + 0x51E, + 0x51E, + 0x520, + 0x520, + 0x522, + 0x522, + 0x524, + 0x524, + 0x526, + 0x526, + 0x528, + 0x528, + 0x52A, + 0x52A, + 0x52C, + 0x52C, + 0x52E, + 0x52E, + 0x531, + 0x556, + 0x10A0, + 0x10C5, + 0x10C7, + 0x10C7, + 0x10CD, + 0x10CD, + 0x13A0, + 0x13F5, + 0x1C90, + 0x1CBA, + 0x1CBD, + 0x1CBF, + 0x1E00, + 0x1E00, + 0x1E02, + 0x1E02, + 0x1E04, + 0x1E04, + 0x1E06, + 0x1E06, + 0x1E08, + 0x1E08, + 0x1E0A, + 0x1E0A, + 0x1E0C, + 0x1E0C, + 0x1E0E, + 0x1E0E, + 0x1E10, + 0x1E10, + 0x1E12, + 0x1E12, + 0x1E14, + 0x1E14, + 0x1E16, + 0x1E16, + 0x1E18, + 0x1E18, + 0x1E1A, + 0x1E1A, + 0x1E1C, + 0x1E1C, + 0x1E1E, + 0x1E1E, + 0x1E20, + 0x1E20, + 0x1E22, + 0x1E22, + 0x1E24, + 0x1E24, + 0x1E26, + 0x1E26, + 0x1E28, + 0x1E28, + 0x1E2A, + 0x1E2A, + 0x1E2C, + 0x1E2C, + 0x1E2E, + 0x1E2E, + 0x1E30, + 0x1E30, + 0x1E32, + 0x1E32, + 0x1E34, + 0x1E34, + 0x1E36, + 0x1E36, + 0x1E38, + 0x1E38, + 0x1E3A, + 0x1E3A, + 0x1E3C, + 0x1E3C, + 0x1E3E, + 0x1E3E, + 0x1E40, + 0x1E40, + 0x1E42, + 0x1E42, + 0x1E44, + 0x1E44, + 0x1E46, + 0x1E46, + 0x1E48, + 0x1E48, + 0x1E4A, + 0x1E4A, + 0x1E4C, + 0x1E4C, + 0x1E4E, + 0x1E4E, + 0x1E50, + 0x1E50, + 0x1E52, + 0x1E52, + 0x1E54, + 0x1E54, + 0x1E56, + 0x1E56, + 0x1E58, + 0x1E58, + 0x1E5A, + 0x1E5A, + 0x1E5C, + 0x1E5C, + 0x1E5E, + 0x1E5E, + 0x1E60, + 0x1E60, + 0x1E62, + 0x1E62, + 0x1E64, + 0x1E64, + 0x1E66, + 0x1E66, + 0x1E68, + 0x1E68, + 0x1E6A, + 0x1E6A, + 0x1E6C, + 0x1E6C, + 0x1E6E, + 0x1E6E, + 0x1E70, + 0x1E70, + 0x1E72, + 0x1E72, + 0x1E74, + 0x1E74, + 0x1E76, + 0x1E76, + 0x1E78, + 0x1E78, + 0x1E7A, + 0x1E7A, + 0x1E7C, + 0x1E7C, + 0x1E7E, + 0x1E7E, + 0x1E80, + 0x1E80, + 0x1E82, + 0x1E82, + 
0x1E84, + 0x1E84, + 0x1E86, + 0x1E86, + 0x1E88, + 0x1E88, + 0x1E8A, + 0x1E8A, + 0x1E8C, + 0x1E8C, + 0x1E8E, + 0x1E8E, + 0x1E90, + 0x1E90, + 0x1E92, + 0x1E92, + 0x1E94, + 0x1E94, + 0x1E9E, + 0x1E9E, + 0x1EA0, + 0x1EA0, + 0x1EA2, + 0x1EA2, + 0x1EA4, + 0x1EA4, + 0x1EA6, + 0x1EA6, + 0x1EA8, + 0x1EA8, + 0x1EAA, + 0x1EAA, + 0x1EAC, + 0x1EAC, + 0x1EAE, + 0x1EAE, + 0x1EB0, + 0x1EB0, + 0x1EB2, + 0x1EB2, + 0x1EB4, + 0x1EB4, + 0x1EB6, + 0x1EB6, + 0x1EB8, + 0x1EB8, + 0x1EBA, + 0x1EBA, + 0x1EBC, + 0x1EBC, + 0x1EBE, + 0x1EBE, + 0x1EC0, + 0x1EC0, + 0x1EC2, + 0x1EC2, + 0x1EC4, + 0x1EC4, + 0x1EC6, + 0x1EC6, + 0x1EC8, + 0x1EC8, + 0x1ECA, + 0x1ECA, + 0x1ECC, + 0x1ECC, + 0x1ECE, + 0x1ECE, + 0x1ED0, + 0x1ED0, + 0x1ED2, + 0x1ED2, + 0x1ED4, + 0x1ED4, + 0x1ED6, + 0x1ED6, + 0x1ED8, + 0x1ED8, + 0x1EDA, + 0x1EDA, + 0x1EDC, + 0x1EDC, + 0x1EDE, + 0x1EDE, + 0x1EE0, + 0x1EE0, + 0x1EE2, + 0x1EE2, + 0x1EE4, + 0x1EE4, + 0x1EE6, + 0x1EE6, + 0x1EE8, + 0x1EE8, + 0x1EEA, + 0x1EEA, + 0x1EEC, + 0x1EEC, + 0x1EEE, + 0x1EEE, + 0x1EF0, + 0x1EF0, + 0x1EF2, + 0x1EF2, + 0x1EF4, + 0x1EF4, + 0x1EF6, + 0x1EF6, + 0x1EF8, + 0x1EF8, + 0x1EFA, + 0x1EFA, + 0x1EFC, + 0x1EFC, + 0x1EFE, + 0x1EFE, + 0x1F08, + 0x1F0F, + 0x1F18, + 0x1F1D, + 0x1F28, + 0x1F2F, + 0x1F38, + 0x1F3F, + 0x1F48, + 0x1F4D, + 0x1F59, + 0x1F59, + 0x1F5B, + 0x1F5B, + 0x1F5D, + 0x1F5D, + 0x1F5F, + 0x1F5F, + 0x1F68, + 0x1F6F, + 0x1F88, + 0x1F8F, + 0x1F98, + 0x1F9F, + 0x1FA8, + 0x1FAF, + 0x1FB8, + 0x1FBC, + 0x1FC8, + 0x1FCC, + 0x1FD8, + 0x1FDB, + 0x1FE8, + 0x1FEC, + 0x1FF8, + 0x1FFC, + 0x2102, + 0x2102, + 0x2107, + 0x2107, + 0x210B, + 0x210D, + 0x2110, + 0x2112, + 0x2115, + 0x2115, + 0x2119, + 0x211D, + 0x2124, + 0x2124, + 0x2126, + 0x2126, + 0x2128, + 0x2128, + 0x212A, + 0x212D, + 0x2130, + 0x2133, + 0x213E, + 0x213F, + 0x2145, + 0x2145, + 0x2160, + 0x216F, + 0x2183, + 0x2183, + 0x24B6, + 0x24CF, + 0x2C00, + 0x2C2F, + 0x2C60, + 0x2C60, + 0x2C62, + 0x2C64, + 0x2C67, + 0x2C67, + 0x2C69, + 0x2C69, + 0x2C6B, + 0x2C6B, + 0x2C6D, + 0x2C70, + 0x2C72, + 0x2C72, + 
0x2C75, + 0x2C75, + 0x2C7E, + 0x2C80, + 0x2C82, + 0x2C82, + 0x2C84, + 0x2C84, + 0x2C86, + 0x2C86, + 0x2C88, + 0x2C88, + 0x2C8A, + 0x2C8A, + 0x2C8C, + 0x2C8C, + 0x2C8E, + 0x2C8E, + 0x2C90, + 0x2C90, + 0x2C92, + 0x2C92, + 0x2C94, + 0x2C94, + 0x2C96, + 0x2C96, + 0x2C98, + 0x2C98, + 0x2C9A, + 0x2C9A, + 0x2C9C, + 0x2C9C, + 0x2C9E, + 0x2C9E, + 0x2CA0, + 0x2CA0, + 0x2CA2, + 0x2CA2, + 0x2CA4, + 0x2CA4, + 0x2CA6, + 0x2CA6, + 0x2CA8, + 0x2CA8, + 0x2CAA, + 0x2CAA, + 0x2CAC, + 0x2CAC, + 0x2CAE, + 0x2CAE, + 0x2CB0, + 0x2CB0, + 0x2CB2, + 0x2CB2, + 0x2CB4, + 0x2CB4, + 0x2CB6, + 0x2CB6, + 0x2CB8, + 0x2CB8, + 0x2CBA, + 0x2CBA, + 0x2CBC, + 0x2CBC, + 0x2CBE, + 0x2CBE, + 0x2CC0, + 0x2CC0, + 0x2CC2, + 0x2CC2, + 0x2CC4, + 0x2CC4, + 0x2CC6, + 0x2CC6, + 0x2CC8, + 0x2CC8, + 0x2CCA, + 0x2CCA, + 0x2CCC, + 0x2CCC, + 0x2CCE, + 0x2CCE, + 0x2CD0, + 0x2CD0, + 0x2CD2, + 0x2CD2, + 0x2CD4, + 0x2CD4, + 0x2CD6, + 0x2CD6, + 0x2CD8, + 0x2CD8, + 0x2CDA, + 0x2CDA, + 0x2CDC, + 0x2CDC, + 0x2CDE, + 0x2CDE, + 0x2CE0, + 0x2CE0, + 0x2CE2, + 0x2CE2, + 0x2CEB, + 0x2CEB, + 0x2CED, + 0x2CED, + 0x2CF2, + 0x2CF2, + 0xA640, + 0xA640, + 0xA642, + 0xA642, + 0xA644, + 0xA644, + 0xA646, + 0xA646, + 0xA648, + 0xA648, + 0xA64A, + 0xA64A, + 0xA64C, + 0xA64C, + 0xA64E, + 0xA64E, + 0xA650, + 0xA650, + 0xA652, + 0xA652, + 0xA654, + 0xA654, + 0xA656, + 0xA656, + 0xA658, + 0xA658, + 0xA65A, + 0xA65A, + 0xA65C, + 0xA65C, + 0xA65E, + 0xA65E, + 0xA660, + 0xA660, + 0xA662, + 0xA662, + 0xA664, + 0xA664, + 0xA666, + 0xA666, + 0xA668, + 0xA668, + 0xA66A, + 0xA66A, + 0xA66C, + 0xA66C, + 0xA680, + 0xA680, + 0xA682, + 0xA682, + 0xA684, + 0xA684, + 0xA686, + 0xA686, + 0xA688, + 0xA688, + 0xA68A, + 0xA68A, + 0xA68C, + 0xA68C, + 0xA68E, + 0xA68E, + 0xA690, + 0xA690, + 0xA692, + 0xA692, + 0xA694, + 0xA694, + 0xA696, + 0xA696, + 0xA698, + 0xA698, + 0xA69A, + 0xA69A, + 0xA722, + 0xA722, + 0xA724, + 0xA724, + 0xA726, + 0xA726, + 0xA728, + 0xA728, + 0xA72A, + 0xA72A, + 0xA72C, + 0xA72C, + 0xA72E, + 0xA72E, + 0xA732, + 0xA732, + 0xA734, + 0xA734, + 
0xA736, + 0xA736, + 0xA738, + 0xA738, + 0xA73A, + 0xA73A, + 0xA73C, + 0xA73C, + 0xA73E, + 0xA73E, + 0xA740, + 0xA740, + 0xA742, + 0xA742, + 0xA744, + 0xA744, + 0xA746, + 0xA746, + 0xA748, + 0xA748, + 0xA74A, + 0xA74A, + 0xA74C, + 0xA74C, + 0xA74E, + 0xA74E, + 0xA750, + 0xA750, + 0xA752, + 0xA752, + 0xA754, + 0xA754, + 0xA756, + 0xA756, + 0xA758, + 0xA758, + 0xA75A, + 0xA75A, + 0xA75C, + 0xA75C, + 0xA75E, + 0xA75E, + 0xA760, + 0xA760, + 0xA762, + 0xA762, + 0xA764, + 0xA764, + 0xA766, + 0xA766, + 0xA768, + 0xA768, + 0xA76A, + 0xA76A, + 0xA76C, + 0xA76C, + 0xA76E, + 0xA76E, + 0xA779, + 0xA779, + 0xA77B, + 0xA77B, + 0xA77D, + 0xA77E, + 0xA780, + 0xA780, + 0xA782, + 0xA782, + 0xA784, + 0xA784, + 0xA786, + 0xA786, + 0xA78B, + 0xA78B, + 0xA78D, + 0xA78D, + 0xA790, + 0xA790, + 0xA792, + 0xA792, + 0xA796, + 0xA796, + 0xA798, + 0xA798, + 0xA79A, + 0xA79A, + 0xA79C, + 0xA79C, + 0xA79E, + 0xA79E, + 0xA7A0, + 0xA7A0, + 0xA7A2, + 0xA7A2, + 0xA7A4, + 0xA7A4, + 0xA7A6, + 0xA7A6, + 0xA7A8, + 0xA7A8, + 0xA7AA, + 0xA7AE, + 0xA7B0, + 0xA7B4, + 0xA7B6, + 0xA7B6, + 0xA7B8, + 0xA7B8, + 0xA7BA, + 0xA7BA, + 0xA7BC, + 0xA7BC, + 0xA7BE, + 0xA7BE, + 0xA7C0, + 0xA7C0, + 0xA7C2, + 0xA7C2, + 0xA7C4, + 0xA7C7, + 0xA7C9, + 0xA7C9, + 0xA7D0, + 0xA7D0, + 0xA7D6, + 0xA7D6, + 0xA7D8, + 0xA7D8, + 0xA7F5, + 0xA7F5, + 0xFF21, + 0xFF3A, + 0x10400, + 0x10427, + 0x104B0, + 0x104D3, + 0x10570, + 0x1057A, + 0x1057C, + 0x1058A, + 0x1058C, + 0x10592, + 0x10594, + 0x10595, + 0x10C80, + 0x10CB2, + 0x118A0, + 0x118BF, + 0x16E40, + 0x16E5F, + 0x1D400, + 0x1D419, + 0x1D434, + 0x1D44D, + 0x1D468, + 0x1D481, + 0x1D49C, + 0x1D49C, + 0x1D49E, + 0x1D49F, + 0x1D4A2, + 0x1D4A2, + 0x1D4A5, + 0x1D4A6, + 0x1D4A9, + 0x1D4AC, + 0x1D4AE, + 0x1D4B5, + 0x1D4D0, + 0x1D4E9, + 0x1D504, + 0x1D505, + 0x1D507, + 0x1D50A, + 0x1D50D, + 0x1D514, + 0x1D516, + 0x1D51C, + 0x1D538, + 0x1D539, + 0x1D53B, + 0x1D53E, + 0x1D540, + 0x1D544, + 0x1D546, + 0x1D546, + 0x1D54A, + 0x1D550, + 0x1D56C, + 0x1D585, + 0x1D5A0, + 0x1D5B9, + 0x1D5D4, + 0x1D5ED, 
+ 0x1D608, + 0x1D621, + 0x1D63C, + 0x1D655, + 0x1D670, + 0x1D689, + 0x1D6A8, + 0x1D6C0, + 0x1D6E2, + 0x1D6FA, + 0x1D71C, + 0x1D734, + 0x1D756, + 0x1D76E, + 0x1D790, + 0x1D7A8, + 0x1D7CA, + 0x1D7CA, + 0x1E900, + 0x1E921, + 0x1F130, + 0x1F149, + 0x1F150, + 0x1F169, + 0x1F170, + 0x1F189, +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding unicode codepoint. Note that + * this table is different from other encodings where we used a lookup table + * because the indices of those tables are the byte representations, not the + * codepoints themselves. + */ +const uint8_t rbs_encoding_unicode_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 
3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Fx +}; + +/** + * Binary search through the given list of codepoint ranges to see if the given + * codepoint falls inside one of them. The list is laid out as consecutive + * (first, last) pairs with both ends inclusive: even indices hold the start of + * a range and the following odd index holds its end. size counts individual + * array elements, not pairs. The search assumes the pairs are in ascending + * order. + * + * NOTE(review): an odd size would let codepoints[middle + 1] index one past + * the end; presumably every caller passes an even length - confirm. + */ +static bool +rbs_unicode_codepoint_match(rbs_unicode_codepoint_t codepoint, const rbs_unicode_codepoint_t *codepoints, size_t size) { + size_t start = 0; + size_t end = size; + + while (start < end) { + size_t middle = start + (end - start) / 2; + // Snap down to an even index so middle always addresses the start of a pair. + if ((middle % 2) != 0) middle--; + + if (codepoint >= codepoints[middle] && codepoint <= codepoints[middle + 1]) { + return true; + } + + if (codepoint < codepoints[middle]) { + end = middle; + } else { + // Skip past both elements of the pair that was just rejected. + start = middle + 2; + } + } + + return false; +} + +/** + * A state transition table for decoding UTF-8. + * + * Copyright (c) 2008-2009 Bjoern Hoehrmann + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +// clang-format off +static const uint8_t rbs_utf_8_dfa[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 00..1f + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 20..3f + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 40..5f + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 60..7f + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, + 9, // 80..9f + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // a0..bf + 8, + 8, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, // c0..df + 0xa, + 0x3, + 0x3, + 0x3, + 0x3, + 0x3, + 0x3, + 0x3, + 0x3, + 0x3, + 0x3, + 0x3, + 0x3, + 0x4, + 0x3, + 0x3, // e0..ef + 0xb, + 0x6, + 0x6, + 0x6, + 0x5, + 0x8, + 0x8, + 0x8, + 0x8, + 0x8, + 0x8, + 0x8, + 0x8, + 0x8, + 0x8, + 0x8, // f0..ff + 0x0, + 0x1, + 0x2, + 0x3, + 0x5, + 0x8, + 0x7, + 0x1, + 0x1, + 0x1, + 0x4, + 0x6, + 0x1, + 0x1, + 0x1, + 0x1, // s0..s0 + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 0, + 1, + 1, + 1, + 1, + 1, + 0, + 1, + 0, + 1, + 1, + 1, + 1, + 1, + 1, // s1..s2 + 1, + 2, + 1, + 1, + 1, + 1, + 1, + 2, + 1, + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, // s3..s4 + 1, + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 2, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 
1, + 1, + 1, + 1, + 3, + 1, + 3, + 1, + 1, + 1, + 1, + 1, + 1, // s5..s6 + 1, + 3, + 1, + 1, + 1, + 1, + 1, + 3, + 1, + 3, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 3, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, + 1, // s7..s8 +}; +// clang-format on + +/** + * Given a pointer to a string and the number of bytes remaining in the string, + * decode the next UTF-8 codepoint and return it. The number of bytes consumed + * is returned in the width out parameter. At most 4 bytes are examined; if the + * DFA has not reached its accepting state (0) by then, width is set to 0 and 0 + * is returned. + */ +static rbs_unicode_codepoint_t +rbs_utf_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) { + rbs_assert(n >= 0, "[rbs_utf_8_codepoint] n must be greater than or equal to 0. Got %ti", n); + + size_t maximum = (n > 4) ? 4 : ((size_t) n); + // Zero-initialized so the accumulator never holds an indeterminate value. + uint32_t codepoint = 0; + uint32_t state = 0; + + for (size_t index = 0; index < maximum; index++) { + uint32_t byte = b[index]; + uint32_t type = rbs_utf_8_dfa[byte]; + + // Continuation bytes contribute their low 6 bits; a leading byte is + // masked according to its character class. + codepoint = (state != 0) ? + (byte & 0x3fu) | (codepoint << 6) : + (0xffu >> type) & (byte); + + // The transition rows start at offset 256, 16 entries per state. + state = rbs_utf_8_dfa[256 + (state * 16) + type]; + if (state == 0) { + *width = index + 1; + return (rbs_unicode_codepoint_t) codepoint; + } + } + + *width = 0; + return 0; +} + +/** + * Return the size of the next character in the UTF-8 encoding, or 0 if the + * first min(n, 4) bytes do not form a complete sequence. + */ +size_t +rbs_encoding_utf_8_char_width(const uint8_t *b, ptrdiff_t n) { + rbs_assert(n >= 0, "[rbs_encoding_utf_8_char_width] n must be greater than or equal to 0. Got %ti", n); + + size_t maximum = (n > 4) ? 4 : ((size_t) n); + uint32_t state = 0; + + for (size_t index = 0; index < maximum; index++) { + state = rbs_utf_8_dfa[256 + (state * 16) + rbs_utf_8_dfa[b[index]]]; + if (state == 0) return index + 1; + } + + return 0; +} + +/** + * Return the size of the next character in the UTF-8 encoding if it is an + * alphabetical character. Returns 0 otherwise, including when no bytes remain. + */ +size_t +rbs_encoding_utf_8_alpha_char(const uint8_t *b, ptrdiff_t n) { + // Nothing left to read: do not dereference b. + if (n <= 0) { + return 0; + } + + if (*b < 0x80) { + return (rbs_encoding_unicode_table[*b] & RBS_ENCODING_ALPHABETIC_BIT) ? 
1 : 0; + } + + size_t width; + rbs_unicode_codepoint_t codepoint = rbs_utf_8_codepoint(b, n, &width); + + if (codepoint <= 0xFF) { + return (rbs_encoding_unicode_table[(uint8_t) codepoint] & RBS_ENCODING_ALPHABETIC_BIT) ? width : 0; + } else { + return rbs_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0; + } +} + +/** + * Return the size of the next character in the UTF-8 encoding if it is an + * alphanumeric character. Returns 0 otherwise, including when no bytes remain. + */ +size_t +rbs_encoding_utf_8_alnum_char(const uint8_t *b, ptrdiff_t n) { + // Nothing left to read: do not dereference b. + if (n <= 0) { + return 0; + } + + // ASCII fast path: one byte, looked up directly in the 256-entry table. + if (*b < 0x80) { + return (rbs_encoding_unicode_table[*b] & (RBS_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0; + } + + size_t width; + rbs_unicode_codepoint_t codepoint = rbs_utf_8_codepoint(b, n, &width); + + // Codepoints up to 0xFF are covered by the table; anything larger is + // checked against the sorted range list. + if (codepoint <= 0xFF) { + return (rbs_encoding_unicode_table[(uint8_t) codepoint] & (RBS_ENCODING_ALPHANUMERIC_BIT)) ? width : 0; + } else { + return rbs_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0; + } +} + +/** + * Return true if the next character in the UTF-8 encoding is an uppercase + * character. Returns false otherwise, including when no bytes remain. + */ +bool rbs_encoding_utf_8_isupper_char(const uint8_t *b, ptrdiff_t n) { + // Nothing left to read: do not dereference b. + if (n <= 0) { + return false; + } + + if (*b < 0x80) { + return (rbs_encoding_unicode_table[*b] & RBS_ENCODING_UPPERCASE_BIT) ? 
true : false; + } +} + +#ifndef RBS_ENCODING_EXCLUDE_FULL + +static rbs_unicode_codepoint_t +rbs_cesu_8_codepoint(const uint8_t *b, ptrdiff_t n, size_t *width) { + if (b[0] < 0x80) { + *width = 1; + return (rbs_unicode_codepoint_t) b[0]; + } + + if (n > 1 && b[0] >= 0xC2 && b[0] <= 0xDF && b[1] >= 0x80 && b[1] <= 0xBF) { + *width = 2; + + // 110xxxxx 10xxxxxx + return (rbs_unicode_codepoint_t) (((b[0] & 0x1F) << 6) | (b[1] & 0x3F)); + } + + if (n > 5 && b[0] == 0xED && b[1] >= 0xA0 && b[1] <= 0xAF && b[2] >= 0x80 && b[2] <= 0xBF && b[3] == 0xED && b[4] >= 0xB0 && b[4] <= 0xBF && b[5] >= 0x80 && b[5] <= 0xBF) { + *width = 6; + + // 11101101 1010xxxx 10xxxxxx 11101101 1011xxxx 10xxxxxx + return (rbs_unicode_codepoint_t) (0x10000 + (((b[1] & 0xF) << 16) | ((b[2] & 0x3F) << 10) | ((b[4] & 0xF) << 6) | (b[5] & 0x3F))); + } + + if (n > 2 && b[0] == 0xED && b[1] >= 0xA0 && b[1] <= 0xBF) { + *width = 3; + + // 11101101 1010xxxx 10xxxxx + return (rbs_unicode_codepoint_t) (0x10000 + (((b[0] & 0x03) << 16) | ((b[1] & 0x3F) << 10) | (b[2] & 0x3F))); + } + + if (n > 2 && ((b[0] == 0xE0 && b[1] >= 0xA0) || (b[0] >= 0xE1 && b[0] <= 0xEF && b[1] >= 0x80)) && b[1] <= 0xBF && b[2] >= 0x80 && b[2] <= 0xBF) { + *width = 3; + + // 1110xxxx 10xxxxxx 10xxxxx + return (rbs_unicode_codepoint_t) (((b[0] & 0xF) << 12) | ((b[1] & 0x3F) << 6) | (b[2] & 0x3F)); + } + + *width = 0; + return 0; +} + +static size_t +rbs_encoding_cesu_8_char_width(const uint8_t *b, ptrdiff_t n) { + size_t width; + rbs_cesu_8_codepoint(b, n, &width); + return width; +} + +static size_t +rbs_encoding_cesu_8_alpha_char(const uint8_t *b, ptrdiff_t n) { + if (*b < 0x80) { + return (rbs_encoding_unicode_table[*b] & RBS_ENCODING_ALPHABETIC_BIT) ? 1 : 0; + } + + size_t width; + rbs_unicode_codepoint_t codepoint = rbs_cesu_8_codepoint(b, n, &width); + + if (codepoint <= 0xFF) { + return (rbs_encoding_unicode_table[(uint8_t) codepoint] & RBS_ENCODING_ALPHABETIC_BIT) ? 
width : 0; + } else { + return rbs_unicode_codepoint_match(codepoint, unicode_alpha_codepoints, UNICODE_ALPHA_CODEPOINTS_LENGTH) ? width : 0; + } +} + +/** + * Return the size of the next character in the CESU-8 encoding if it is an + * alphanumeric character. Returns 0 otherwise, including when no bytes remain. + */ +static size_t +rbs_encoding_cesu_8_alnum_char(const uint8_t *b, ptrdiff_t n) { + // Nothing left to read: do not dereference b. + if (n <= 0) { + return 0; + } + + if (*b < 0x80) { + return (rbs_encoding_unicode_table[*b] & (RBS_ENCODING_ALPHANUMERIC_BIT)) ? 1 : 0; + } + + size_t width; + rbs_unicode_codepoint_t codepoint = rbs_cesu_8_codepoint(b, n, &width); + + if (codepoint <= 0xFF) { + return (rbs_encoding_unicode_table[(uint8_t) codepoint] & (RBS_ENCODING_ALPHANUMERIC_BIT)) ? width : 0; + } else { + return rbs_unicode_codepoint_match(codepoint, unicode_alnum_codepoints, UNICODE_ALNUM_CODEPOINTS_LENGTH) ? width : 0; + } +} + +/** + * Return true if the next character in the CESU-8 encoding is an uppercase + * character. Returns false otherwise, including when no bytes remain. + */ +static bool +rbs_encoding_cesu_8_isupper_char(const uint8_t *b, ptrdiff_t n) { + // Nothing left to read: do not dereference b. + if (n <= 0) { + return false; + } + + if (*b < 0x80) { + return (rbs_encoding_unicode_table[*b] & RBS_ENCODING_UPPERCASE_BIT) ? true : false; + } + + size_t width; + rbs_unicode_codepoint_t codepoint = rbs_cesu_8_codepoint(b, n, &width); + + if (codepoint <= 0xFF) { + return (rbs_encoding_unicode_table[(uint8_t) codepoint] & RBS_ENCODING_UPPERCASE_BIT) ? true : false; + } else { + return rbs_unicode_codepoint_match(codepoint, unicode_isupper_codepoints, UNICODE_ISUPPER_CODEPOINTS_LENGTH) ? true : false; + } +} + +#endif + +#undef UNICODE_ALPHA_CODEPOINTS_LENGTH +#undef UNICODE_ALNUM_CODEPOINTS_LENGTH +#undef UNICODE_ISUPPER_CODEPOINTS_LENGTH + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding US-ASCII character. 
+ */ +static const uint8_t rbs_encoding_ascii_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +#ifndef RBS_ENCODING_EXCLUDE_FULL + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding CP850 character. 
+ */ +static const uint8_t rbs_encoding_cp850_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding CP852 character. 
+ */ +static const uint8_t rbs_encoding_cp852_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding CP855 character. 
+ */ +static const uint8_t rbs_encoding_cp855_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding GB1988 character. 
+ */ +static const uint8_t rbs_encoding_gb1988_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM437 character. 
+ */ +static const uint8_t rbs_encoding_ibm437_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM720 character. 
+ */ +static const uint8_t rbs_encoding_ibm720_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM737 character. 
+ */ +static const uint8_t rbs_encoding_ibm737_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM775 character. 
+ */ +static const uint8_t rbs_encoding_ibm775_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM852 character. 
+ */ +static const uint8_t rbs_encoding_ibm852_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM855 character. 
+ */ +static const uint8_t rbs_encoding_ibm855_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM857 character. 
+ */ +static const uint8_t rbs_encoding_ibm857_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM860 character. 
+ */ +static const uint8_t rbs_encoding_ibm860_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM861 character. 
+ */ +static const uint8_t rbs_encoding_ibm861_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM862 character. 
+ */ +static const uint8_t rbs_encoding_ibm862_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM863 character. 
+ */ +static const uint8_t rbs_encoding_ibm863_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM864 character. 
+ */ +static const uint8_t rbs_encoding_ibm864_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM865 character. 
+ */ +static const uint8_t rbs_encoding_ibm865_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM866 character. 
+ */ +static const uint8_t rbs_encoding_ibm866_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding IBM869 character. 
+ */ +static const uint8_t rbs_encoding_ibm869_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-1 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_1_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-2 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_2_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 7, + 0, + 7, + 0, + 7, + 7, + 0, + 0, + 7, + 7, + 7, + 7, + 0, + 7, + 7, // Ax + 0, + 3, + 0, + 3, + 0, + 3, + 3, + 0, + 0, + 3, + 3, + 3, + 3, + 0, + 3, + 3, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-3 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_3_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 7, + 0, + 0, + 0, + 0, + 7, + 0, + 0, + 7, + 7, + 7, + 7, + 0, + 0, + 7, // Ax + 0, + 3, + 0, + 0, + 0, + 3, + 3, + 0, + 0, + 3, + 3, + 3, + 3, + 0, + 0, + 3, // Bx + 7, + 7, + 7, + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-4 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_4_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 7, + 3, + 7, + 0, + 7, + 7, + 0, + 0, + 7, + 7, + 7, + 7, + 0, + 7, + 0, // Ax + 0, + 3, + 0, + 3, + 0, + 3, + 3, + 0, + 0, + 3, + 3, + 3, + 3, + 7, + 3, + 3, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-5 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_5_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 7, + 7, // Ax + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-6 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_6_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Cx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-7 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_7_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 7, + 0, + 7, + 7, + 7, + 0, + 7, + 0, + 7, + 7, // Bx + 3, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, + 3, + 3, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-8 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_8_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-9 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_9_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-10 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_10_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 7, + 7, + 7, + 7, + 7, + 0, + 7, + 7, // Ax + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-11 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_11_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ax + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Bx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Cx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-13 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_13_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 7, + 0, + 7, + 0, + 0, + 0, + 0, + 7, // Ax + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 3, + 0, + 3, + 0, + 0, + 0, + 0, + 3, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-14 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_14_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 7, + 3, + 0, + 7, + 3, + 7, + 0, + 7, + 0, + 7, + 3, + 7, + 0, + 0, + 7, // Ax + 7, + 3, + 7, + 3, + 7, + 3, + 0, + 7, + 3, + 3, + 3, + 7, + 3, + 7, + 3, + 3, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-15 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_15_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 7, + 0, + 3, + 0, + 3, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 7, + 3, + 0, + 0, + 3, + 0, + 3, + 0, + 7, + 3, + 7, + 0, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding ISO-8859-16 character. 
+ */ +static const uint8_t rbs_encoding_iso_8859_16_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 7, + 3, + 7, + 0, + 0, + 7, + 0, + 3, + 0, + 7, + 0, + 7, + 0, + 3, + 7, // Ax + 0, + 0, + 7, + 3, + 7, + 0, + 0, + 0, + 3, + 3, + 3, + 0, + 7, + 3, + 7, + 3, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding KOI8-R character. 
+ */ +static const uint8_t rbs_encoding_koi8_r_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 7, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Cx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Dx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Ex + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding KOI8-U character. 
+ */ +static const uint8_t rbs_encoding_koi8_u_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 3, + 3, + 0, + 3, + 3, + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, // Ax + 0, + 0, + 0, + 7, + 7, + 0, + 7, + 7, + 0, + 0, + 0, + 0, + 0, + 7, + 0, + 0, // Bx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Cx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Dx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Ex + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macCentEuro character. 
+ */ +static const uint8_t rbs_encoding_mac_cent_euro_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macCroatian character. 
+ */ +static const uint8_t rbs_encoding_mac_croatian_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macCyrillic character. 
+ */ +static const uint8_t rbs_encoding_mac_cyrillic_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macGreek character. 
+ */ +static const uint8_t rbs_encoding_mac_greek_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macIceland character. 
+ */ +static const uint8_t rbs_encoding_mac_iceland_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macRoman character. 
+ */ +static const uint8_t rbs_encoding_mac_roman_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macRomania character. 
+ */ +static const uint8_t rbs_encoding_mac_romania_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macThai character. 
+ */ +static const uint8_t rbs_encoding_mac_thai_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding TIS-620 character. 
+ */ +static const uint8_t rbs_encoding_tis_620_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ax + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Bx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Cx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macTurkish character. 
+ */ +static const uint8_t rbs_encoding_mac_turkish_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding macUkraine character. 
+ */ +static const uint8_t rbs_encoding_mac_ukraine_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding windows-1250 character. 
+ */ +static const uint8_t rbs_encoding_windows_1250_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 7, + 0, + 7, + 7, + 7, + 7, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 3, + 3, + 3, + 3, // 9x + 0, + 0, + 0, + 7, + 0, + 7, + 0, + 0, + 0, + 0, + 7, + 0, + 0, + 0, + 0, + 7, // Ax + 0, + 0, + 0, + 3, + 0, + 3, + 0, + 0, + 0, + 3, + 3, + 0, + 7, + 0, + 3, + 3, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding windows-1251 character. 
+ */ +static const uint8_t rbs_encoding_windows_1251_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 7, + 7, + 0, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 7, + 0, + 7, + 7, + 7, + 7, // 8x + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 3, + 3, + 3, + 3, // 9x + 0, + 7, + 3, + 7, + 0, + 7, + 0, + 0, + 7, + 0, + 7, + 0, + 0, + 0, + 0, + 7, // Ax + 0, + 0, + 7, + 3, + 3, + 3, + 0, + 0, + 3, + 0, + 3, + 0, + 3, + 7, + 3, + 3, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding windows-1252 character. 
+ */ +static const uint8_t rbs_encoding_windows_1252_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 7, + 0, + 7, + 0, + 7, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 3, + 0, + 3, + 7, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding windows-1253 character. 
+ */ +static const uint8_t rbs_encoding_windows_1253_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 7, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 3, + 7, + 0, + 7, + 7, + 7, + 0, + 7, + 0, + 7, + 7, // Bx + 3, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, + 3, + 3, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding windows-1254 character. 
+ */ +static const uint8_t rbs_encoding_windows_1254_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 7, + 0, + 7, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 3, + 0, + 0, + 7, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding windows-1255 character. 
+ */ +static const uint8_t rbs_encoding_windows_1255_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding windows-1256 character. 
+ */ +static const uint8_t rbs_encoding_windows_1256_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Cx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding windows-1257 character. 
+ */ +static const uint8_t rbs_encoding_windows_1257_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 7, + 0, + 7, + 0, + 0, + 0, + 0, + 7, // Ax + 0, + 0, + 0, + 0, + 0, + 3, + 0, + 0, + 3, + 0, + 3, + 0, + 0, + 0, + 0, + 3, // Bx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // Cx + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 3, // Dx + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // Ex + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding windows-1258 character. 
+ */ +static const uint8_t rbs_encoding_windows_1258_table[256] = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 0x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 1x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 2x + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 2, + 0, + 0, + 0, + 0, + 0, + 0, // 3x + 0, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, // 4x + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 7, + 0, + 0, + 0, + 0, + 0, // 5x + 0, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, // 6x + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 3, + 0, + 0, + 0, + 0, + 0, // 7x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 8x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // 9x + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ax + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Bx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Cx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Dx + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Ex + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, // Fx +}; + +/** + * Each element of the following table contains a bitfield that indicates a + * piece of information about the corresponding windows-874 character. 
+ */
+static const uint8_t rbs_encoding_windows_874_table[256] = {
+    // 0 1 2 3 4 5 6 7 8 9 A B C D E F
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0, // 0x
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0, // 1x
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0, // 2x
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0, // 3x
+    0,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7, // 4x
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    7,
+    0,
+    0,
+    0,
+    0,
+    0, // 5x
+    0,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3, // 6x
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    3,
+    0,
+    0,
+    0,
+    0,
+    0, // 7x
+    // Every entry from 0x80 to 0xFF below is 0, so no byte in the upper half
+    // is classified as alphabetic, alphanumeric or uppercase by this table.
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0, // 8x
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0, // 9x
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0, // Ax
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0, // Bx
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0, // Cx
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0, // Dx
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0, // Ex
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0,
+    0, // Fx
+};
+
+/**
+ * Defines the three character-class predicates for a single-byte encoding
+ * whose classification data lives in rbs_encoding_<name>_table. Each generated
+ * function reads the byte at `b`, indexes the table with it and tests one bit;
+ * the remaining length `n` is not consulted.
+ *
+ * The alpha and alnum predicates return 1 when the bit is set and 0 otherwise,
+ * so both report a width regardless of which bit position the flag occupies.
+ * The isupper predicate returns a bool.
+ */
+#define RBS_ENCODING_TABLE(name) \
+    static size_t rbs_encoding_##name##_alpha_char(const uint8_t *b, RBS_ATTRIBUTE_UNUSED ptrdiff_t n) { \
+        return (rbs_encoding_##name##_table[*b] & RBS_ENCODING_ALPHABETIC_BIT) ? 1 : 0; \
+    } \
+    static size_t rbs_encoding_##name##_alnum_char(const uint8_t *b, RBS_ATTRIBUTE_UNUSED ptrdiff_t n) { \
+        return (rbs_encoding_##name##_table[*b] & RBS_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0; \
+    } \
+    static bool rbs_encoding_##name##_isupper_char(const uint8_t *b, RBS_ATTRIBUTE_UNUSED ptrdiff_t n) { \
+        return (rbs_encoding_##name##_table[*b] & RBS_ENCODING_UPPERCASE_BIT) != 0; \
+    }
+
+RBS_ENCODING_TABLE(cp850)
+RBS_ENCODING_TABLE(cp852)
+RBS_ENCODING_TABLE(cp855)
+RBS_ENCODING_TABLE(gb1988)
+RBS_ENCODING_TABLE(ibm437)
+RBS_ENCODING_TABLE(ibm720)
+RBS_ENCODING_TABLE(ibm737)
+RBS_ENCODING_TABLE(ibm775)
+RBS_ENCODING_TABLE(ibm852)
+RBS_ENCODING_TABLE(ibm855)
+RBS_ENCODING_TABLE(ibm857)
+RBS_ENCODING_TABLE(ibm860)
+RBS_ENCODING_TABLE(ibm861)
+RBS_ENCODING_TABLE(ibm862)
+RBS_ENCODING_TABLE(ibm863)
+RBS_ENCODING_TABLE(ibm864)
+RBS_ENCODING_TABLE(ibm865)
+RBS_ENCODING_TABLE(ibm866)
+RBS_ENCODING_TABLE(ibm869)
+RBS_ENCODING_TABLE(iso_8859_1)
+RBS_ENCODING_TABLE(iso_8859_2)
+RBS_ENCODING_TABLE(iso_8859_3)
+RBS_ENCODING_TABLE(iso_8859_4)
+RBS_ENCODING_TABLE(iso_8859_5)
+RBS_ENCODING_TABLE(iso_8859_6)
+RBS_ENCODING_TABLE(iso_8859_7)
+RBS_ENCODING_TABLE(iso_8859_8)
+RBS_ENCODING_TABLE(iso_8859_9)
+RBS_ENCODING_TABLE(iso_8859_10)
+RBS_ENCODING_TABLE(iso_8859_11)
+RBS_ENCODING_TABLE(iso_8859_13)
+RBS_ENCODING_TABLE(iso_8859_14)
+RBS_ENCODING_TABLE(iso_8859_15)
+RBS_ENCODING_TABLE(iso_8859_16)
+RBS_ENCODING_TABLE(koi8_r)
+RBS_ENCODING_TABLE(koi8_u)
+RBS_ENCODING_TABLE(mac_cent_euro)
+RBS_ENCODING_TABLE(mac_croatian)
+RBS_ENCODING_TABLE(mac_cyrillic)
+RBS_ENCODING_TABLE(mac_greek)
+RBS_ENCODING_TABLE(mac_iceland)
+RBS_ENCODING_TABLE(mac_roman)
+RBS_ENCODING_TABLE(mac_romania)
+RBS_ENCODING_TABLE(mac_thai)
+RBS_ENCODING_TABLE(mac_turkish)
+RBS_ENCODING_TABLE(mac_ukraine)
+RBS_ENCODING_TABLE(tis_620)
+RBS_ENCODING_TABLE(windows_1250)
+RBS_ENCODING_TABLE(windows_1251)
+RBS_ENCODING_TABLE(windows_1252)
+RBS_ENCODING_TABLE(windows_1253)
+RBS_ENCODING_TABLE(windows_1254)
+RBS_ENCODING_TABLE(windows_1255)
+RBS_ENCODING_TABLE(windows_1256)
+RBS_ENCODING_TABLE(windows_1257)
+RBS_ENCODING_TABLE(windows_1258)
+RBS_ENCODING_TABLE(windows_874)
+
+#undef RBS_ENCODING_TABLE
+#endif
+
+/**
+ * Returns the size of the next character in the ASCII encoding. This basically
+ * means that if the top bit is not set, the character is 1 byte long. Bytes
+ * with the top bit set yield 0. The remaining length `n` is not consulted.
+ */
+static size_t
+rbs_encoding_ascii_char_width(const uint8_t *b, RBS_ATTRIBUTE_UNUSED ptrdiff_t n) {
+    return *b < 0x80 ? 1 : 0;
+}
+
+/**
+ * Returns the size of the next character in the ASCII encoding if it is an
+ * alphabetical character, or 0 if it is not. The byte at `b` is looked up in
+ * the ASCII table; the remaining length `n` is not consulted.
+ */
+static size_t
+rbs_encoding_ascii_alpha_char(const uint8_t *b, RBS_ATTRIBUTE_UNUSED ptrdiff_t n) {
+    // Normalize to 1/0 so that the result is a width no matter which bit
+    // position the flag occupies, matching rbs_encoding_ascii_alnum_char.
+    return (rbs_encoding_ascii_table[*b] & RBS_ENCODING_ALPHABETIC_BIT) ? 1 : 0;
+}
+
+/**
+ * Certain encodings are equivalent to ASCII below 0x80, so it works for our
+ * purposes to have a function here that first checks the bounds and then falls
+ * back to checking the ASCII lookup table. Bytes at or above 0x80 yield 0.
+ */
+static size_t
+rbs_encoding_ascii_alpha_char_7bit(const uint8_t *b, ptrdiff_t n) {
+    return (*b < 0x80) ? rbs_encoding_ascii_alpha_char(b, n) : 0;
+}
+
+/**
+ * Returns the size of the next character in the ASCII encoding if it is an
+ * alphanumeric character, or 0 if it is not. The byte at `b` is looked up in
+ * the ASCII table; the remaining length `n` is not consulted.
+ */
+static size_t
+rbs_encoding_ascii_alnum_char(const uint8_t *b, RBS_ATTRIBUTE_UNUSED ptrdiff_t n) {
+    return (rbs_encoding_ascii_table[*b] & RBS_ENCODING_ALPHANUMERIC_BIT) ? 1 : 0;
+}
+
+/**
+ * Certain encodings are equivalent to ASCII below 0x80, so it works for our
+ * purposes to have a function here that first checks the bounds and then falls
+ * back to checking the ASCII lookup table. Bytes at or above 0x80 yield 0.
+ */
+static size_t
+rbs_encoding_ascii_alnum_char_7bit(const uint8_t *b, ptrdiff_t n) {
+    return (*b < 0x80) ? rbs_encoding_ascii_alnum_char(b, n) : 0;
+}
+
+/**
+ * Returns true if the next character in the ASCII encoding is an uppercase
+ * character.
+ */
+static bool
+rbs_encoding_ascii_isupper_char(const uint8_t *b, RBS_ATTRIBUTE_UNUSED ptrdiff_t n) {
+    return (rbs_encoding_ascii_table[*b] & RBS_ENCODING_UPPERCASE_BIT) != 0;
+}
+
+/**
+ * For a lot of encodings the default is that they are a single byte long no
+ * matter what the codepoint, so this function is shared between them. Neither
+ * argument is consulted; the result is always 1.
+ */
+static size_t
+rbs_encoding_single_char_width(RBS_ATTRIBUTE_UNUSED const uint8_t *b, RBS_ATTRIBUTE_UNUSED ptrdiff_t n) {
+    return 1;
+}
+
+/**
+ * Returns the size of the next character in the EUC-JP encoding, or 0 if a
+ * character cannot be decoded from the given bytes. `n` is the number of bytes
+ * available at `b`; trailing bytes are only read when `n` covers them.
+ */
+static size_t
+rbs_encoding_euc_jp_char_width(const uint8_t *b, ptrdiff_t n) {
+    // These are the single byte characters.
+    if (*b < 0x80) {
+        return 1;
+    }
+
+    // These are the double byte characters.
+    if ((n > 1) && ((b[0] == 0x8E) || (b[0] >= 0xA1 && b[0] <= 0xFE)) && (b[1] >= 0xA1 && b[1] <= 0xFE)) {
+        return 2;
+    }
+
+    // These are the triple byte characters. Both trailing bytes have to fall
+    // in 0xA1..0xFE, so each one gets its own lower and upper bound check.
+    if ((n > 2) && (b[0] == 0x8F) && (b[1] >= 0xA1 && b[1] <= 0xFE) && (b[2] >= 0xA1 && b[2] <= 0xFE)) {
+        return 3;
+    }
+
+    return 0;
+}
+
+/**
+ * Returns true if the next character in the EUC-JP encoding is an uppercase
+ * character. Single byte characters defer to the ASCII check, double byte
+ * characters are matched against three lead-byte rows, and anything else
+ * (including undecodable input) is not uppercase.
+ */
+static bool
+rbs_encoding_euc_jp_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    size_t width = rbs_encoding_euc_jp_char_width(b, n);
+
+    if (width == 1) {
+        return rbs_encoding_ascii_isupper_char(b, n);
+    } else if (width == 2) {
+        return (
+            (b[0] == 0xA3 && b[1] >= 0xC1 && b[1] <= 0xDA) ||
+            (b[0] == 0xA6 && b[1] >= 0xA1 && b[1] <= 0xB8) ||
+            (b[0] == 0xA7 && b[1] >= 0xA1 && b[1] <= 0xC1)
+        );
+    } else {
+        return false;
+    }
+}
+
+/**
+ * Returns the size of the next character in the Shift_JIS encoding, or 0 if a
+ * character cannot be decoded from the given bytes. `n` is the number of bytes
+ * available at `b`; the second byte is only read when `n` covers it.
+ */
+static size_t
+rbs_encoding_shift_jis_char_width(const uint8_t *b, ptrdiff_t n) {
+    // These are the single byte characters.
+    if (b[0] < 0x80 || (b[0] >= 0xA1 && b[0] <= 0xDF)) {
+        return 1;
+    }
+
+    // These are the double byte characters.
+    if ((n > 1) && ((b[0] >= 0x81 && b[0] <= 0x9F) || (b[0] >= 0xE0 && b[0] <= 0xFC)) && (b[1] >= 0x40 && b[1] <= 0xFC && b[1] != 0x7F)) {
+        return 2;
+    }
+
+    return 0;
+}
+
+/**
+ * Returns the size of the next character in the Shift_JIS encoding if it is an
+ * alphanumeric character. Single byte characters at or above 0x80 count as
+ * alphanumeric, other single bytes defer to the ASCII check, and any other
+ * width (2 for a double byte character, 0 for undecodable input) is returned
+ * as is.
+ */
+static size_t
+rbs_encoding_shift_jis_alnum_char(const uint8_t *b, ptrdiff_t n) {
+    size_t width = rbs_encoding_shift_jis_char_width(b, n);
+    return width == 1 ? ((b[0] >= 0x80) || rbs_encoding_ascii_alnum_char(b, n)) : width;
+}
+
+/**
+ * Returns the size of the next character in the Shift_JIS encoding if it is an
+ * alphabetical character. Single byte characters at or above 0x80 count as
+ * alphabetical, other single bytes defer to the ASCII check, and any other
+ * width (2 for a double byte character, 0 for undecodable input) is returned
+ * as is.
+ */
+static size_t
+rbs_encoding_shift_jis_alpha_char(const uint8_t *b, ptrdiff_t n) {
+    size_t width = rbs_encoding_shift_jis_char_width(b, n);
+    return width == 1 ? ((b[0] >= 0x80) || rbs_encoding_ascii_alpha_char(b, n)) : width;
+}
+
+/**
+ * Returns true if the next character in the Shift_JIS encoding is an uppercase
+ * character. Single byte characters defer to the ASCII check, double byte
+ * characters are matched against three lead-byte rows, and anything else
+ * (including undecodable input) is not uppercase.
+ */
+static bool
+rbs_encoding_shift_jis_isupper_char(const uint8_t *b, ptrdiff_t n) {
+    size_t width = rbs_encoding_shift_jis_char_width(b, n);
+
+    if (width == 1) {
+        return rbs_encoding_ascii_isupper_char(b, n);
+    } else if (width == 2) {
+        return (
+            ((b[0] == 0x82) && (b[1] >= 0x60 && b[1] <= 0x79)) ||
+            ((b[0] == 0x83) && (b[1] >= 0x9F && b[1] <= 0xB6)) ||
+            ((b[0] == 0x84) && (b[1] >= 0x40 && b[1] <= 0x60))
+        );
+    } else {
+        // The only remaining width is 0, so say "not uppercase" explicitly
+        // rather than returning the width through a bool.
+        return false;
+    }
+}
+
+#ifndef RBS_ENCODING_EXCLUDE_FULL
+
+/**
+ * Certain encodings are equivalent to ASCII below 0x80, so it works for our
+ * purposes to have a function here that first checks the bounds and then falls
+ * back to checking the ASCII lookup table.
+ */
+static bool
+rbs_encoding_ascii_isupper_char_7bit(const uint8_t *b, ptrdiff_t n) {
+    if (*b >= 0x80) {
+        return false;
+    }
+
+    return rbs_encoding_ascii_isupper_char(b, n);
+}
+
+/**
+ * Reports how many bytes the character at the front of `b` occupies in the
+ * Big5 encoding. The result is 0 when the bytes do not form a character. `n`
+ * is the number of bytes available; the second byte is only read when `n`
+ * covers it.
+ */
+static size_t
+rbs_encoding_big5_char_width(const uint8_t *b, ptrdiff_t n) {
+    const uint8_t lead = b[0];
+
+    // Single byte characters.
+    if (lead < 0x80) {
+        return 1;
+    }
+
+    // Everything else needs a lead byte in 0xA1..0xFE and a second byte.
+    if (n <= 1 || lead < 0xA1 || lead > 0xFE) {
+        return 0;
+    }
+
+    // Double byte characters: the second byte sits in one of two windows.
+    const uint8_t trail = b[1];
+    const bool low_window = (trail >= 0x40 && trail <= 0x7E);
+    const bool high_window = (trail >= 0xA1 && trail <= 0xFE);
+
+    return (low_window || high_window) ? 2 : 0;
+}
+
+/**
+ * Reports how many bytes the character at the front of `b` occupies in the
+ * CP949 encoding. The result is 0 when the bytes do not form a character. `n`
+ * is the number of bytes available; the second byte is only read when `n`
+ * covers it.
+ */
+static size_t
+rbs_encoding_cp949_char_width(const uint8_t *b, ptrdiff_t n) {
+    const uint8_t lead = b[0];
+
+    // Single byte characters. The bound is inclusive: 0x80 itself is accepted.
+    if (lead <= 0x80) {
+        return 1;
+    }
+
+    // Everything else needs a lead byte in 0x81..0xFE and a second byte.
+    if (n <= 1 || lead < 0x81 || lead > 0xFE) {
+        return 0;
+    }
+
+    // Double byte characters: the second byte sits in one of three windows.
+    const uint8_t trail = b[1];
+
+    if (trail >= 0x41 && trail <= 0x5A) {
+        return 2;
+    }
+    if (trail >= 0x61 && trail <= 0x7A) {
+        return 2;
+    }
+    if (trail >= 0x81 && trail <= 0xFE) {
+        return 2;
+    }
+
+    return 0;
+}
+
+/**
+ * Reports how many bytes the character at the front of `b` occupies in the
+ * Emacs MULE encoding. The result is 0 when the bytes do not form a character.
+ * The lead byte selects the expected length; `n` is the number of bytes
+ * available and trailing bytes are only read when `n` covers them.
+ */
+static size_t
+rbs_encoding_emacs_mule_char_width(const uint8_t *b, ptrdiff_t n) {
+    const uint8_t lead = b[0];
+
+    // 1 byte characters.
+    if (lead < 0x80) {
+        return 1;
+    }
+
+    // 2 byte characters.
+    if (lead >= 0x81 && lead <= 0x8F) {
+        return ((n > 1) && (b[1] >= 0xA0)) ? 2 : 0;
+    }
+
+    // 3 byte characters, first group of lead bytes.
+    if (lead >= 0x90 && lead <= 0x99) {
+        return ((n > 2) && (b[1] >= 0xA0) && (b[2] >= 0xA0)) ? 3 : 0;
+    }
+
+    // 3 byte characters, second group of lead bytes.
+    if (lead == 0x9A || lead == 0x9B) {
+        return ((n > 2) && (b[1] >= 0xE0 && b[1] <= 0xEF) && (b[2] >= 0xA0)) ? 3 : 0;
+    }
+
+    // 4 byte characters: each lead byte has its own window for the second byte.
+    if (lead == 0x9C) {
+        return ((n > 3) && (b[1] >= 0xF0 && b[1] <= 0xF4) && (b[2] >= 0xA0) && (b[3] >= 0xA0)) ? 4 : 0;
+    }
+    if (lead == 0x9D) {
+        return ((n > 3) && (b[1] >= 0xF5 && b[1] <= 0xFE) && (b[2] >= 0xA0) && (b[3] >= 0xA0)) ? 4 : 0;
+    }
+
+    return 0;
+}
+
+/**
+ * Reports how many bytes the character at the front of `b` occupies in the
+ * EUC-KR encoding. The result is 0 when the bytes do not form a character. `n`
+ * is the number of bytes available; the second byte is only read when `n`
+ * covers it.
+ */
+static size_t
+rbs_encoding_euc_kr_char_width(const uint8_t *b, ptrdiff_t n) {
+    // Single byte characters.
+    if (b[0] < 0x80) {
+        return 1;
+    }
+
+    // Double byte characters: both bytes sit in 0xA1..0xFE.
+    const bool is_pair = (n > 1) && (b[0] >= 0xA1 && b[0] <= 0xFE) && (b[1] >= 0xA1 && b[1] <= 0xFE);
+
+    return is_pair ? 2 : 0;
+}
+
+/**
+ * Reports how many bytes the character at the front of `b` occupies in the
+ * EUC-TW encoding. The result is 0 when the bytes do not form a character. `n`
+ * is the number of bytes available; trailing bytes are only read when `n`
+ * covers them.
+ */
+static size_t
+rbs_encoding_euc_tw_char_width(const uint8_t *b, ptrdiff_t n) {
+    const uint8_t lead = b[0];
+
+    // Single byte characters.
+    if (lead < 0x80) {
+        return 1;
+    }
+
+    // Quadruple byte characters are introduced by 0x8E.
+    if (lead == 0x8E) {
+        const bool is_quad =
+            (n > 3) &&
+            (b[1] >= 0xA1 && b[1] <= 0xB0) &&
+            (b[2] >= 0xA1 && b[2] <= 0xFE) &&
+            (b[3] >= 0xA1 && b[3] <= 0xFE);
+
+        return is_quad ? 4 : 0;
+    }
+
+    // Double byte characters: both bytes sit in 0xA1..0xFE.
+    if (lead >= 0xA1 && lead <= 0xFE) {
+        return ((n > 1) && (b[1] >= 0xA1 && b[1] <= 0xFE)) ? 2 : 0;
+    }
+
+    return 0;
+}
+
+/**
+ * Reports how many bytes the character at the front of `b` occupies in the
+ * GB18030 encoding. The result is 0 when the bytes do not form a character.
+ * `n` is the number of bytes available; trailing bytes are only read when `n`
+ * covers them.
+ */
+static size_t
+rbs_encoding_gb18030_char_width(const uint8_t *b, ptrdiff_t n) {
+    const uint8_t lead = b[0];
+
+    // 1 byte characters.
+    if (lead < 0x80) {
+        return 1;
+    }
+
+    // Both multi-byte forms share the same lead byte range.
+    if (lead < 0x81 || lead > 0xFE) {
+        return 0;
+    }
+
+    // 2 byte characters.
+    if ((n > 1) && (b[1] >= 0x40 && b[1] <= 0xFE) && (b[1] != 0x7F)) {
+        return 2;
+    }
+
+    // 4 byte characters.
+    if ((n > 3) && (b[1] >= 0x30 && b[1] <= 0x39) && (b[2] >= 0x81 && b[2] <= 0xFE) && (b[3] >= 0x30 && b[3] <= 0x39)) {
+        return 4;
+    }
+
+    return 0;
+}
+
+/**
+ * Reports how many bytes the character at the front of `b` occupies in the
+ * GBK encoding. The result is 0 when the bytes do not form a character. `n` is
+ * the number of bytes available; the second byte is only read when `n` covers
+ * it.
+ */
+static size_t
+rbs_encoding_gbk_char_width(const uint8_t *b, ptrdiff_t n) {
+    const uint8_t lead = b[0];
+
+    // Single byte characters. The bound is inclusive: 0x80 itself is accepted.
+    if (lead <= 0x80) {
+        return 1;
+    }
+
+    // Every remaining form is two bytes long.
+    if (n <= 1) {
+        return 0;
+    }
+
+    // The regions below only ever use these three windows for the second byte.
+    const uint8_t trail = b[1];
+    const bool trail_upper = (trail >= 0xA1 && trail <= 0xFE);
+    const bool trail_lower = (trail >= 0x40 && trail <= 0xA0) && (trail != 0x7F);
+    const bool trail_full = (trail >= 0x40 && trail <= 0xFE) && (trail != 0x7F);
+
+    const bool is_pair =
+        ((lead >= 0xA1 && lead <= 0xA9) && trail_upper) || // GBK/1
+        ((lead >= 0xB0 && lead <= 0xF7) && trail_upper) || // GBK/2
+        ((lead >= 0x81 && lead <= 0xA0) && trail_full) || // GBK/3
+        ((lead >= 0xAA && lead <= 0xFE) && trail_lower) || // GBK/4
+        ((lead >= 0xA8 && lead <= 0xA9) && trail_lower) || // GBK/5
+        ((lead >= 0xAA && lead <= 0xAF) && trail_upper) || // user-defined 1
+        ((lead >= 0xF8 && lead <= 0xFE) && trail_upper) || // user-defined 2
+        ((lead >= 0xA1 && lead <= 0xA7) && trail_lower); // user-defined 3
+
+    return is_pair ? 2 : 0;
+}
+
+#endif
+
+/**
+ * This is the table of all of the encodings that prism supports.
+ */ +const rbs_encoding_t rbs_encodings[] = { + [RBS_ENCODING_UTF_8] = { + .name = "UTF-8", + .char_width = rbs_encoding_utf_8_char_width, + .alnum_char = rbs_encoding_utf_8_alnum_char, + .alpha_char = rbs_encoding_utf_8_alpha_char, + .isupper_char = rbs_encoding_utf_8_isupper_char, + .multibyte = true }, + [RBS_ENCODING_US_ASCII] = { .name = "US-ASCII", .char_width = rbs_encoding_ascii_char_width, .alnum_char = rbs_encoding_ascii_alnum_char, .alpha_char = rbs_encoding_ascii_alpha_char, .isupper_char = rbs_encoding_ascii_isupper_char, .multibyte = false }, + [RBS_ENCODING_ASCII_8BIT] = { .name = "ASCII-8BIT", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ascii_alnum_char, .alpha_char = rbs_encoding_ascii_alpha_char, .isupper_char = rbs_encoding_ascii_isupper_char, .multibyte = false }, + [RBS_ENCODING_EUC_JP] = { .name = "EUC-JP", .char_width = rbs_encoding_euc_jp_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_euc_jp_isupper_char, .multibyte = true }, + [RBS_ENCODING_WINDOWS_31J] = { .name = "Windows-31J", .char_width = rbs_encoding_shift_jis_char_width, .alnum_char = rbs_encoding_shift_jis_alnum_char, .alpha_char = rbs_encoding_shift_jis_alpha_char, .isupper_char = rbs_encoding_shift_jis_isupper_char, .multibyte = true }, + +#ifndef RBS_ENCODING_EXCLUDE_FULL + [RBS_ENCODING_BIG5] = { .name = "Big5", .char_width = rbs_encoding_big5_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_ascii_isupper_char_7bit, .multibyte = true }, + [RBS_ENCODING_BIG5_HKSCS] = { .name = "Big5-HKSCS", .char_width = rbs_encoding_big5_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_ascii_isupper_char_7bit, .multibyte = true }, + [RBS_ENCODING_BIG5_UAO] = { .name = "Big5-UAO", .char_width = 
rbs_encoding_big5_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_ascii_isupper_char_7bit, .multibyte = true }, + [RBS_ENCODING_CESU_8] = { .name = "CESU-8", .char_width = rbs_encoding_cesu_8_char_width, .alnum_char = rbs_encoding_cesu_8_alnum_char, .alpha_char = rbs_encoding_cesu_8_alpha_char, .isupper_char = rbs_encoding_cesu_8_isupper_char, .multibyte = true }, + [RBS_ENCODING_CP51932] = { .name = "CP51932", .char_width = rbs_encoding_euc_jp_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_euc_jp_isupper_char, .multibyte = true }, + [RBS_ENCODING_CP850] = { .name = "CP850", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_cp850_alnum_char, .alpha_char = rbs_encoding_cp850_alpha_char, .isupper_char = rbs_encoding_cp850_isupper_char, .multibyte = false }, + [RBS_ENCODING_CP852] = { .name = "CP852", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_cp852_alnum_char, .alpha_char = rbs_encoding_cp852_alpha_char, .isupper_char = rbs_encoding_cp852_isupper_char, .multibyte = false }, + [RBS_ENCODING_CP855] = { .name = "CP855", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_cp855_alnum_char, .alpha_char = rbs_encoding_cp855_alpha_char, .isupper_char = rbs_encoding_cp855_isupper_char, .multibyte = false }, + [RBS_ENCODING_CP949] = { .name = "CP949", .char_width = rbs_encoding_cp949_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_ascii_isupper_char_7bit, .multibyte = true }, + [RBS_ENCODING_CP950] = { .name = "CP950", .char_width = rbs_encoding_big5_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_ascii_isupper_char_7bit, .multibyte = 
true }, + [RBS_ENCODING_CP951] = { .name = "CP951", .char_width = rbs_encoding_big5_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_ascii_isupper_char_7bit, .multibyte = true }, + [RBS_ENCODING_EMACS_MULE] = { .name = "Emacs-Mule", .char_width = rbs_encoding_emacs_mule_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_ascii_isupper_char_7bit, .multibyte = true }, + [RBS_ENCODING_EUC_JP_MS] = { .name = "eucJP-ms", .char_width = rbs_encoding_euc_jp_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_euc_jp_isupper_char, .multibyte = true }, + [RBS_ENCODING_EUC_JIS_2004] = { .name = "EUC-JIS-2004", .char_width = rbs_encoding_euc_jp_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_euc_jp_isupper_char, .multibyte = true }, + [RBS_ENCODING_EUC_KR] = { .name = "EUC-KR", .char_width = rbs_encoding_euc_kr_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_ascii_isupper_char_7bit, .multibyte = true }, + [RBS_ENCODING_EUC_TW] = { .name = "EUC-TW", .char_width = rbs_encoding_euc_tw_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_ascii_isupper_char_7bit, .multibyte = true }, + [RBS_ENCODING_GB12345] = { .name = "GB12345", .char_width = rbs_encoding_euc_kr_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_ascii_isupper_char_7bit, .multibyte = true }, + [RBS_ENCODING_GB18030] = { .name = "GB18030", .char_width = rbs_encoding_gb18030_char_width, 
.alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_ascii_isupper_char_7bit, .multibyte = true }, + [RBS_ENCODING_GB1988] = { .name = "GB1988", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_gb1988_alnum_char, .alpha_char = rbs_encoding_gb1988_alpha_char, .isupper_char = rbs_encoding_gb1988_isupper_char, .multibyte = false }, + [RBS_ENCODING_GB2312] = { .name = "GB2312", .char_width = rbs_encoding_euc_kr_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_ascii_isupper_char_7bit, .multibyte = true }, + [RBS_ENCODING_GBK] = { .name = "GBK", .char_width = rbs_encoding_gbk_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_ascii_isupper_char_7bit, .multibyte = true }, + [RBS_ENCODING_IBM437] = { .name = "IBM437", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm437_alnum_char, .alpha_char = rbs_encoding_ibm437_alpha_char, .isupper_char = rbs_encoding_ibm437_isupper_char, .multibyte = false }, + [RBS_ENCODING_IBM720] = { .name = "IBM720", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm720_alnum_char, .alpha_char = rbs_encoding_ibm720_alpha_char, .isupper_char = rbs_encoding_ibm720_isupper_char, .multibyte = false }, + [RBS_ENCODING_IBM737] = { .name = "IBM737", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm737_alnum_char, .alpha_char = rbs_encoding_ibm737_alpha_char, .isupper_char = rbs_encoding_ibm737_isupper_char, .multibyte = false }, + [RBS_ENCODING_IBM775] = { .name = "IBM775", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm775_alnum_char, .alpha_char = rbs_encoding_ibm775_alpha_char, .isupper_char = rbs_encoding_ibm775_isupper_char, .multibyte = false }, + 
[RBS_ENCODING_IBM852] = { .name = "IBM852", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm852_alnum_char, .alpha_char = rbs_encoding_ibm852_alpha_char, .isupper_char = rbs_encoding_ibm852_isupper_char, .multibyte = false }, + [RBS_ENCODING_IBM855] = { .name = "IBM855", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm855_alnum_char, .alpha_char = rbs_encoding_ibm855_alpha_char, .isupper_char = rbs_encoding_ibm855_isupper_char, .multibyte = false }, + [RBS_ENCODING_IBM857] = { .name = "IBM857", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm857_alnum_char, .alpha_char = rbs_encoding_ibm857_alpha_char, .isupper_char = rbs_encoding_ibm857_isupper_char, .multibyte = false }, + [RBS_ENCODING_IBM860] = { .name = "IBM860", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm860_alnum_char, .alpha_char = rbs_encoding_ibm860_alpha_char, .isupper_char = rbs_encoding_ibm860_isupper_char, .multibyte = false }, + [RBS_ENCODING_IBM861] = { .name = "IBM861", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm861_alnum_char, .alpha_char = rbs_encoding_ibm861_alpha_char, .isupper_char = rbs_encoding_ibm861_isupper_char, .multibyte = false }, + [RBS_ENCODING_IBM862] = { .name = "IBM862", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm862_alnum_char, .alpha_char = rbs_encoding_ibm862_alpha_char, .isupper_char = rbs_encoding_ibm862_isupper_char, .multibyte = false }, + [RBS_ENCODING_IBM863] = { .name = "IBM863", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm863_alnum_char, .alpha_char = rbs_encoding_ibm863_alpha_char, .isupper_char = rbs_encoding_ibm863_isupper_char, .multibyte = false }, + [RBS_ENCODING_IBM864] = { .name = "IBM864", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm864_alnum_char, .alpha_char = rbs_encoding_ibm864_alpha_char, .isupper_char = 
rbs_encoding_ibm864_isupper_char, .multibyte = false }, + [RBS_ENCODING_IBM865] = { .name = "IBM865", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm865_alnum_char, .alpha_char = rbs_encoding_ibm865_alpha_char, .isupper_char = rbs_encoding_ibm865_isupper_char, .multibyte = false }, + [RBS_ENCODING_IBM866] = { .name = "IBM866", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm866_alnum_char, .alpha_char = rbs_encoding_ibm866_alpha_char, .isupper_char = rbs_encoding_ibm866_isupper_char, .multibyte = false }, + [RBS_ENCODING_IBM869] = { .name = "IBM869", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_ibm869_alnum_char, .alpha_char = rbs_encoding_ibm869_alpha_char, .isupper_char = rbs_encoding_ibm869_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_1] = { .name = "ISO-8859-1", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_iso_8859_1_alnum_char, .alpha_char = rbs_encoding_iso_8859_1_alpha_char, .isupper_char = rbs_encoding_iso_8859_1_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_2] = { .name = "ISO-8859-2", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_iso_8859_2_alnum_char, .alpha_char = rbs_encoding_iso_8859_2_alpha_char, .isupper_char = rbs_encoding_iso_8859_2_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_3] = { .name = "ISO-8859-3", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_iso_8859_3_alnum_char, .alpha_char = rbs_encoding_iso_8859_3_alpha_char, .isupper_char = rbs_encoding_iso_8859_3_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_4] = { .name = "ISO-8859-4", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_iso_8859_4_alnum_char, .alpha_char = rbs_encoding_iso_8859_4_alpha_char, .isupper_char = rbs_encoding_iso_8859_4_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_5] = { .name = "ISO-8859-5", .char_width = 
rbs_encoding_single_char_width, .alnum_char = rbs_encoding_iso_8859_5_alnum_char, .alpha_char = rbs_encoding_iso_8859_5_alpha_char, .isupper_char = rbs_encoding_iso_8859_5_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_6] = { .name = "ISO-8859-6", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_iso_8859_6_alnum_char, .alpha_char = rbs_encoding_iso_8859_6_alpha_char, .isupper_char = rbs_encoding_iso_8859_6_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_7] = { .name = "ISO-8859-7", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_iso_8859_7_alnum_char, .alpha_char = rbs_encoding_iso_8859_7_alpha_char, .isupper_char = rbs_encoding_iso_8859_7_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_8] = { .name = "ISO-8859-8", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_iso_8859_8_alnum_char, .alpha_char = rbs_encoding_iso_8859_8_alpha_char, .isupper_char = rbs_encoding_iso_8859_8_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_9] = { .name = "ISO-8859-9", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_iso_8859_9_alnum_char, .alpha_char = rbs_encoding_iso_8859_9_alpha_char, .isupper_char = rbs_encoding_iso_8859_9_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_10] = { .name = "ISO-8859-10", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_iso_8859_10_alnum_char, .alpha_char = rbs_encoding_iso_8859_10_alpha_char, .isupper_char = rbs_encoding_iso_8859_10_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_11] = { .name = "ISO-8859-11", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_iso_8859_11_alnum_char, .alpha_char = rbs_encoding_iso_8859_11_alpha_char, .isupper_char = rbs_encoding_iso_8859_11_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_13] = { .name = "ISO-8859-13", .char_width = rbs_encoding_single_char_width, .alnum_char = 
rbs_encoding_iso_8859_13_alnum_char, .alpha_char = rbs_encoding_iso_8859_13_alpha_char, .isupper_char = rbs_encoding_iso_8859_13_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_14] = { .name = "ISO-8859-14", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_iso_8859_14_alnum_char, .alpha_char = rbs_encoding_iso_8859_14_alpha_char, .isupper_char = rbs_encoding_iso_8859_14_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_15] = { .name = "ISO-8859-15", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_iso_8859_15_alnum_char, .alpha_char = rbs_encoding_iso_8859_15_alpha_char, .isupper_char = rbs_encoding_iso_8859_15_isupper_char, .multibyte = false }, + [RBS_ENCODING_ISO_8859_16] = { .name = "ISO-8859-16", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_iso_8859_16_alnum_char, .alpha_char = rbs_encoding_iso_8859_16_alpha_char, .isupper_char = rbs_encoding_iso_8859_16_isupper_char, .multibyte = false }, + [RBS_ENCODING_KOI8_R] = { .name = "KOI8-R", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_koi8_r_alnum_char, .alpha_char = rbs_encoding_koi8_r_alpha_char, .isupper_char = rbs_encoding_koi8_r_isupper_char, .multibyte = false }, + [RBS_ENCODING_KOI8_U] = { .name = "KOI8-U", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_koi8_u_alnum_char, .alpha_char = rbs_encoding_koi8_u_alpha_char, .isupper_char = rbs_encoding_koi8_u_isupper_char, .multibyte = false }, + [RBS_ENCODING_MAC_CENT_EURO] = { .name = "macCentEuro", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_mac_cent_euro_alnum_char, .alpha_char = rbs_encoding_mac_cent_euro_alpha_char, .isupper_char = rbs_encoding_mac_cent_euro_isupper_char, .multibyte = false }, + [RBS_ENCODING_MAC_CROATIAN] = { .name = "macCroatian", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_mac_croatian_alnum_char, .alpha_char = 
rbs_encoding_mac_croatian_alpha_char, .isupper_char = rbs_encoding_mac_croatian_isupper_char, .multibyte = false }, + [RBS_ENCODING_MAC_CYRILLIC] = { .name = "macCyrillic", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_mac_cyrillic_alnum_char, .alpha_char = rbs_encoding_mac_cyrillic_alpha_char, .isupper_char = rbs_encoding_mac_cyrillic_isupper_char, .multibyte = false }, + [RBS_ENCODING_MAC_GREEK] = { .name = "macGreek", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_mac_greek_alnum_char, .alpha_char = rbs_encoding_mac_greek_alpha_char, .isupper_char = rbs_encoding_mac_greek_isupper_char, .multibyte = false }, + [RBS_ENCODING_MAC_ICELAND] = { .name = "macIceland", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_mac_iceland_alnum_char, .alpha_char = rbs_encoding_mac_iceland_alpha_char, .isupper_char = rbs_encoding_mac_iceland_isupper_char, .multibyte = false }, + [RBS_ENCODING_MAC_JAPANESE] = { .name = "MacJapanese", .char_width = rbs_encoding_shift_jis_char_width, .alnum_char = rbs_encoding_shift_jis_alnum_char, .alpha_char = rbs_encoding_shift_jis_alpha_char, .isupper_char = rbs_encoding_shift_jis_isupper_char, .multibyte = true }, + [RBS_ENCODING_MAC_ROMAN] = { .name = "macRoman", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_mac_roman_alnum_char, .alpha_char = rbs_encoding_mac_roman_alpha_char, .isupper_char = rbs_encoding_mac_roman_isupper_char, .multibyte = false }, + [RBS_ENCODING_MAC_ROMANIA] = { .name = "macRomania", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_mac_romania_alnum_char, .alpha_char = rbs_encoding_mac_romania_alpha_char, .isupper_char = rbs_encoding_mac_romania_isupper_char, .multibyte = false }, + [RBS_ENCODING_MAC_THAI] = { .name = "macThai", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_mac_thai_alnum_char, .alpha_char = rbs_encoding_mac_thai_alpha_char, .isupper_char = 
rbs_encoding_mac_thai_isupper_char, .multibyte = false }, + [RBS_ENCODING_MAC_TURKISH] = { .name = "macTurkish", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_mac_turkish_alnum_char, .alpha_char = rbs_encoding_mac_turkish_alpha_char, .isupper_char = rbs_encoding_mac_turkish_isupper_char, .multibyte = false }, + [RBS_ENCODING_MAC_UKRAINE] = { .name = "macUkraine", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_mac_ukraine_alnum_char, .alpha_char = rbs_encoding_mac_ukraine_alpha_char, .isupper_char = rbs_encoding_mac_ukraine_isupper_char, .multibyte = false }, + [RBS_ENCODING_SHIFT_JIS] = { .name = "Shift_JIS", .char_width = rbs_encoding_shift_jis_char_width, .alnum_char = rbs_encoding_shift_jis_alnum_char, .alpha_char = rbs_encoding_shift_jis_alpha_char, .isupper_char = rbs_encoding_shift_jis_isupper_char, .multibyte = true }, + [RBS_ENCODING_SJIS_DOCOMO] = { .name = "SJIS-DoCoMo", .char_width = rbs_encoding_shift_jis_char_width, .alnum_char = rbs_encoding_shift_jis_alnum_char, .alpha_char = rbs_encoding_shift_jis_alpha_char, .isupper_char = rbs_encoding_shift_jis_isupper_char, .multibyte = true }, + [RBS_ENCODING_SJIS_KDDI] = { .name = "SJIS-KDDI", .char_width = rbs_encoding_shift_jis_char_width, .alnum_char = rbs_encoding_shift_jis_alnum_char, .alpha_char = rbs_encoding_shift_jis_alpha_char, .isupper_char = rbs_encoding_shift_jis_isupper_char, .multibyte = true }, + [RBS_ENCODING_SJIS_SOFTBANK] = { .name = "SJIS-SoftBank", .char_width = rbs_encoding_shift_jis_char_width, .alnum_char = rbs_encoding_shift_jis_alnum_char, .alpha_char = rbs_encoding_shift_jis_alpha_char, .isupper_char = rbs_encoding_shift_jis_isupper_char, .multibyte = true }, + [RBS_ENCODING_STATELESS_ISO_2022_JP] = { .name = "stateless-ISO-2022-JP", .char_width = rbs_encoding_emacs_mule_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = 
rbs_encoding_ascii_isupper_char_7bit, .multibyte = true }, + [RBS_ENCODING_STATELESS_ISO_2022_JP_KDDI] = { .name = "stateless-ISO-2022-JP-KDDI", .char_width = rbs_encoding_emacs_mule_char_width, .alnum_char = rbs_encoding_ascii_alnum_char_7bit, .alpha_char = rbs_encoding_ascii_alpha_char_7bit, .isupper_char = rbs_encoding_ascii_isupper_char_7bit, .multibyte = true }, + [RBS_ENCODING_TIS_620] = { .name = "TIS-620", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_tis_620_alnum_char, .alpha_char = rbs_encoding_tis_620_alpha_char, .isupper_char = rbs_encoding_tis_620_isupper_char, .multibyte = false }, + [RBS_ENCODING_UTF8_MAC] = { .name = "UTF8-MAC", .char_width = rbs_encoding_utf_8_char_width, .alnum_char = rbs_encoding_utf_8_alnum_char, .alpha_char = rbs_encoding_utf_8_alpha_char, .isupper_char = rbs_encoding_utf_8_isupper_char, .multibyte = true }, + [RBS_ENCODING_UTF8_DOCOMO] = { .name = "UTF8-DoCoMo", .char_width = rbs_encoding_utf_8_char_width, .alnum_char = rbs_encoding_utf_8_alnum_char, .alpha_char = rbs_encoding_utf_8_alpha_char, .isupper_char = rbs_encoding_utf_8_isupper_char, .multibyte = true }, + [RBS_ENCODING_UTF8_KDDI] = { .name = "UTF8-KDDI", .char_width = rbs_encoding_utf_8_char_width, .alnum_char = rbs_encoding_utf_8_alnum_char, .alpha_char = rbs_encoding_utf_8_alpha_char, .isupper_char = rbs_encoding_utf_8_isupper_char, .multibyte = true }, + [RBS_ENCODING_UTF8_SOFTBANK] = { .name = "UTF8-SoftBank", .char_width = rbs_encoding_utf_8_char_width, .alnum_char = rbs_encoding_utf_8_alnum_char, .alpha_char = rbs_encoding_utf_8_alpha_char, .isupper_char = rbs_encoding_utf_8_isupper_char, .multibyte = true }, + [RBS_ENCODING_WINDOWS_1250] = { .name = "Windows-1250", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_windows_1250_alnum_char, .alpha_char = rbs_encoding_windows_1250_alpha_char, .isupper_char = rbs_encoding_windows_1250_isupper_char, .multibyte = false }, + [RBS_ENCODING_WINDOWS_1251] = { .name = 
"Windows-1251", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_windows_1251_alnum_char, .alpha_char = rbs_encoding_windows_1251_alpha_char, .isupper_char = rbs_encoding_windows_1251_isupper_char, .multibyte = false }, + [RBS_ENCODING_WINDOWS_1252] = { .name = "Windows-1252", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_windows_1252_alnum_char, .alpha_char = rbs_encoding_windows_1252_alpha_char, .isupper_char = rbs_encoding_windows_1252_isupper_char, .multibyte = false }, + [RBS_ENCODING_WINDOWS_1253] = { .name = "Windows-1253", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_windows_1253_alnum_char, .alpha_char = rbs_encoding_windows_1253_alpha_char, .isupper_char = rbs_encoding_windows_1253_isupper_char, .multibyte = false }, + [RBS_ENCODING_WINDOWS_1254] = { .name = "Windows-1254", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_windows_1254_alnum_char, .alpha_char = rbs_encoding_windows_1254_alpha_char, .isupper_char = rbs_encoding_windows_1254_isupper_char, .multibyte = false }, + [RBS_ENCODING_WINDOWS_1255] = { .name = "Windows-1255", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_windows_1255_alnum_char, .alpha_char = rbs_encoding_windows_1255_alpha_char, .isupper_char = rbs_encoding_windows_1255_isupper_char, .multibyte = false }, + [RBS_ENCODING_WINDOWS_1256] = { .name = "Windows-1256", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_windows_1256_alnum_char, .alpha_char = rbs_encoding_windows_1256_alpha_char, .isupper_char = rbs_encoding_windows_1256_isupper_char, .multibyte = false }, + [RBS_ENCODING_WINDOWS_1257] = { .name = "Windows-1257", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_windows_1257_alnum_char, .alpha_char = rbs_encoding_windows_1257_alpha_char, .isupper_char = rbs_encoding_windows_1257_isupper_char, .multibyte = false }, + [RBS_ENCODING_WINDOWS_1258] = { .name 
= "Windows-1258", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_windows_1258_alnum_char, .alpha_char = rbs_encoding_windows_1258_alpha_char, .isupper_char = rbs_encoding_windows_1258_isupper_char, .multibyte = false },
+    [RBS_ENCODING_WINDOWS_874] = { .name = "Windows-874", .char_width = rbs_encoding_single_char_width, .alnum_char = rbs_encoding_windows_874_alnum_char, .alpha_char = rbs_encoding_windows_874_alpha_char, .isupper_char = rbs_encoding_windows_874_isupper_char, .multibyte = false }
+#endif
+};
+
+/**
+ * Compare two strings, ignoring case, up to the given length. Returns 0 if the
+ * strings are equal, a negative number if string1 is less than string2, or a
+ * positive number if string1 is greater than string2.
+ *
+ * Comparison stops at the first differing byte, at a NUL byte that both
+ * strings share, or after `length` bytes, whichever comes first. A string that
+ * ends (with NUL) before the other one is therefore never reported as equal.
+ *
+ * Note that this is effectively our own implementation of strncasecmp, which
+ * is not available on all of the platforms we want to support, so we're
+ * rolling it here.
+ *
+ * @param string1 The first string to compare.
+ * @param string2 The second string to compare.
+ * @param length The maximum number of characters to compare.
+ * @return 0 if the strings are equal, a negative number if string1 is less than
+ * string2, or a positive number if string1 is greater than string2.
+ */
+static int
+rbs_strncasecmp(const uint8_t *string1, const uint8_t *string2, size_t length) {
+    for (size_t offset = 0; offset < length; offset++) {
+        int lower1 = tolower(string1[offset]);
+        int lower2 = tolower(string2[offset]);
+
+        // A NUL terminator present in only one of the strings is an ordinary
+        // difference, so a short string never compares equal to a longer one.
+        if (lower1 != lower2) return lower1 - lower2;
+
+        // Both strings ended at the same offset: nothing left to compare.
+        if (lower1 == '\0') return 0;
+    }
+
+    return 0;
+}
+
+/**
+ * Parse the given name of an encoding and return a pointer to the corresponding
+ * encoding struct if one can be found, otherwise return NULL.
+ *
+ * Matching is case-insensitive. Any name that starts with "UTF-8" is resolved
+ * by the prefix check at the top of the function and never reaches the switch.
+ *
+ * @param start Pointer to the first byte of the encoding name.
+ * @param end Pointer just past the last byte of the encoding name.
+ * @return A pointer to the matching encoding entry, or NULL if the name is not
+ * recognized.
+ */
+const rbs_encoding_t *
+rbs_encoding_find(const uint8_t *start, const uint8_t *end) {
+    size_t width = (size_t) (end - start);
+
+    // First, we're going to check for UTF-8. This is the most common encoding.
+    // UTF-8 can contain extra information at the end about the platform it is
+    // encoded on, such as UTF-8-MAC or UTF-8-UNIX. We'll ignore those suffixes.
+    if ((start + 5 <= end) && (rbs_strncasecmp(start, (const uint8_t *) "UTF-8", 5) == 0)) {
+#ifndef RBS_ENCODING_EXCLUDE_FULL
+        // We need to explicitly handle UTF-8-HFS, as that one needs to switch
+        // over to being UTF8-MAC.
+        if (width == 9 && (rbs_strncasecmp(start + 5, (const uint8_t *) "-HFS", 4) == 0)) {
+            return &rbs_encodings[RBS_ENCODING_UTF8_MAC];
+        }
+#endif
+
+        // Otherwise we'll return the default UTF-8 encoding.
+        return RBS_ENCODING_UTF_8_ENTRY;
+    }
+
+    // Next, we're going to loop through each of the encodings that we handle
+    // explicitly. If we found one that we understand, we'll use that value.
+#define ENCODING1(name, encoding) \
+    if (width == sizeof(name) - 1 && rbs_strncasecmp(start, (const uint8_t *) name, width) == 0) return &rbs_encodings[encoding];
+#define ENCODING2(name1, name2, encoding) ENCODING1(name1, encoding) ENCODING1(name2, encoding)
+
+    if (width >= 3) {
+        switch (*start) {
+        case 'A':
+        case 'a':
+            ENCODING1("ASCII", RBS_ENCODING_US_ASCII);
+            ENCODING1("ASCII-8BIT", RBS_ENCODING_ASCII_8BIT);
+            ENCODING1("ANSI_X3.4-1968", RBS_ENCODING_US_ASCII);
+            break;
+        case 'B':
+        case 'b':
+            ENCODING1("BINARY", RBS_ENCODING_ASCII_8BIT);
+#ifndef RBS_ENCODING_EXCLUDE_FULL
+            ENCODING1("Big5", RBS_ENCODING_BIG5);
+            ENCODING2("Big5-HKSCS", "Big5-HKSCS:2008", RBS_ENCODING_BIG5_HKSCS);
+            ENCODING1("Big5-UAO", RBS_ENCODING_BIG5_UAO);
+#endif
+            break;
+        case 'C':
+        case 'c':
+            ENCODING1("CP65001", RBS_ENCODING_UTF_8);
+            ENCODING2("CP932", "csWindows31J", RBS_ENCODING_WINDOWS_31J);
+#ifndef RBS_ENCODING_EXCLUDE_FULL
+            ENCODING1("CESU-8", RBS_ENCODING_CESU_8);
+            ENCODING1("CP437", RBS_ENCODING_IBM437);
+            ENCODING1("CP720", RBS_ENCODING_IBM720);
+            ENCODING1("CP737", RBS_ENCODING_IBM737);
+            ENCODING1("CP775", RBS_ENCODING_IBM775);
+            ENCODING1("CP850", RBS_ENCODING_CP850);
+            ENCODING1("CP852", RBS_ENCODING_CP852);
+            ENCODING1("CP855", RBS_ENCODING_CP855);
+            ENCODING1("CP857", RBS_ENCODING_IBM857);
+            ENCODING1("CP860", RBS_ENCODING_IBM860);
+            ENCODING1("CP861", RBS_ENCODING_IBM861);
+            ENCODING1("CP862", RBS_ENCODING_IBM862);
+            ENCODING1("CP864", RBS_ENCODING_IBM864);
+            ENCODING1("CP865", RBS_ENCODING_IBM865);
+            ENCODING1("CP866", RBS_ENCODING_IBM866);
+            ENCODING1("CP869", RBS_ENCODING_IBM869);
+            ENCODING1("CP874", RBS_ENCODING_WINDOWS_874);
+            ENCODING1("CP878", RBS_ENCODING_KOI8_R);
+            ENCODING1("CP863", RBS_ENCODING_IBM863);
+            ENCODING1("CP936", RBS_ENCODING_GBK);
+            ENCODING1("CP949", RBS_ENCODING_CP949);
+            ENCODING1("CP950", RBS_ENCODING_CP950);
+            ENCODING1("CP951", RBS_ENCODING_CP951);
+            ENCODING1("CP1250", RBS_ENCODING_WINDOWS_1250);
+            ENCODING1("CP1251", RBS_ENCODING_WINDOWS_1251);
+            ENCODING1("CP1252", RBS_ENCODING_WINDOWS_1252);
+            ENCODING1("CP1253", RBS_ENCODING_WINDOWS_1253);
+            ENCODING1("CP1254", RBS_ENCODING_WINDOWS_1254);
+            ENCODING1("CP1255", RBS_ENCODING_WINDOWS_1255);
+            ENCODING1("CP1256", RBS_ENCODING_WINDOWS_1256);
+            ENCODING1("CP1257", RBS_ENCODING_WINDOWS_1257);
+            ENCODING1("CP1258", RBS_ENCODING_WINDOWS_1258);
+            ENCODING1("CP51932", RBS_ENCODING_CP51932);
+#endif
+            break;
+        case 'E':
+        case 'e':
+            ENCODING2("EUC-JP", "eucJP", RBS_ENCODING_EUC_JP);
+#ifndef RBS_ENCODING_EXCLUDE_FULL
+            ENCODING2("eucJP-ms", "euc-jp-ms", RBS_ENCODING_EUC_JP_MS);
+            ENCODING2("EUC-JIS-2004", "EUC-JISX0213", RBS_ENCODING_EUC_JIS_2004);
+            ENCODING2("EUC-KR", "eucKR", RBS_ENCODING_EUC_KR);
+            ENCODING2("EUC-CN", "eucCN", RBS_ENCODING_GB2312);
+            ENCODING2("EUC-TW", "eucTW", RBS_ENCODING_EUC_TW);
+            ENCODING1("Emacs-Mule", RBS_ENCODING_EMACS_MULE);
+#endif
+            break;
+        case 'G':
+        case 'g':
+#ifndef RBS_ENCODING_EXCLUDE_FULL
+            ENCODING1("GBK", RBS_ENCODING_GBK);
+            ENCODING1("GB12345", RBS_ENCODING_GB12345);
+            ENCODING1("GB18030", RBS_ENCODING_GB18030);
+            ENCODING1("GB1988", RBS_ENCODING_GB1988);
+            ENCODING1("GB2312", RBS_ENCODING_GB2312);
+#endif
+            break;
+        case 'I':
+        case 'i':
+#ifndef RBS_ENCODING_EXCLUDE_FULL
+            ENCODING1("IBM437", RBS_ENCODING_IBM437);
+            ENCODING1("IBM720", RBS_ENCODING_IBM720);
+            ENCODING1("IBM737", RBS_ENCODING_IBM737);
+            ENCODING1("IBM775", RBS_ENCODING_IBM775);
+            ENCODING1("IBM850", RBS_ENCODING_CP850);
+            ENCODING1("IBM852", RBS_ENCODING_IBM852);
+            ENCODING1("IBM855", RBS_ENCODING_IBM855);
+            ENCODING1("IBM857", RBS_ENCODING_IBM857);
+            ENCODING1("IBM860", RBS_ENCODING_IBM860);
+            ENCODING1("IBM861", RBS_ENCODING_IBM861);
+            ENCODING1("IBM862", RBS_ENCODING_IBM862);
+            ENCODING1("IBM863", RBS_ENCODING_IBM863);
+            ENCODING1("IBM864", RBS_ENCODING_IBM864);
+            ENCODING1("IBM865", RBS_ENCODING_IBM865);
+            ENCODING1("IBM866", RBS_ENCODING_IBM866);
+            ENCODING1("IBM869", RBS_ENCODING_IBM869);
+            ENCODING2("ISO-8859-1", "ISO8859-1", RBS_ENCODING_ISO_8859_1);
+            ENCODING2("ISO-8859-2", "ISO8859-2", RBS_ENCODING_ISO_8859_2);
+            ENCODING2("ISO-8859-3", "ISO8859-3", RBS_ENCODING_ISO_8859_3);
+            ENCODING2("ISO-8859-4", "ISO8859-4", RBS_ENCODING_ISO_8859_4);
+            ENCODING2("ISO-8859-5", "ISO8859-5", RBS_ENCODING_ISO_8859_5);
+            ENCODING2("ISO-8859-6", "ISO8859-6", RBS_ENCODING_ISO_8859_6);
+            ENCODING2("ISO-8859-7", "ISO8859-7", RBS_ENCODING_ISO_8859_7);
+            ENCODING2("ISO-8859-8", "ISO8859-8", RBS_ENCODING_ISO_8859_8);
+            ENCODING2("ISO-8859-9", "ISO8859-9", RBS_ENCODING_ISO_8859_9);
+            ENCODING2("ISO-8859-10", "ISO8859-10", RBS_ENCODING_ISO_8859_10);
+            ENCODING2("ISO-8859-11", "ISO8859-11", RBS_ENCODING_ISO_8859_11);
+            ENCODING2("ISO-8859-13", "ISO8859-13", RBS_ENCODING_ISO_8859_13);
+            ENCODING2("ISO-8859-14", "ISO8859-14", RBS_ENCODING_ISO_8859_14);
+            ENCODING2("ISO-8859-15", "ISO8859-15", RBS_ENCODING_ISO_8859_15);
+            ENCODING2("ISO-8859-16", "ISO8859-16", RBS_ENCODING_ISO_8859_16);
+#endif
+            break;
+        case 'K':
+        case 'k':
+#ifndef RBS_ENCODING_EXCLUDE_FULL
+            ENCODING1("KOI8-R", RBS_ENCODING_KOI8_R);
+            ENCODING1("KOI8-U", RBS_ENCODING_KOI8_U);
+#endif
+            break;
+        case 'M':
+        case 'm':
+#ifndef RBS_ENCODING_EXCLUDE_FULL
+            ENCODING1("macCentEuro", RBS_ENCODING_MAC_CENT_EURO);
+            ENCODING1("macCroatian", RBS_ENCODING_MAC_CROATIAN);
+            ENCODING1("macCyrillic", RBS_ENCODING_MAC_CYRILLIC);
+            ENCODING1("macGreek", RBS_ENCODING_MAC_GREEK);
+            ENCODING1("macIceland", RBS_ENCODING_MAC_ICELAND);
+            ENCODING1("MacJapanese", RBS_ENCODING_MAC_JAPANESE);
+            ENCODING1("MacJapan", RBS_ENCODING_MAC_JAPANESE);
+            ENCODING1("macRoman", RBS_ENCODING_MAC_ROMAN);
+            ENCODING1("macRomania", RBS_ENCODING_MAC_ROMANIA);
+            ENCODING1("macThai", RBS_ENCODING_MAC_THAI);
+            ENCODING1("macTurkish", RBS_ENCODING_MAC_TURKISH);
+            ENCODING1("macUkraine", RBS_ENCODING_MAC_UKRAINE);
+#endif
+            break;
+        case 'P':
+        case 'p':
+            ENCODING1("PCK", RBS_ENCODING_WINDOWS_31J);
+            break;
+        case 'S':
+        case 's':
+            ENCODING1("SJIS", RBS_ENCODING_WINDOWS_31J);
+#ifndef RBS_ENCODING_EXCLUDE_FULL
+            ENCODING1("Shift_JIS", RBS_ENCODING_SHIFT_JIS);
+            ENCODING1("SJIS-DoCoMo", RBS_ENCODING_SJIS_DOCOMO);
+            ENCODING1("SJIS-KDDI", RBS_ENCODING_SJIS_KDDI);
+            ENCODING1("SJIS-SoftBank", RBS_ENCODING_SJIS_SOFTBANK);
+            ENCODING1("stateless-ISO-2022-JP", RBS_ENCODING_STATELESS_ISO_2022_JP);
+            ENCODING1("stateless-ISO-2022-JP-KDDI", RBS_ENCODING_STATELESS_ISO_2022_JP_KDDI);
+#endif
+            break;
+        case 'T':
+        case 't':
+#ifndef RBS_ENCODING_EXCLUDE_FULL
+            ENCODING1("TIS-620", RBS_ENCODING_TIS_620);
+#endif
+            break;
+        case 'U':
+        case 'u':
+            ENCODING1("US-ASCII", RBS_ENCODING_US_ASCII);
+#ifndef RBS_ENCODING_EXCLUDE_FULL
+            ENCODING2("UTF8-MAC", "UTF-8-HFS", RBS_ENCODING_UTF8_MAC);
+            ENCODING1("UTF8-DoCoMo", RBS_ENCODING_UTF8_DOCOMO);
+            ENCODING1("UTF8-KDDI", RBS_ENCODING_UTF8_KDDI);
+            ENCODING1("UTF8-SoftBank", RBS_ENCODING_UTF8_SOFTBANK);
+#endif
+            break;
+        case 'W':
+        case 'w':
+            ENCODING1("Windows-31J", RBS_ENCODING_WINDOWS_31J);
+#ifndef RBS_ENCODING_EXCLUDE_FULL
+            ENCODING1("Windows-874", RBS_ENCODING_WINDOWS_874);
+            ENCODING1("Windows-1250", RBS_ENCODING_WINDOWS_1250);
+            ENCODING1("Windows-1251", RBS_ENCODING_WINDOWS_1251);
+            ENCODING1("Windows-1252", RBS_ENCODING_WINDOWS_1252);
+            ENCODING1("Windows-1253", RBS_ENCODING_WINDOWS_1253);
+            ENCODING1("Windows-1254", RBS_ENCODING_WINDOWS_1254);
+            ENCODING1("Windows-1255", RBS_ENCODING_WINDOWS_1255);
+            ENCODING1("Windows-1256", RBS_ENCODING_WINDOWS_1256);
+            ENCODING1("Windows-1257", RBS_ENCODING_WINDOWS_1257);
+            ENCODING1("Windows-1258", RBS_ENCODING_WINDOWS_1258);
+#endif
+            break;
+        case '6':
+            ENCODING1("646", RBS_ENCODING_US_ASCII);
+            break;
+        }
+    }
+
+#undef ENCODING2
+#undef ENCODING1
+
+    // If we didn't match any encodings, return NULL.
+    return NULL;
+}
diff --git a/src/util/rbs_unescape.c b/src/util/rbs_unescape.c
new file mode 100644
index 000000000..909c9c41d
--- /dev/null
+++ b/src/util/rbs_unescape.c
@@ -0,0 +1,131 @@
+#include "rbs/util/rbs_unescape.h"
+#include
+#include
+#include
+
+// Define the escape character mappings
+// Each entry maps an escape sequence as written in the source (`from`) to its
+// replacement text (`to`). The final entry is a lone backslash whose
+// replacement is the empty string.
+// TODO: use a switch instead
+static const struct {
+    const char *from;
+    const char *to;
+} TABLE[] = {
+    { "\\a", "\a" },
+    { "\\b", "\b" },
+    { "\\e", "\033" },
+    { "\\f", "\f" },
+    { "\\n", "\n" },
+    { "\\r", "\r" },
+    { "\\s", " " },
+    { "\\t", "\t" },
+    { "\\v", "\v" },
+    { "\\\"", "\"" },
+    { "\\'", "'" },
+    { "\\\\", "\\" },
+    { "\\", "" }
+};
+
+// Helper function to convert hex string to integer
+static int hex_to_int(const char *hex, int length) {
+    int result = 0;
+    for (int i = 0; i < length; i++) {
+        result = result * 16 + (isdigit(hex[i]) ?
hex[i] - '0' : tolower(hex[i]) - 'a' + 10);
+    }
+    return result;
+}
+
+// Helper function to convert octal string to integer.
+// Reads exactly `length` characters; no validation is done here, so the
+// caller must only pass characters in '0'..'7'.
+static int octal_to_int(const char *octal, int length) {
+    int result = 0;
+    for (int i = 0; i < length; i++) {
+        result = result * 8 + (octal[i] - '0');
+    }
+    return result;
+}
+
+// Returns the number of bytes needed to encode the code point `c` in UTF-8.
+int rbs_utf8_codelen(unsigned int c) {
+    if (c <= 0x7F) return 1;
+    if (c <= 0x7FF) return 2;
+    if (c <= 0xFFFF) return 3;
+    if (c <= 0x10FFFF) return 4;
+    return 1; // Invalid Unicode codepoint, treat as 1 byte
+}
+
+// Returns true when `c` is an octal digit ('0'..'7').
+static bool is_octal_digit(char c) {
+    return c >= '0' && c <= '7';
+}
+
+// Returns true when `c` is a hexadecimal digit. The cast keeps bytes >= 0x80
+// from being passed to isxdigit() as negative values.
+static bool is_hex_digit(char c) {
+    return isxdigit((unsigned char) c) != 0;
+}
+
+// Parses the four characters that follow "\u". Returns false when any of them
+// is not a hex digit, or when the value is a UTF-16 surrogate
+// (U+D800..U+DFFF), which has no valid UTF-8 encoding. On success the code
+// point is stored in `*codepoint`.
+static bool parse_unicode_escape(const char *digits, unsigned int *codepoint) {
+    for (int k = 0; k < 4; k++) {
+        if (!is_hex_digit(digits[k])) return false;
+    }
+
+    unsigned int value = (unsigned int) hex_to_int(digits, 4);
+    if (value >= 0xD800 && value <= 0xDFFF) return false;
+
+    *codepoint = value;
+    return true;
+}
+
+// Writes the UTF-8 encoding of `c` (a code point no larger than U+FFFF) to
+// `out` and returns the number of bytes written (1 to 3).
+static size_t utf8_encode_bmp(unsigned int c, char *out) {
+    if (c <= 0x7F) {
+        out[0] = (char) c;
+        return 1;
+    }
+    if (c <= 0x7FF) {
+        out[0] = (char) (0xC0 | (c >> 6));
+        out[1] = (char) (0x80 | (c & 0x3F));
+        return 2;
+    }
+    out[0] = (char) (0xE0 | (c >> 12));
+    out[1] = (char) (0x80 | ((c >> 6) & 0x3F));
+    out[2] = (char) (0x80 | (c & 0x3F));
+    return 3;
+}
+
+// Returns a newly allocated copy of `string` with its escape sequences
+// resolved. Returns RBS_STRING_NULL when `string.start` is NULL or the
+// allocation fails.
+//
+// With `is_double_quote` set, the following are recognized:
+//   - octal escapes of one to three octal digits (\0 .. \777),
+//   - hex escapes of one or two hex digits (\xA, \x41),
+//   - \uXXXX with exactly four hex digits, written out as UTF-8,
+//   - the sequences listed in TABLE; for any other character the backslash
+//     is dropped and the character is kept.
+// Otherwise only \' and \\ are resolved and everything else is copied as is.
+rbs_string_t unescape_string(rbs_allocator_t *allocator, const rbs_string_t string, bool is_double_quote) {
+    if (!string.start) return RBS_STRING_NULL;
+
+    size_t len = string.end - string.start;
+    const char *input = string.start;
+
+    // No escape sequence produces more bytes than it consumes, so `len` bytes
+    // plus the terminator is always enough room.
+    char *output = rbs_allocator_alloc_many(allocator, len + 1, char);
+    if (!output) return RBS_STRING_NULL;
+
+    size_t i = 0, j = 0;
+    while (i < len) {
+        // Ordinary bytes, and a backslash that is the very last byte, are
+        // copied through unchanged.
+        if (input[i] != '\\' || i + 1 >= len) {
+            output[j++] = input[i++];
+            continue;
+        }
+
+        char next = input[i + 1];
+
+        if (!is_double_quote) {
+            /* Single quote: only escape ' and \ */
+            if (next == '\'' || next == '\\') {
+                output[j++] = next;
+                i += 2;
+            } else {
+                output[j++] = input[i++];
+            }
+            continue;
+        }
+
+        unsigned int codepoint = 0;
+
+        if (is_octal_digit(next)) {
+            // Octal escape: '8' and '9' are not octal digits, so they neither
+            // start nor continue the sequence.
+            int octal_len = 1;
+            while (octal_len < 3 && i + 1 + octal_len < len && is_octal_digit(input[i + 1 + octal_len]))
+                octal_len++;
+            int value = octal_to_int(input + i + 1, octal_len);
+            output[j++] = (char) value;
+            i += octal_len + 1;
+        } else if (next == 'x' && i + 2 < len && is_hex_digit(input[i + 2])) {
+            // Hex escape: the first digit is required, the second is optional
+            // and may be missing because the input ends.
+            int hex_len = (i + 3 < len && is_hex_digit(input[i + 3])) ? 2 : 1;
+            int value = hex_to_int(input + i + 2, hex_len);
+            output[j++] = (char) value;
+            i += hex_len + 2;
+        } else if (next == 'u' && i + 5 < len && parse_unicode_escape(input + i + 2, &codepoint)) {
+            // Unicode escape: emit the code point as UTF-8 rather than
+            // truncating it to a single byte.
+            j += utf8_encode_bmp(codepoint, output + j);
+            i += 6;
+        } else {
+            // Other escapes
+            bool found = false;
+            for (size_t k = 0; k < sizeof(TABLE) / sizeof(TABLE[0]); k++) {
+                size_t from_len = strlen(TABLE[k].from);
+                if (strncmp(input + i, TABLE[k].from, from_len) == 0) {
+                    // The lone-backslash entry has an empty replacement: drop
+                    // the backslash instead of writing a NUL byte.
+                    if (TABLE[k].to[0] != '\0') output[j++] = TABLE[k].to[0];
+                    i += from_len;
+                    found = true;
+                    break;
+                }
+            }
+            if (!found) {
+                output[j++] = input[i++];
+            }
+        }
+    }
+    output[j] = '\0';
+    return rbs_string_new(output, output + j);
+}
+
+// Strips one leading and one trailing quote character from `input` when it
+// starts with ", ' or `, then unescapes the remainder. Double-quote escape
+// rules are applied only when the opening quote is ".
+rbs_string_t rbs_unquote_string(rbs_allocator_t *allocator, rbs_string_t input) {
+    unsigned int first_char = rbs_utf8_string_to_codepoint(input);
+    size_t byte_length = rbs_string_len(input);
+
+    ptrdiff_t start_offset = 0;
+    if (first_char == '"' || first_char == '\'' || first_char == '`') {
+        size_t quote_len = (size_t) rbs_utf8_codelen(first_char);
+
+        // Only strip when both quotes fit; otherwise the unsigned subtraction
+        // below would wrap around to a huge length.
+        if (byte_length >= 2 * quote_len) {
+            start_offset += (ptrdiff_t) quote_len;
+            byte_length -= 2 * quote_len;
+        }
+    }
+
+    const char *new_start = input.start + start_offset;
+    rbs_string_t string = rbs_string_new(new_start, new_start + byte_length);
+    return unescape_string(allocator, string, first_char == '"');
+}
diff --git a/templates/ext/rbs_extension/ast_translation.c.erb b/templates/ext/rbs_extension/ast_translation.c.erb
new file mode 100644
index 000000000..d06dbb8fd
--- /dev/null
+++ b/templates/ext/rbs_extension/ast_translation.c.erb
@@ -0,0 +1,193 @@
+#include "ast_translation.h"
+
+#include "class_constants.h"
+#include "rbs_string_bridging.h"
+#include "legacy_location.h"
+
+VALUE EMPTY_ARRAY;
+VALUE EMPTY_HASH;
+
+#define RBS_LOC_CHILDREN_SIZE(cap) (sizeof(rbs_loc_children) + sizeof(rbs_loc_entry) *
((cap) - 1))
+
+// Bundles the state needed while translating C AST nodes into Ruby objects.
+rbs_translation_context_t rbs_translation_context_create(rbs_constant_pool_t *constant_pool, VALUE buffer, rb_encoding *ruby_encoding) {
+    return (rbs_translation_context_t) {
+        .constant_pool = constant_pool,
+        .buffer = buffer,
+        .encoding = ruby_encoding,
+    };
+}
+
+// Translates every node of `list` and returns them as a new Ruby array.
+VALUE rbs_node_list_to_ruby_array(rbs_translation_context_t ctx, rbs_node_list_t *list) {
+    VALUE ruby_array = rb_ary_new();
+
+    for (rbs_node_list_node_t *n = list->head; n != NULL; n = n->next) {
+        rb_ary_push(ruby_array, rbs_struct_to_ruby_value(ctx, n->node));
+    }
+
+    return ruby_array;
+}
+
+// Translates the keys and values of `rbs_hash` into a new Ruby hash. An empty
+// input returns the shared EMPTY_HASH object instead of allocating.
+VALUE rbs_hash_to_ruby_hash(rbs_translation_context_t ctx, rbs_hash_t *rbs_hash) {
+    if (!rbs_hash->head) {
+        return EMPTY_HASH;
+    }
+
+    VALUE ruby_hash = rb_hash_new();
+
+    for (rbs_hash_node_t *n = rbs_hash->head; n != NULL; n = n->next) {
+        VALUE key = rbs_struct_to_ruby_value(ctx, n->key);
+        VALUE value = rbs_struct_to_ruby_value(ctx, n->value);
+        rb_hash_aset(ruby_hash, key, value);
+    }
+
+    return ruby_hash;
+}
+
+// Builds a Ruby location object for `source_loc` over ctx.buffer, copying the
+// children table when there is one. Returns Qnil for a NULL location.
+VALUE rbs_loc_to_ruby_location(rbs_translation_context_t ctx, rbs_location_t *source_loc) {
+    if (source_loc == NULL) {
+        return Qnil;
+    }
+
+    VALUE new_loc = rbs_new_location(ctx.buffer, source_loc->rg);
+    rbs_loc *new_loc_struct = rbs_check_location(new_loc);
+
+    if (source_loc->children != NULL) {
+        rbs_loc_legacy_alloc_children(new_loc_struct, source_loc->children->cap);
+        memcpy(new_loc_struct->children, source_loc->children, RBS_LOC_CHILDREN_SIZE(source_loc->children->cap));
+    }
+
+    return new_loc;
+}
+
+// Translates every location of `list` into a new Ruby array. A NULL list
+// returns the shared EMPTY_ARRAY object.
+VALUE rbs_location_list_to_ruby_array(rbs_translation_context_t ctx, rbs_location_list_t *list) {
+    if (list == NULL) {
+        return EMPTY_ARRAY;
+    }
+
+    VALUE ruby_array = rb_ary_new();
+
+    for (rbs_location_list_node_t *n = list->head; n != NULL; n = n->next) {
+        rb_ary_push(ruby_array, rbs_loc_to_ruby_location(ctx, n->loc));
+    }
+
+    return ruby_array;
+}
+
+#ifdef RB_PASS_KEYWORDS
+// Ruby 2.7 or later
+#define CLASS_NEW_INSTANCE(klass, argc, argv) \
+    rb_class_new_instance_kw(argc, argv, klass, RB_PASS_KEYWORDS)
+#else
+// Ruby 2.6
+#define CLASS_NEW_INSTANCE(receiver, argc, argv) \
+    rb_class_new_instance(argc, argv, receiver)
+#endif
+
+// Translates a C AST node into its Ruby counterpart. Returns Qnil for a NULL
+// node and raises RuntimeError for a node type that has no case below.
+//
+// Most node types are turned into a hash of their fields, which is passed as
+// keyword arguments to the constructor of the matching Ruby class. A handful
+// of node types (bool, integer, string, record field type, signature) are
+// translated to plain Ruby values instead.
+VALUE rbs_struct_to_ruby_value(rbs_translation_context_t ctx, rbs_node_t *instance) {
+    if (instance == NULL) return Qnil;
+
+    switch (instance->type) {
+    <%- nodes.each do |node| -%>
+    case <%= node.c_type_enum_name %>: {
+        <%- case node.ruby_full_name -%>
+        <%- when "RBS::AST::Bool" -%>
+        return ((rbs_ast_bool_t *) instance)->value ? Qtrue : Qfalse;
+        <%- when "RBS::AST::Integer" -%>
+        rbs_ast_integer_t *integer_node = (rbs_ast_integer_t *) instance;
+        rbs_string_t string_repr = integer_node->string_representation;
+
+        VALUE str = rb_enc_str_new(string_repr.start, rbs_string_len(string_repr), rb_utf8_encoding());
+
+        return rb_funcall(str, rb_intern("to_i"), 0);
+
+        <%- when "RBS::AST::String" -%>
+        rbs_ast_string_t *string_node = (rbs_ast_string_t *) instance;
+        rbs_string_t s = string_node->string;
+
+        return rb_enc_str_new(s.start, rbs_string_len(s), rb_utf8_encoding());
+
+        <%- when "RBS::Types::Record::FieldType" -%>
+        rbs_types_record_field_type_t *record_fieldtype = (rbs_types_record_field_type_t *) instance;
+
+        VALUE array = rb_ary_new();
+        rb_ary_push(array, rbs_struct_to_ruby_value(ctx, record_fieldtype->type));
+        rb_ary_push(array, record_fieldtype->required ? Qtrue : Qfalse);
+        return array;
+
+        <%- when "RBS::Signature" -%>
+        rbs_signature_t *signature = (rbs_signature_t *) instance;
+
+        VALUE array = rb_ary_new();
+        rb_ary_push(array, rbs_node_list_to_ruby_array(ctx, signature->directives));
+        rb_ary_push(array, rbs_node_list_to_ruby_array(ctx, signature->declarations));
+        return array;
+        <%- else -%>
+        <%= node.c_type_name %> *node = (<%= node.c_type_name %> *) instance;
+
+        VALUE h = rb_hash_new();
+        <%- if node.expose_location? -%>
+        rb_hash_aset(h, ID2SYM(rb_intern("location")), rbs_loc_to_ruby_location(ctx, node->base.location));
+        <%- end -%>
+        <%- node.fields.each do |field| -%>
+        <%- case field.c_type -%>
+        <%- when "VALUE" -%>
+        rb_hash_aset(h, ID2SYM(rb_intern("<%= field.name %>")), node-><%= field.c_name %>);
+        <%- when "rbs_node_list" -%>
+        rb_hash_aset(h, ID2SYM(rb_intern("<%= field.name %>")), rbs_node_list_to_ruby_array(ctx, node-><%= field.c_name %>));
+        <%- when "rbs_hash" -%>
+        rb_hash_aset(h, ID2SYM(rb_intern("<%= field.name %>")), rbs_hash_to_ruby_hash(ctx, node-><%= field.c_name %>));
+        <%- when "rbs_ast_symbol" -%>
+        rb_hash_aset(h, ID2SYM(rb_intern("<%= field.name %>")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node-><%= field.c_name %>)); // rbs_ast_symbol
+        <%- when "rbs_keyword" -%>
+        rb_hash_aset(h, ID2SYM(rb_intern("<%= field.name %>")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node-><%= field.c_name %>)); // rbs_keyword
+        <%- when "rbs_string" -%>
+        rb_hash_aset(h, ID2SYM(rb_intern("<%= field.name %>")), rbs_string_to_ruby_string(&node-><%= field.c_name %>, ctx.encoding));
+        <%- when "bool" -%>
+        rb_hash_aset(h, ID2SYM(rb_intern("<%= field.name %>")), node-><%= field.c_name %> ? Qtrue : Qfalse);
+        <%- when "rbs_location" -%>
+        rb_hash_aset(h, ID2SYM(rb_intern("<%= field.name %>")), rbs_loc_to_ruby_location(ctx, node-><%= field.c_name %>));
+        <%- when "rbs_location_list" -%>
+        rb_hash_aset(h, ID2SYM(rb_intern("<%= field.name %>")), rbs_location_list_to_ruby_array(ctx, node-><%= field.c_name %>));
+        <%- else -%>
+        <%- unless field.ast_node? -%>
+        #warning unexpected type <%= field.c_type %>
+        <%- end -%>
+        rb_hash_aset(h, ID2SYM(rb_intern("<%= field.name %>")), rbs_struct_to_ruby_value(ctx, (rbs_node_t *) node-><%= field.c_name %>)); // <%= field.c_type %>
+        <%- end -%>
+        <%- end -%>
+
+        <%- case node.ruby_full_name -%>
+        <%- when "RBS::AST::Declarations::Class", "RBS::AST::Declarations::Module", "RBS::AST::Declarations::Interface", "RBS::AST::Declarations::TypeAlias", "RBS::MethodType" -%>
+        rb_funcall(
+            RBS_AST_TypeParam,
+            rb_intern("resolve_variables"),
+            1,
+            rb_hash_lookup(h, ID2SYM(rb_intern("type_params")))
+        );
+        <%- end -%>
+        return CLASS_NEW_INSTANCE(
+            <%= node.c_constant_name %>,
+            1,
+            &h
+        );
+        <%- end -%>
+    }
+    <%- end -%>
+    case RBS_KEYWORD: {
+        // Keywords are looked up in the global constant pool.
+        rbs_constant_t *constant = rbs_constant_pool_id_to_constant(RBS_GLOBAL_CONSTANT_POOL, ((rbs_keyword_t *) instance)->constant_id);
+        assert(constant != NULL && "constant is NULL");
+        assert(constant->start != NULL && "constant->start is NULL");
+
+        return ID2SYM(rb_intern2((const char *) constant->start, constant->length));
+    }
+    case RBS_AST_SYMBOL: {
+        // Symbols are looked up in the pool carried by the translation context.
+        // NOTE(review): the cast goes through rbs_keyword_t even though this is
+        // the symbol case; presumably both node structs lay out constant_id
+        // identically. Confirm against the node definitions.
+        rbs_constant_t *constant = rbs_constant_pool_id_to_constant(ctx.constant_pool, ((rbs_keyword_t *) instance)->constant_id);
+        assert(constant != NULL && "constant is NULL");
+        assert(constant->start != NULL && "constant->start is NULL");
+
+        return ID2SYM(rb_intern3((const char *) constant->start, constant->length, ctx.encoding));
+    }
+    }
+
+    rb_raise(rb_eRuntimeError, "Unknown node type: %d", instance->type);
+}
diff --git a/templates/ext/rbs_extension/ast_translation.h.erb b/templates/ext/rbs_extension/ast_translation.h.erb
new file mode 100644
index 000000000..a0ba1948c
--- /dev/null
+++ b/templates/ext/rbs_extension/ast_translation.h.erb
@@ -0,0 +1,30 @@
+#ifndef RBS_EXTENSION_AST_TRANSLATION_H
+#define RBS_EXTENSION_AST_TRANSLATION_H
+
+#include "compat.h"
+
+SUPPRESS_RUBY_HEADER_DIAGNOSTICS_BEGIN
+#include "ruby.h"
+#include "ruby/encoding.h"
+SUPPRESS_RUBY_HEADER_DIAGNOSTICS_END
+
+#include "rbs/ast.h"
+#include "rbs/location.h"
+
+/// A bag of values needed when copying RBS C structs into Ruby objects.
+typedef struct rbs_translation_context {
+    rbs_constant_pool_t *constant_pool;
+    VALUE buffer;
+    rb_encoding *encoding;
+} rbs_translation_context_t;
+
+rbs_translation_context_t rbs_translation_context_create(rbs_constant_pool_t *, VALUE buffer_string, rb_encoding *ruby_encoding);
+
+VALUE rbs_node_list_to_ruby_array(rbs_translation_context_t, rbs_node_list_t *list);
+VALUE rbs_hash_to_ruby_hash(rbs_translation_context_t, rbs_hash_t *hash);
+VALUE rbs_struct_to_ruby_value(rbs_translation_context_t, rbs_node_t *instance);
+
+extern VALUE EMPTY_ARRAY;
+extern VALUE EMPTY_HASH;
+
+#endif
diff --git a/templates/ext/rbs_extension/class_constants.c.erb b/templates/ext/rbs_extension/class_constants.c.erb
new file mode 100644
index 000000000..6b131a7bd
--- /dev/null
+++ b/templates/ext/rbs_extension/class_constants.c.erb
@@ -0,0 +1,40 @@
+#include "rbs_extension.h"
+
+VALUE RBS;
+VALUE RBS_AST;
+VALUE RBS_AST_Declarations;
+VALUE RBS_AST_Directives;
+VALUE RBS_AST_Members;
+VALUE RBS_Parser;
+VALUE RBS_Types;
+VALUE RBS_Types_Bases;
+
+<%- nodes.filter(&:expose_to_ruby?).each do |node| -%>
+VALUE <%= node.c_constant_name %>;
+<%- end -%>
+
+VALUE RBS_ParsingError;
+
+// Looks up constant `name` under `parent`, stores it in `var`, and registers it as a GC mark object.
+#define IMPORT_CONSTANT(var, parent, name) \
+    { \
+        var = rb_const_get(parent, rb_intern(name)); \
+        rb_gc_register_mark_object(var); \
+    }
+
+// Resolves every RBS module/class constant declared above; RBS_Parser is not assigned here.
+void rbs__init_constants(void) {
+    IMPORT_CONSTANT(RBS, rb_cObject, "RBS");
+    IMPORT_CONSTANT(RBS_ParsingError, RBS, "ParsingError");
+
+    IMPORT_CONSTANT(RBS_AST, RBS, "AST");
+    IMPORT_CONSTANT(RBS_AST_Declarations, RBS_AST, "Declarations");
+    IMPORT_CONSTANT(RBS_AST_Directives, RBS_AST, "Directives");
+    IMPORT_CONSTANT(RBS_AST_Members, RBS_AST, "Members");
+    IMPORT_CONSTANT(RBS_Types, RBS, "Types");
+    IMPORT_CONSTANT(RBS_Types_Bases, RBS_Types, "Bases");
+
+    <%- nodes.filter(&:expose_to_ruby?).each do |node| -%>
+    IMPORT_CONSTANT(<%= node.c_constant_name %>, <%= node.c_parent_constant_name %>, "<%= node.ruby_class_name %>");
+    <%- end -%>
+}
diff --git a/templates/include/rbs/constants.h.erb b/templates/ext/rbs_extension/class_constants.h.erb
similarity index 68%
rename from templates/include/rbs/constants.h.erb
rename to templates/ext/rbs_extension/class_constants.h.erb
index 7be88cdfa..eb30ffdbe 100644
--- a/templates/include/rbs/constants.h.erb
+++ b/templates/ext/rbs_extension/class_constants.h.erb
@@ -1,6 +1,12 @@
 #ifndef RBS__CONSTANTS_H
 #define RBS__CONSTANTS_H
 
+#include "compat.h"
+
+SUPPRESS_RUBY_HEADER_DIAGNOSTICS_BEGIN
+#include "ruby.h"
+SUPPRESS_RUBY_HEADER_DIAGNOSTICS_END
+
 extern VALUE RBS;
 
 extern VALUE RBS_AST;
@@ -11,7 +17,7 @@ extern VALUE RBS_Types;
 extern VALUE RBS_Types_Bases;
 extern VALUE RBS_ParsingError;
 
-<%- nodes.each do |node| -%>
+<%- nodes.filter(&:expose_to_ruby?).each do |node| -%>
 extern VALUE <%= node.c_constant_name %>;
 <%- end -%>
 
diff --git a/templates/include/rbs/ast.h.erb b/templates/include/rbs/ast.h.erb
new file mode 100644
index 000000000..18bcba4b7
--- /dev/null
+++ b/templates/include/rbs/ast.h.erb
@@ -0,0 +1,101 @@
+#ifndef RBS__AST_H
+#define RBS__AST_H
+
+#include "rbs/util/rbs_allocator.h"
+#include "rbs/util/rbs_constant_pool.h"
+#include "rbs/string.h"
+#include "rbs/location.h"
+
+enum rbs_node_type {
+<%- nodes.each_with_index do |node, index| -%>
+    <%= node.c_type_enum_name %> = <%= index + 1 %>,
+<%- end -%>
+    RBS_KEYWORD,
+    RBS_AST_SYMBOL,
+};
+
+typedef struct rbs_node {
+    enum rbs_node_type type;
+    rbs_location_t *location;
+} rbs_node_t;
+
+const char *rbs_node_type_name(rbs_node_t *node);
+
+/* rbs_node_list_node */
+
+typedef struct rbs_node_list_node {
+    rbs_node_t *node;
+    struct rbs_node_list_node *next;
+} rbs_node_list_node_t;
+
+typedef struct rbs_node_list {
+    rbs_allocator_t *allocator;
+    rbs_node_list_node_t *head;
+    rbs_node_list_node_t *tail;
+    size_t length;
+} rbs_node_list_t;
+
+rbs_node_list_t *rbs_node_list_new(rbs_allocator_t *);
+
+void rbs_node_list_append(rbs_node_list_t *list, rbs_node_t *node);
+
+/* rbs_hash */
+
+typedef struct rbs_hash_node {
+    rbs_node_t *key;
+    rbs_node_t *value;
+    struct rbs_hash_node *next;
+} rbs_hash_node_t;
+
+typedef struct rbs_hash {
+    rbs_allocator_t *allocator;
+    rbs_hash_node_t *head;
+    rbs_hash_node_t *tail;
+    size_t length;
+} rbs_hash_t;
+
+rbs_hash_t *rbs_hash_new(rbs_allocator_t *);
+
+void rbs_hash_set(rbs_hash_t *hash, rbs_node_t *key, rbs_node_t *value);
+
+rbs_hash_node_t *rbs_hash_find(rbs_hash_t *hash, rbs_node_t *key);
+
+rbs_node_t *rbs_hash_get(rbs_hash_t *hash, rbs_node_t *key);
+
+/* rbs_ast_node */
+
+<%- nodes.each do |node| -%>
+typedef struct <%= node.c_base_name %> {
+    rbs_node_t base;
+
+    <%- node.fields.each do |field| -%>
+    <%= field.stored_field_decl %>;
+    <%- end -%>
+} <%= node.c_type_name %>;
+
+<%- end -%>
+
+/// `rbs_keyword_t` models RBS keywords like "private", "instance", "covariant", etc.
+/// These are stored in the global constant pool, and get surfaced to Ruby as `Symbol`s,
+/// just like `rbs_ast_symbol_t`s.
+typedef struct rbs_keyword {
+    rbs_node_t base;
+    rbs_constant_id_t constant_id;
+} rbs_keyword_t;
+
+rbs_keyword_t *rbs_keyword_new(rbs_allocator_t *, rbs_location_t *, rbs_constant_id_t);
+
+/// `rbs_ast_symbol_t` models user-defined identifiers like class names, method names, etc.
+/// These get stored in the parser's own constant pool, and get surfaced to Ruby as `Symbol`s.
+typedef struct rbs_ast_symbol {
+    rbs_node_t base;
+    rbs_constant_id_t constant_id;
+} rbs_ast_symbol_t;
+
+rbs_ast_symbol_t *rbs_ast_symbol_new(rbs_allocator_t *, rbs_location_t *, rbs_constant_pool_t *, rbs_constant_id_t);
+
+<%- nodes.each do |node| -%>
+<%= node.c_type_name %> *<%= node.c_constructor_function_name %>(<%= node.constructor_params.map(&:parameter_decl).join(", ") %>);
+<%- end -%>
+
+#endif
diff --git a/templates/include/rbs/ruby_objs.h.erb b/templates/include/rbs/ruby_objs.h.erb
deleted file mode 100644
index b5376a301..000000000
--- a/templates/include/rbs/ruby_objs.h.erb
+++ /dev/null
@@ -1,10 +0,0 @@
-#ifndef RBS__RUBY_OBJS_H
-#define RBS__RUBY_OBJS_H
-
-#include "ruby.h"
-
-<%- nodes.each do |node| -%>
-VALUE <%= node.c_function_name %>(<%= node.fields.map { |field| "#{field.c_type} #{field.name}" }.join(", ") %>);
-<%- end -%>
-
-#endif
diff --git a/templates/src/ast.c.erb b/templates/src/ast.c.erb
new file mode 100644
index 000000000..9c98b2373
--- /dev/null
+++ b/templates/src/ast.c.erb
@@ -0,0 +1,189 @@
+#line <%= __LINE__ + 1 %> "rbs/templates/src/<%= File.basename(__FILE__) %>"
+#include "rbs/ast.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+
+// Returns the Ruby class name for generated node types, "Symbol" for RBS_AST_SYMBOL, and "Unknown" for anything else.
+const char *rbs_node_type_name(rbs_node_t *node) {
+    switch (node->type) {
+    <%- nodes.each do |node| -%>
+    case <%= node.c_type_enum_name %>:
+        return "<%= node.ruby_full_name %>";
+    <%- end -%>
+    case RBS_AST_SYMBOL:
+        return "Symbol";
+    default:
+        return "Unknown";
+    }
+}
+
+/* rbs_node_list */
+
+// Allocates an empty list from `allocator`; the list keeps the allocator for later appends.
+rbs_node_list_t *rbs_node_list_new(rbs_allocator_t *allocator) {
+    rbs_node_list_t *list = rbs_allocator_alloc(allocator, rbs_node_list_t);
+    *list = (rbs_node_list_t) {
+        .allocator = allocator,
+        .head = NULL,
+        .tail = NULL,
+        .length = 0,
+    };
+
+    return list;
+}
+
+// Appends `node` at the tail of `list` and bumps `list->length`.
+void rbs_node_list_append(rbs_node_list_t *list, rbs_node_t *node) {
+    rbs_node_list_node_t *new_node = rbs_allocator_alloc(list->allocator, rbs_node_list_node_t);
+    *new_node = (rbs_node_list_node_t) {
+        .node = node,
+        .next = NULL,
+    };
+
+    if (list->tail == NULL) {
+        list->head = new_node;
+        list->tail = new_node;
+    } else {
+        list->tail->next = new_node;
+        list->tail = new_node;
+    }
+
+    list->length++;
+}
+
+/* rbs_hash */
+
+// Allocates an empty insertion-ordered hash (a singly linked list of key/value entries) from `allocator`.
+rbs_hash_t *rbs_hash_new(rbs_allocator_t *allocator) {
+    rbs_hash_t *hash = rbs_allocator_alloc(allocator, rbs_hash_t);
+    *hash = (rbs_hash_t) {
+        .allocator = allocator,
+        .head = NULL,
+        .tail = NULL,
+        .length = 0,
+    };
+
+    return hash;
+}
+
+// Key equality used by rbs_hash_find. Symbols, keywords, bools, integers and strings compare by value;
+// any other node type is reported on stderr and is equal only to itself (same pointer).
+bool rbs_node_equal(rbs_node_t *lhs, rbs_node_t *rhs) {
+    if (lhs == rhs) return true;
+    if (lhs->type != rhs->type) return false;
+
+    switch (lhs->type) {
+    case RBS_AST_SYMBOL:
+        return ((rbs_ast_symbol_t *) lhs)->constant_id == ((rbs_ast_symbol_t *) rhs)->constant_id;
+    case RBS_KEYWORD:
+        return ((rbs_keyword_t *) lhs)->constant_id == ((rbs_keyword_t *) rhs)->constant_id;
+    case RBS_AST_BOOL:
+        return ((rbs_ast_bool_t *) lhs)->value == ((rbs_ast_bool_t *) rhs)->value;
+    case RBS_AST_INTEGER:
+        return rbs_string_equal(((rbs_ast_integer_t *) lhs)->string_representation, ((rbs_ast_integer_t *) rhs)->string_representation);
+    case RBS_AST_STRING:
+        return rbs_string_equal(((rbs_ast_string_t *) lhs)->string, ((rbs_ast_string_t *) rhs)->string);
+    default:
+        fprintf(stderr, "Unhandled node type: %d\n", lhs->type);
+        return false;
+    }
+}
+
+// Linear scan for the entry whose key is rbs_node_equal to `key`; NULL when there is none.
+rbs_hash_node_t *rbs_hash_find(rbs_hash_t *hash, rbs_node_t *key) {
+    rbs_hash_node_t *current = hash->head;
+
+    while (current != NULL) {
+        if (rbs_node_equal(key, current->key)) {
+            return current;
+        }
+        current = current->next;
+    }
+
+    return NULL;
+}
+
+// Replaces the value when an equal key already exists; otherwise appends a new entry at the tail
+// and bumps `hash->length`, mirroring rbs_node_list_append.
+void rbs_hash_set(rbs_hash_t *hash, rbs_node_t *key, rbs_node_t *value) {
+    rbs_hash_node_t *existing_node = rbs_hash_find(hash, key);
+    if (existing_node != NULL) {
+        existing_node->value = value;
+        return;
+    }
+
+    rbs_hash_node_t *new_node = rbs_allocator_alloc(hash->allocator, rbs_hash_node_t);
+    new_node->key = key;
+    new_node->value = value;
+    new_node->next = NULL;
+
+    if (hash->tail == NULL) {
+        hash->head = new_node;
+        hash->tail = new_node;
+    } else {
+        hash->tail->next = new_node;
+        hash->tail = new_node;
+    }
+
+    hash->length++;
+}
+
+// Returns the value stored under `key`, or NULL when the key is absent.
+rbs_node_t *rbs_hash_get(rbs_hash_t *hash, rbs_node_t *key) {
+    rbs_hash_node_t *node = rbs_hash_find(hash, key);
+    return node ? node->value : NULL;
+}
+
+// Allocates an RBS_KEYWORD node carrying `constant_id`.
+rbs_keyword_t *rbs_keyword_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_constant_id_t constant_id) {
+    rbs_keyword_t *instance = rbs_allocator_alloc(allocator, rbs_keyword_t);
+
+    *instance = (rbs_keyword_t) {
+        .base = (rbs_node_t) {
+            .type = RBS_KEYWORD,
+            .location = location,
+        },
+        .constant_id = constant_id,
+    };
+
+    return instance;
+}
+
+// Allocates an RBS_AST_SYMBOL node carrying `constant_id`; `constant_pool` is accepted but not stored.
+rbs_ast_symbol_t *rbs_ast_symbol_new(rbs_allocator_t *allocator, rbs_location_t *location, rbs_constant_pool_t *constant_pool, rbs_constant_id_t constant_id) {
+    rbs_ast_symbol_t *instance = rbs_allocator_alloc(allocator, rbs_ast_symbol_t);
+
+    *instance = (rbs_ast_symbol_t) {
+        .base = (rbs_node_t) {
+            .type = RBS_AST_SYMBOL,
+            .location = location,
+        },
+        .constant_id = constant_id,
+    };
+
+    return instance;
+}
+
+<%- nodes.each do |node| -%>
+#line <%= __LINE__ + 1 %> "rbs/templates/src/<%= File.basename(__FILE__) %>"
+// Allocates a <%= node.c_type_name %> from `allocator` and copies every constructor argument into it.
+<%= node.c_type_name %> *<%= node.c_constructor_function_name %>(<%= node.constructor_params.map(&:parameter_decl).join(", ") %>) {
+    <%= node.c_type_name %> *instance = rbs_allocator_alloc(allocator, <%= node.c_type_name %>);
+    <%- node.fields.filter { |f| f.c_type == "VALUE" }.each do |f| -%>
+    rb_gc_register_mark_object(<%= f.c_name %>);
+    <%- end -%>
+
+    *instance = (<%= node.c_type_name %>) {
+        .base = (rbs_node_t) {
+            .type = <%= node.c_type_enum_name %>,
+            .location = location,
+        },
+        <%- node.fields.each do |field| -%>
+        .<%= field.c_name %> = <%= field.c_name %>,
+        <%- end -%>
+    };
+
+    return instance;
+}
+<%- end -%>
diff --git a/templates/src/constants.c.erb b/templates/src/constants.c.erb
deleted file mode 100644
index 1a6ff9400..000000000
--- a/templates/src/constants.c.erb
+++ /dev/null
@@ -1,36 +0,0 @@
-#include "rbs_extension.h"
-
-VALUE RBS_Parser;
-
-VALUE RBS; -VALUE RBS_AST; -VALUE RBS_AST_Declarations; -VALUE RBS_AST_Directives; -VALUE RBS_AST_Members; -VALUE RBS_Parser; -VALUE RBS_Types; -VALUE RBS_Types_Bases; - -<%- nodes.each do |node| -%> -VALUE <%= node.c_constant_name %>; -<%- end -%> - -VALUE RBS_ParsingError; - -#define IMPORT_CONSTANT(var, parent, name) { var = rb_const_get(parent, rb_intern(name)); rb_gc_register_mark_object(var); } - -void rbs__init_constants(void) { - IMPORT_CONSTANT(RBS, rb_cObject, "RBS"); - IMPORT_CONSTANT(RBS_ParsingError, RBS, "ParsingError"); - - IMPORT_CONSTANT(RBS_AST, RBS, "AST"); - IMPORT_CONSTANT(RBS_AST_Declarations, RBS_AST, "Declarations"); - IMPORT_CONSTANT(RBS_AST_Directives, RBS_AST, "Directives"); - IMPORT_CONSTANT(RBS_AST_Members, RBS_AST, "Members"); - IMPORT_CONSTANT(RBS_Types, RBS, "Types"); - IMPORT_CONSTANT(RBS_Types_Bases, RBS_Types, "Bases"); - - <%- nodes.each do |node| -%> - IMPORT_CONSTANT(<%= node.c_constant_name %>, <%= node.c_parent_constant_name %>, "<%= node.ruby_class_name %>"); - <%- end -%> -} diff --git a/templates/src/ruby_objs.c.erb b/templates/src/ruby_objs.c.erb deleted file mode 100644 index 24cbe4ffb..000000000 --- a/templates/src/ruby_objs.c.erb +++ /dev/null @@ -1,27 +0,0 @@ -#include "rbs_extension.h" - -#ifdef RB_PASS_KEYWORDS - // Ruby 2.7 or later - #define CLASS_NEW_INSTANCE(klass, argc, argv)\ - rb_class_new_instance_kw(argc, argv, klass, RB_PASS_KEYWORDS) -#else - // Ruby 2.6 - #define CLASS_NEW_INSTANCE(receiver, argc, argv)\ - rb_class_new_instance(argc, argv, receiver) -#endif - -<%- nodes.each do |node| -%> -VALUE <%= node.c_function_name %>(<%= node.fields.map { |field| "#{field.c_type} #{field.name}" }.join(", ") %>) { - VALUE _init_kwargs = rb_hash_new(); - <%- node.fields.each do |field| -%> - rb_hash_aset(_init_kwargs, ID2SYM(rb_intern("<%= field.name %>")), <%= field.name %>); - <%- end -%> - - return CLASS_NEW_INSTANCE( - <%= node.c_constant_name %>, - 1, - &_init_kwargs - ); -} - -<%- end -%> diff --git 
a/templates/template.rb b/templates/template.rb
index 270b0f799..9e5114e3c 100644
--- a/templates/template.rb
+++ b/templates/template.rb
@@ -7,12 +7,60 @@
 module RBS
   class Template
     class Field
-      attr_reader :name
-      attr_reader :c_type
+      # name   - field name; used as the keyword key handed to the Ruby class.
+      # c_type - C type tag of the field ("VALUE", "bool", "rbs_string", or a struct base name).
+      # c_name - identifier of the C struct member / constructor parameter (defaults to name).
+      attr_reader :name, :c_type, :c_name #: String
 
-      def initialize(yaml)
-        @name = yaml["name"]
-        @c_type = "VALUE"
+      def initialize(name:, c_type:, c_name: nil)
+        @name = name
+        @c_type = c_type
+        @c_name = c_name || name
+      end
+
+      # Builds a Field from one entry of a node's "fields" list ("c_type" defaults to "VALUE").
+      # A missing "name" raises KeyError instead of silently producing a nil-named field.
+      def self.from_hash(hash)
+        new(name: hash.fetch("name"), c_type: hash.fetch("c_type", "VALUE"), c_name: hash["c_name"])
+      end
+
+      # The C declaration of this field as a constructor parameter.
+      def parameter_decl
+        case @c_type
+        when "VALUE", "bool"
+          "#{@c_type} #{c_name}"
+        when "rbs_string"
+          "rbs_string_t #{c_name}"
+        when ->(c_type) { c_type.end_with?("_t *") }
+          "#{@c_type}#{c_name}"
+        else
+          "#{@c_type}_t *#{c_name}"
+        end
+      end
+
+      # The C declaration of this field as a member of the generated struct.
+      def stored_field_decl
+        case @c_type
+        when "VALUE"
+          "VALUE #{c_name}"
+        when "bool"
+          "bool #{c_name}"
+        when "rbs_string"
+          "rbs_string_t #{c_name}"
+        else
+          "struct #{@c_type} *#{c_name}"
+        end
+      end
+
+      # Whether c_type looks like an AST node type, judged purely by its name.
+      # ast_translation.c.erb emits a `#warning` for fallback fields where this is false.
+      def ast_node?
+        @c_type == "rbs_node" ||
+          @c_type == "rbs_type_name" ||
+          @c_type == "rbs_namespace" ||
+          @c_type.include?("_ast_") ||
+          @c_type.include?("_decl_") ||
+          @c_type.include?("_types_")
       end
     end
 
@@ -25,13 +73,13 @@ class Type
       # e.g. `TypeAlias`
       attr_reader :ruby_class_name #: String
 
-      # The name of the auto-generated C struct for this type,
-      # e.g. `rbs_ast_declarations_typealias_t`
-      attr_reader :c_type_name #: String
+      # The base name of the auto-generated C struct for this type.
+      # e.g. `rbs_ast_declarations_type_alias`
+      attr_reader :c_base_name #: String
 
-      # The name of the pre-existing C function which constructs new Ruby objects of this type.
-      # e.g. `rbs_ast_declarations_typealias_new`
-      attr_reader :c_function_name #: String
+      # The name of the typedef of the auto-generated C struct for this type,
+      # e.g. `rbs_ast_declarations_type_alias_t`
+      attr_reader :c_type_name #: String
 
       # The name of the C constant which stores the Ruby VALUE pointing to the generated class.
       # e.g. `RBS_AST_Declarations_TypeAlias`
@@ -41,20 +89,62 @@ class Type
       # e.g. `RBS_AST_Declarations`
       attr_reader :c_parent_constant_name #: String
 
+      # The name of the `enum rbs_node_type` member for this type (the upcased C base name),
+      # e.g. `RBS_AST_DECLARATIONS_TYPE_ALIAS`
+      attr_reader :c_type_enum_name #: String
+
+      # Parameters of the generated C constructor: allocator, location, then every field.
+      attr_reader :constructor_params #: Array[RBS::Template::Field]
       attr_reader :fields #: Array[RBS::Template::Field]
 
       def initialize(yaml)
         @ruby_full_name = yaml["name"]
         @ruby_class_name = @ruby_full_name[/[^:]+\z/] # demodulize-like
-        name = @ruby_full_name.gsub("::", "_")
-        @c_function_name = name.gsub(/(^)?(_)?([A-Z](?:[A-Z]*(?=[A-Z_])|[a-z0-9]*))/) { ($1 || $2 || "_") + $3.downcase } # underscore-like
-        @c_function_name.gsub!(/^rbs_types_/, 'rbs_')
-        @c_function_name.gsub!(/^rbs_ast_declarations_/, 'rbs_ast_decl_')
+
+        @c_base_name = @ruby_full_name.split("::").map { |part| camel_to_snake(part) }.join("_")
+        @c_type_name = @c_base_name + "_t"
+
+        # For compatibility with existing code, use the original approach for constant naming
         @c_constant_name = @ruby_full_name.gsub("::", "_")
         @c_parent_constant_name = @ruby_full_name.split("::")[0..-2].join("::").gsub("::", "_")
 
-        @fields = yaml.fetch("fields", []).map { |field| Field.new(field) }.freeze
+        @c_type_enum_name = @c_base_name.upcase
+
+        @expose_to_ruby = yaml.fetch("expose_to_ruby", true)
+        @expose_location = yaml.fetch("expose_location", true)
+
+        @fields = yaml.fetch("fields", []).map { |field| Field.from_hash(field) }.freeze
+
+        @constructor_params = [
+          Field.new(name: "allocator", c_type: "rbs_allocator_t *"),
+          Field.new(name: "location", c_type: "rbs_location_t *"),
+          *@fields,
+        ].freeze
+      end
+
+      # The name of the C function which constructs new instances of this C structure.
+      # e.g. `rbs_ast_declarations_type_alias_new`
+      def c_constructor_function_name #: String
+        "#{@c_base_name}_new"
       end
+
+      # Every templated type will have a C struct created for it.
+      # If this is true, then we will also create a Ruby class for it, otherwise we'll skip that.
+      def expose_to_ruby?
+        @expose_to_ruby
+      end
+
+      # Whether ast_translation.c.erb passes a `location` entry to the Ruby class's initializer.
+      def expose_location?
+        @expose_location
+      end
+
+      # Convert CamelCase to snake_case
+      # e.g. "FooBarBaz" -> "foo_bar_baz"
+      def camel_to_snake(str)
+        str.gsub(/([a-z\d])([A-Z])/, '\1_\2').downcase
+      end
+
     end
 
     class << self
diff --git a/test/rbs/type_parsing_test.rb b/test/rbs/type_parsing_test.rb
index 4a6fef497..8195db7e5 100644
--- a/test/rbs/type_parsing_test.rb
+++ b/test/rbs/type_parsing_test.rb
@@ -655,13 +655,14 @@ def test_string_literal_union
   end
 
   def test_record
-    Parser.parse_type("{ foo: untyped, 3 => 'hoge' }").yield_self do |type|
+    Parser.parse_type("{ foo: untyped, 3 => 'hoge', 4 => 'huge' }").yield_self do |type|
       assert_instance_of Types::Record, type
       assert_equal({
                      foo: Types::Bases::Any.new(location: nil),
-                     3 => Types::Literal.new(literal: "hoge", location: nil)
+                     3 => Types::Literal.new(literal: "hoge", location: nil),
+                     4 => Types::Literal.new(literal: "huge", location: nil),
                    }, type.fields)
-      assert_equal "{ foo: untyped, 3 => 'hoge' }", type.location.source
+      assert_equal "{ foo: untyped, 3 => 'hoge', 4 => 'huge' }", type.location.source
     end
 
     Parser.parse_type("{}").yield_self do |type|