From eeb45e153d047d1e123d24a5a3ba783958dfb709 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Sun, 14 Jan 2024 21:48:27 +0900 Subject: [PATCH] Change `@scanner.match` to respond `nil`/`@scanner` in order to improve processing speed. --- lib/rexml/parsers/baseparser.rb | 3 +-- lib/rexml/source.rb | 17 ++++++------- test/parse/test_entity_declaration.rb | 36 +++++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 11 deletions(-) create mode 100644 test/parse/test_entity_declaration.rb diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 5ce7b8d9..65bad260 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -274,8 +274,7 @@ def pull_event return [ :elementdecl, @source.match( ELEMENTDECL_PATTERN, true )[1] ] when ENTITY_START - match = @source.match( ENTITYDECL, true ).compact - match[0] = :entitydecl + match = [:entitydecl, *@source.match( ENTITYDECL, true ).captures.compact] ref = false if match[1] == '%' ref = true diff --git a/lib/rexml/source.rb b/lib/rexml/source.rb index 4de68731..390d0ad5 100644 --- a/lib/rexml/source.rb +++ b/lib/rexml/source.rb @@ -70,11 +70,10 @@ def read def match(pattern, cons=false) if cons - @scanner.scan(pattern) + @scanner.scan(pattern).nil? ? nil : @scanner else - @scanner.check(pattern) + @scanner.check(pattern).nil? ? nil : @scanner end - @scanner.matched? ? [@scanner.matched, *@scanner.captures] : nil end # @return true if the Source is exhausted @@ -161,24 +160,24 @@ def read def match( pattern, cons=false ) if cons - @scanner.scan(pattern) + md = @scanner.scan(pattern) else - @scanner.check(pattern) + md = @scanner.check(pattern) end - while !@scanner.matched? and @source + while md.nil? and @source begin @scanner << readline if cons - @scanner.scan(pattern) + md = @scanner.scan(pattern) else - @scanner.check(pattern) + md = @scanner.check(pattern) end rescue @source = nil end end - @scanner.matched? ? [@scanner.matched, *@scanner.captures] : nil + md.nil? ? nil : @scanner end def empty? diff --git a/test/parse/test_entity_declaration.rb b/test/parse/test_entity_declaration.rb new file mode 100644 index 00000000..e15deec6 --- /dev/null +++ b/test/parse/test_entity_declaration.rb @@ -0,0 +1,36 @@ +# frozen_string_literal: false +require 'test/unit' +require 'rexml/document' + +module REXMLTests + class TestParseEntityDeclaration < Test::Unit::TestCase + private + def xml(internal_subset) + <<-XML + + + XML + end + + def parse(internal_subset) + REXML::Document.new(xml(internal_subset)).doctype + end + + def test_empty + exception = assert_raise(REXML::ParseException) do + parse(<<-INTERNAL_SUBSET) + + INTERNAL_SUBSET + end + assert_equal(<<-DETAIL.chomp, exception.to_s) +Malformed notation declaration: name is missing +Line: 5 +Position: 72 +Last 80 unconsumed characters: + ]> + DETAIL + end + end +end