Skip to content

Commit

Permalink
Do not output :text event after the root tag is closed (#167)
Browse files Browse the repository at this point in the history
## Why?
GitHub: fix GH-163

## Change
- sax_test.rb
```
require 'rexml/parsers/sax2parser'
require 'rexml/parsers/pullparser'
require 'rexml/parsers/streamparser'

require 'libxml-ruby'
require 'nokogiri'

xml = <<EOS
<root> a b  c
</root>
<!-- ok comment -->
<?abc version="1.0" ?>
EOS

class Listener
  def method_missing(name, *args)
    p [name, *args]
  end
end

puts "LibXML(SAX)"
parser = LibXML::XML::SaxParser.string(xml)
parser.callbacks = Listener.new
parser.parse

puts ""
puts "Nokogiri(SAX)"
parser = Nokogiri::XML::SAX::Parser.new(Listener.new)
parser.parse(xml)

puts ""
puts "REXML(SAX)"
parser = REXML::Parsers::SAX2Parser.new(xml)
parser.listen(Listener.new)
parser.parse

puts ""
puts "REXML(Pull)"
parser = REXML::Parsers::PullParser.new(xml)
while parser.has_next?
    res = parser.pull
    p res
end

puts ""
puts "REXML(Stream)"
parser = REXML::Parsers::StreamParser.new(xml, Listener.new).parse
```

## Before (rexml 3.3.1)
```
LibXML(SAX)
[:on_start_document]
[:on_start_element_ns, "root", {}, nil, nil, {}]
[:on_characters, " a b  c \n"]
[:on_end_element_ns, "root", nil, nil]
[:on_comment, " ok comment "]
[:on_processing_instruction, "abc", "version=\"1.0\" "]
[:on_end_document]

Nokogiri(SAX)
[:start_document]
[:start_element_namespace, "root", [], nil, nil, []]
[:characters, " a b  c \n"]
[:end_element_namespace, "root", nil, nil]
[:comment, " ok comment "]
[:processing_instruction, "abc", "version=\"1.0\" "]
[:end_document]

REXML(SAX)
[:start_document]
[:start_element, nil, "root", "root", {}]
[:progress, 6]
[:characters, " a b  c \n"]
[:progress, 15]
[:end_element, nil, "root", "root"]
[:progress, 22]
[:characters, "\n"]
[:progress, 23]
[:comment, " ok comment "]
[:progress, 42]
[:characters, "\n"]
[:progress, 43]
[:processing_instruction, "abc", " version=\"1.0\" "]
[:progress, 65]
[:characters, "\n"]
[:progress, 66]
[:end_document]

REXML(Pull)
start_element: ["root", {}]
text: [" a b  c \n", " a b  c \n"]
end_element: ["root"]
text: ["\n", "\n"]
comment: [" ok comment "]
text: ["\n", "\n"]
processing_instruction: ["abc", " version=\"1.0\" "]
text: ["\n", "\n"]

REXML(Stream)
[:tag_start, "root", {}]
[:text, " a b  c \n"]
[:tag_end, "root"]
[:text, "\n"]
[:comment, " ok comment "]
[:text, "\n"]
[:instruction, "abc", " version=\"1.0\" "]
[:text, "\n"]
```

## After(This PR)
```
REXML(SAX)
[:start_document]
[:start_element, nil, "root", "root", {}]
[:progress, 6]
[:characters, " a b  c \n"]
[:progress, 15]
[:end_element, nil, "root", "root"]
[:progress, 22]
[:comment, " ok comment "]
[:progress, 42]
[:processing_instruction, "abc", " version=\"1.0\" "]
[:progress, 65]
[:end_document]

REXML(Pull)
start_element: ["root", {}]
text: [" a b  c \n", " a b  c \n"]
end_element: ["root"]
comment: [" ok comment "]
processing_instruction: ["abc", " version=\"1.0\" "]
end_document: []

REXML(Stream)
[:tag_start, "root", {}]
[:text, " a b  c \n"]
[:tag_end, "root"]
[:comment, " ok comment "]
[:instruction, "abc", " version=\"1.0\" "]
```
  • Loading branch information
naitoh authored Jul 12, 2024
1 parent 20f8084 commit a5075c1
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 3 deletions.
1 change: 1 addition & 0 deletions lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ def pull_event
unless /\A\s*\z/.match?(text)
raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
end
return pull_event
end
return [ :text, text ]
end
Expand Down
15 changes: 15 additions & 0 deletions test/parse/test_text.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,20 @@ def test_after_root
DETAIL
end
end

def test_whitespace_characters_after_root
parser = REXML::Parsers::BaseParser.new('<a>b</a> ')

events = []
while parser.has_next?
event = parser.pull
case event[0]
when :text
events << event[1]
end
end

assert_equal(["b"], events)
end
end
end
1 change: 0 additions & 1 deletion test/parser/test_ultra_light.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ def test_entity_declaration
[:entitydecl, "name", "value"]
],
[:start_element, :parent, "root", {}],
[:text, "\n"],
],
parse(<<-INTERNAL_SUBSET))
<!ENTITY name "value">
Expand Down
2 changes: 1 addition & 1 deletion test/test_core.rb
Original file line number Diff line number Diff line change
Expand Up @@ -826,7 +826,7 @@ def test_deep_clone
end

def test_whitespace_before_root
a = <<EOL
a = <<EOL.chomp
<?xml version='1.0'?>
<blo>
<wak>
Expand Down
2 changes: 1 addition & 1 deletion test/test_document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@ def test_utf_16

actual_xml = ""
document.write(actual_xml)
expected_xml = <<-EOX.encode("UTF-16BE")
expected_xml = <<-EOX.chomp.encode("UTF-16BE")
\ufeff<?xml version='1.0' encoding='UTF-16'?>
<message>Hello world!</message>
EOX
Expand Down

0 comments on commit a5075c1

Please sign in to comment.