From c8fa85aded50f015e4bd513f6e9edfa97f2d5f84 Mon Sep 17 00:00:00 2001 From: Watson Date: Thu, 11 Jul 2024 14:32:27 +0900 Subject: [PATCH 1/2] Fix performance issue caused by using repeated `>` characters inside `]>` A `<` is treated as a string delimiter. In certain cases, if `<` is used in succession, read and match are repeated, which slows down the process. Therefore, the following is used to read ahead to a specific part of the string in advance. --- lib/rexml/parsers/baseparser.rb | 8 ++++++-- test/parse/test_entity_declaration.rb | 7 +++++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 4fcdaba7..e8f1a069 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -124,11 +124,15 @@ class BaseParser } module Private - INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um + # Terminal requires two or more letters. INSTRUCTION_TERM = "?>" COMMENT_TERM = "-->" CDATA_TERM = "]]>" DOCTYPE_TERM = "]>" + # Read to the end of DOCTYPE because there is no proper ENTITY termination + ENTITY_TERM = DOCTYPE_TERM + + INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um @@ -313,7 +317,7 @@ def pull_event raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil? return [ :elementdecl, " ]> DETAIL end + + def test_gt_linear_performance_entity + seq = [10000, 50000, 100000, 150000, 200000] + assert_linear_performance(seq, rehearsal: 10) do |n| + REXML::Document.new('' * n + '">') + end + end end end From 4a34d6fbca4a7f78c546aa76d8504dd93642946b Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Tue, 16 Jul 2024 11:21:07 +0900 Subject: [PATCH 2/2] Remove redundant "entity" --- test/parse/test_entity_declaration.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/parse/test_entity_declaration.rb b/test/parse/test_entity_declaration.rb index 49e4ff3d..07529016 100644 --- a/test/parse/test_entity_declaration.rb +++ b/test/parse/test_entity_declaration.rb @@ -33,7 +33,7 @@ def test_empty DETAIL end - def test_gt_linear_performance_entity + def test_gt_linear_performance seq = [10000, 50000, 100000, 150000, 200000] assert_linear_performance(seq, rehearsal: 10) do |n| REXML::Document.new('' * n + '">')