Skip to content

Commit

Permalink
Optimize BaseParser#unnormalize method
Browse files Browse the repository at this point in the history
## Benchmark
```
RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.3/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml
ruby 3.3.3 (2024-06-12 revision f1c7b6f435) [arm64-darwin22]
Calculating -------------------------------------
                         before       after  before(YJIT)  after(YJIT)
                 dom     17.776      17.551        32.512       32.658 i/s -     100.000 times in 5.625671s 5.697584s 3.075829s 3.062028s
                 sax     25.326      25.243        50.344       50.082 i/s -     100.000 times in 3.948501s 3.961519s 1.986315s 1.996706s
                pull     29.015      29.066        61.714       61.798 i/s -     100.000 times in 3.446473s 3.440471s 1.620377s 1.618183s
              stream     28.201      29.706        55.646       59.422 i/s -     100.000 times in 3.545943s 3.366298s 1.797077s 1.682876s

Comparison:
                              dom
         after(YJIT):        32.7 i/s
        before(YJIT):        32.5 i/s - 1.00x  slower
              before:        17.8 i/s - 1.84x  slower
               after:        17.6 i/s - 1.86x  slower

                              sax
        before(YJIT):        50.3 i/s
         after(YJIT):        50.1 i/s - 1.01x  slower
              before:        25.3 i/s - 1.99x  slower
               after:        25.2 i/s - 1.99x  slower

                             pull
         after(YJIT):        61.8 i/s
        before(YJIT):        61.7 i/s - 1.00x  slower
               after:        29.1 i/s - 2.13x  slower
              before:        29.0 i/s - 2.13x  slower

                           stream
         after(YJIT):        59.4 i/s
        before(YJIT):        55.6 i/s - 1.07x  slower
               after:        29.7 i/s - 2.00x  slower
              before:        28.2 i/s - 2.11x  slower

```

- YJIT=ON : 0.99x - 1.07x faster
- YJIT=OFF : 0.98x - 1.05x faster
  • Loading branch information
naitoh committed Jun 23, 2024
1 parent 8a1b4f5 commit e1f4758
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions lib/rexml/parsers/baseparser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,12 @@ module Private
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/
DEFAULT_ENTITIES_PATTERNS = {}
default_entities = ['gt', 'lt', 'quot', 'apos', 'amp']
default_entities.each do |term|
DEFAULT_ENTITIES_PATTERNS[term] = /&#{term};/
end
end
private_constant :Private

Expand Down Expand Up @@ -506,7 +512,7 @@ def normalize( input, entities=nil, entity_filter=nil )
def unnormalize( rv, entities=nil, filter=nil )
matches = rv.scan( REFERENCE_RE )
return rv if matches.size == 0
rv.gsub!( /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ ) {
rv.gsub!( Private::CHARACTER_REFERENCES) {
m=$1
m = "0#{m}" if m[0] == ?x
[Integer(m)].pack('U*')
Expand All @@ -517,15 +523,15 @@ def unnormalize( rv, entities=nil, filter=nil )
unless filter and filter.include?(entity_reference)
entity_value = entity( entity_reference, entities )
if entity_value
re = /&#{entity_reference};/
re = Private::DEFAULT_ENTITIES_PATTERNS[entity_reference] || /&#{entity_reference};/
rv.gsub!( re, entity_value )
else
er = DEFAULT_ENTITIES[entity_reference]
rv.gsub!( er[0], er[2] ) if er
end
end
end
rv.gsub!( /&amp;/, '&' )
rv.gsub!( Private::DEFAULT_ENTITIES_PATTERNS['amp'], '&' )
end
rv
end
Expand Down

0 comments on commit e1f4758

Please sign in to comment.