From 1d34947581da3b6cd57769c28a94814bfa3731e8 Mon Sep 17 00:00:00 2001 From: Thomas THIMOTHEE Date: Thu, 21 Mar 2024 22:07:32 +0800 Subject: [PATCH] Correctly consider XML instruct declaration encoding when creating Hash using Ox.load(xml, mode: :hash) (#350) * Correctly consider XML instruct declaration encoding when creating Hash using Ox.load(xml, mode: :hash) * Fix rubocop linting issues * Fix rubocop linting issues * Run clang-format on C files --- Gemfile | 1 + contrib/sax_benchmark.rb | 4 +-- ext/ox/hash_load.c | 58 +++++++++++++++++++++++++++++++--------- ext/ox/ox.h | 2 +- ext/ox/parse.c | 2 +- ext/ox/sax_buf.c | 2 +- ext/ox/slotcache.c | 4 +-- lib/ox/version.rb | 2 +- ox.gemspec | 2 -- test/perf_mars.rb | 2 +- test/perf_obj.rb | 8 +++--- test/sax/sax_test.rb | 7 +++-- test/tests.rb | 9 +++++++ 13 files changed, 71 insertions(+), 32 deletions(-) diff --git a/Gemfile b/Gemfile index 20202fd8..d6260cbe 100644 --- a/Gemfile +++ b/Gemfile @@ -3,5 +3,6 @@ source 'https://rubygems.org' gemspec group :development do + gem 'rake-compiler', '>= 1.2', '< 2.0' gem 'rubocop', '~> 1.47', require: false end diff --git a/contrib/sax_benchmark.rb b/contrib/sax_benchmark.rb index 685b6a92..808833d3 100644 --- a/contrib/sax_benchmark.rb +++ b/contrib/sax_benchmark.rb @@ -31,7 +31,7 @@ }.strip.gsub(/>\s+<') class OxHandler < Ox::Sax - attr :root + attr_reader :root def initialize super @@ -70,7 +70,7 @@ def text(val) end class NokogiriHandler < Nokogiri::XML::SAX::Document - attr :root + attr_reader :root def characters(val) (@node['__content__'] ||= '') << val diff --git a/ext/ox/hash_load.c b/ext/ox/hash_load.c index c65f734b..2afd2514 100644 --- a/ext/ox/hash_load.c +++ b/ext/ox/hash_load.c @@ -93,14 +93,26 @@ static void add_str(PInfo pi, VALUE s) { } static void add_text(PInfo pi, char *text, int closed) { - add_str(pi, rb_str_new2(text)); + VALUE s = rb_str_new2(text); + if (0 != pi->options->rb_enc) { + rb_enc_associate(s, pi->options->rb_enc); + } + add_str(pi, s); } static void add_cdata(PInfo pi, const char *text, size_t len) { - add_str(pi, rb_str_new(text, len)); + VALUE s = rb_str_new2(text); + if (0 != pi->options->rb_enc) { + rb_enc_associate(s, pi->options->rb_enc); + } + add_str(pi, s); } static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) { + VALUE s = rb_str_new2(ename); + if (0 != pi->options->rb_enc) { + rb_enc_associate(s, pi->options->rb_enc); + } if (helper_stack_empty(&pi->helpers)) { create_top(pi); } @@ -111,12 +123,14 @@ static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren volatile VALUE a; for (; 0 != attrs->name; attrs++) { + key = rb_str_new2(attrs->name); + if (0 != pi->options->rb_enc) { + rb_enc_associate(key, pi->options->rb_enc); + } if (Qnil != pi->options->attr_key_mod) { - key = rb_funcall(pi->options->attr_key_mod, ox_call_id, 1, rb_str_new2(attrs->name)); + key = rb_funcall(pi->options->attr_key_mod, ox_call_id, 1, key); } else if (Yes == pi->options->sym_keys) { - key = rb_id2sym(rb_intern(attrs->name)); - } else { - key = rb_str_new2(attrs->name); + key = rb_id2sym(rb_intern_str(key)); } val = rb_str_new2(attrs->value); if (0 != pi->options->rb_enc) { @@ -127,17 +141,21 @@ static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren a = rb_ary_new(); rb_ary_push(a, h); mark_value(pi, a); - helper_stack_push(&pi->helpers, rb_intern(ename), a, ArrayCode); + helper_stack_push(&pi->helpers, rb_intern_str(s), a, ArrayCode); } else { - helper_stack_push(&pi->helpers, rb_intern(ename), Qnil, NoCode); + helper_stack_push(&pi->helpers, rb_intern_str(s), Qnil, NoCode); } } static void add_element_no_attrs(PInfo pi, const char *ename, Attr attrs, int hasChildren) { + VALUE s = rb_str_new2(ename); + if (0 != pi->options->rb_enc) { + rb_enc_associate(s, pi->options->rb_enc); + } if (helper_stack_empty(&pi->helpers)) { create_top(pi); } - helper_stack_push(&pi->helpers, rb_intern(ename), Qnil, NoCode); + helper_stack_push(&pi->helpers, rb_intern_str(s), Qnil, NoCode); } static int umark_hash_cb(VALUE key, VALUE value, VALUE x) { @@ -224,8 +242,22 @@ static void finish(PInfo pi) { xfree(pi->marked); } +static void set_encoding_from_instruct(PInfo pi, Attr attrs) { + for (; 0 != attrs->name; attrs++) { + if (0 == strcmp("encoding", attrs->name)) { + pi->options->rb_enc = rb_enc_find(attrs->value); + } + } +} + +static void instruct(PInfo pi, const char *target, Attr attrs, const char *content) { + if (0 == strcmp("xml", target)) { + set_encoding_from_instruct(pi, attrs); + } +} + struct _parseCallbacks _ox_hash_callbacks = { - NULL, + instruct, NULL, NULL, NULL, @@ -238,7 +270,7 @@ struct _parseCallbacks _ox_hash_callbacks = { ParseCallbacks ox_hash_callbacks = &_ox_hash_callbacks; struct _parseCallbacks _ox_hash_cdata_callbacks = { - NULL, + instruct, NULL, NULL, add_cdata, @@ -251,7 +283,7 @@ struct _parseCallbacks _ox_hash_cdata_callbacks = { ParseCallbacks ox_hash_cdata_callbacks = &_ox_hash_cdata_callbacks; struct _parseCallbacks _ox_hash_no_attrs_callbacks = { - NULL, + instruct, NULL, NULL, NULL, @@ -264,7 +296,7 @@ struct _parseCallbacks _ox_hash_no_attrs_callbacks = { ParseCallbacks ox_hash_no_attrs_callbacks = &_ox_hash_no_attrs_callbacks; struct _parseCallbacks _ox_hash_no_attrs_cdata_callbacks = { - NULL, + instruct, NULL, NULL, add_cdata, diff --git a/ext/ox/ox.h b/ext/ox/ox.h index aa359cb4..b00ee88e 100644 --- a/ext/ox/ox.h +++ b/ext/ox/ox.h @@ -146,7 +146,7 @@ struct _pInfo { VALUE *marked; int mark_size; // allocated size int mark_cnt; - char last; // last character read, rarely set + char last; // last character read, rarely set }; extern VALUE ox_parse(char *xml, size_t len, ParseCallbacks pcb, char **endp, Options options, Err err); diff --git a/ext/ox/parse.c b/ext/ox/parse.c index 5ad225ae..92db8b3f 100644 --- a/ext/ox/parse.c +++ b/ext/ox/parse.c @@ -167,7 +167,7 @@ ox_parse(char *xml, size_t len, ParseCallbacks pcb, char **endp, Options options helper_stack_cleanup(&pi.helpers); return Qnil; } - pi.s++; // past < + pi.s++; // past < switch (*pi.s) { case '?': // processing instruction pi.s++; diff --git a/ext/ox/sax_buf.c b/ext/ox/sax_buf.c index fba290f3..f8e9dc6a 100644 --- a/ext/ox/sax_buf.c +++ b/ext/ox/sax_buf.c @@ -79,7 +79,7 @@ int ox_sax_buf_read(Buf buf) { } else { shift = buf->pro - buf->head - 1; // leave one character so we cab backup one } - if (0 >= shift) { /* no space left so allocate more */ + if (0 >= shift) { /* no space left so allocate more */ char *old = buf->head; size_t size = buf->end - buf->head + BUF_PAD; diff --git a/ext/ox/slotcache.c b/ext/ox/slotcache.c index 2680fb81..c77b1966 100644 --- a/ext/ox/slotcache.c +++ b/ext/ox/slotcache.c @@ -82,8 +82,8 @@ slot_cache_get(SlotCache cache, const char *key, VALUE **slot, const char **keyp orig->key = form_key(key); orig->value = Qundef; } - } else { /* not exact match but on the path */ - if (0 != cache->key) { /* there is a key/value here already */ + } else { /* not exact match but on the path */ + if (0 != cache->key) { /* there is a key/value here already */ if (depth == *cache->key || (255 <= depth && 0 == strncmp(cache->key, key, depth) && '\0' == cache->key[depth])) { /* key belongs here */ continue; diff --git a/lib/ox/version.rb b/lib/ox/version.rb index 5dd9c25e..2c9e525c 100644 --- a/lib/ox/version.rb +++ b/lib/ox/version.rb @@ -1,4 +1,4 @@ module Ox # Current version of the module. - VERSION = '2.14.17' + VERSION = '2.14.18' end diff --git a/ox.gemspec b/ox.gemspec index 4b52eca2..2208b0bf 100644 --- a/ox.gemspec +++ b/ox.gemspec @@ -28,7 +28,5 @@ serialization. } s.required_ruby_version = '>= 2.7.0' - s.add_development_dependency 'rake-compiler', '>= 1.2', '< 2.0' - s.metadata['rubygems_mfa_required'] = 'true' end diff --git a/test/perf_mars.rb b/test/perf_mars.rb index 395e0ed5..94c62b98 100755 --- a/test/perf_mars.rb +++ b/test/perf_mars.rb @@ -58,7 +58,7 @@ def initialize(v=[]) data[:Fixnum].values << ((i - 10) * 101) data[:Float].values << ((i.to_f - 10.7) * 30.5) data[:String].values << "String #{i}" - data[:Symbol].values << "Symbol#{i}".to_sym + data[:Symbol].values << :"Symbol#{i}" data[:Time].values << (Time.now + i) data[:Array].values << [] data[:Hash].values << {} diff --git a/test/perf_obj.rb b/test/perf_obj.rb index 1343eae0..741a24d0 100755 --- a/test/perf_obj.rb +++ b/test/perf_obj.rb @@ -90,7 +90,7 @@ puts 'Load Performance' perf = Perf.new perf.add('Ox', 'load') { Ox.load($xml, mode: :object) } - perf.add('Oj', 'load') { Oj.load($json) } unless (defined?(Oj).nil? || ox_only) + perf.add('Oj', 'load') { Oj.load($json) } unless defined?(Oj).nil? || ox_only perf.add('Marshal', 'load') { Marshal.load($mars) } unless ox_only perf.run($iter) end @@ -100,7 +100,7 @@ puts 'Dump Performance' perf = Perf.new perf.add('Ox', 'dump') { Ox.dump($obj, indent: $indent, circular: $circular) } - perf.add('Oj', 'dump') { Oj.dump($obj) } unless (defined?(Oj).nil? || ox_only) + perf.add('Oj', 'dump') { Oj.dump($obj) } unless defined?(Oj).nil? || ox_only perf.add('Marshal', 'dump') { Marshal.dump($obj) } unless ox_only perf.run($iter) end @@ -110,7 +110,7 @@ puts 'Read from file Performance' perf = Perf.new perf.add('Ox', 'load_file') { Ox.load_file('sample.xml', mode: :object) } - perf.add('Oj', 'load') { Oj.load_file('sample.json') } unless (defined?(Oj).nil? || ox_only) + perf.add('Oj', 'load') { Oj.load_file('sample.json') } unless defined?(Oj).nil? || ox_only perf.add('Marshal', 'load') { Marshal.load(File.new('sample.marshal')) } unless ox_only perf.run($iter) end @@ -120,7 +120,7 @@ puts 'Write to file Performance' perf = Perf.new perf.add('Ox', 'to_file') { Ox.to_file('sample.xml', $obj, indent: $indent, circular: $circular) } - perf.add('Oj', 'to_file') { Oj.to_file('sample.json', $obj) } unless (defined?(Oj).nil? || ox_only) + perf.add('Oj', 'to_file') { Oj.to_file('sample.json', $obj) } unless defined?(Oj).nil? || ox_only perf.add('Marshal', 'dump') { Marshal.dump($obj, File.new('sample.marshal', 'w')) } unless ox_only perf.run($iter) end diff --git a/test/sax/sax_test.rb b/test/sax/sax_test.rb index c7f1745f..fb575a6f 100755 --- a/test/sax/sax_test.rb +++ b/test/sax/sax_test.rb @@ -1436,11 +1436,10 @@ def test_sax_html_attr [:start_element, :html], [:attr, :lang, 'en'], [:start_element, :head], - [:attr, :url, "http://ohler.com?x=2"], - [:text, " "], + [:attr, :url, 'http://ohler.com?x=2'], + [:text, ' '], [:end_element, :head], - [:end_element, :html], + [:end_element, :html] ], handler.calls) end - end diff --git a/test/tests.rb b/test/tests.rb index 760cde41..bfc1cb78 100755 --- a/test/tests.rb +++ b/test/tests.rb @@ -938,6 +938,15 @@ def test_full_encoding assert_equal(xml, dumped) end + def test_full_encoding_mode_hash + Ox.default_options = $ox_generic_options + xml = %{ +<いち name="ピーター" つま="まきえ">ピーター +} + obj = Ox.load(xml, mode: :hash) + assert_equal(obj, { :いち => [{ :name => 'ピーター', :つま => 'まきえ' }, 'ピーター'] }) + end + def test_obj_encoding Ox.default_options = $ox_object_options if RUBY_VERSION.start_with?('1.8')