Skip to content

Commit

Permalink
Correctly consider XML instruct declaration encoding when creating Ha…
Browse files Browse the repository at this point in the history
…sh using Ox.load(xml, mode: :hash) (#350)

* Correctly consider XML instruct declaration encoding when creating Hash using Ox.load(xml, mode: :hash)

* Fix rubocop linting issues

* Fix rubocop linting issues

* Run clang-format on C files
  • Loading branch information
Uelb authored Mar 21, 2024
1 parent ede3efe commit 1d34947
Show file tree
Hide file tree
Showing 13 changed files with 71 additions and 32 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ source 'https://rubygems.org'
gemspec

group :development do
gem 'rake-compiler', '>= 1.2', '< 2.0'
gem 'rubocop', '~> 1.47', require: false
end
4 changes: 2 additions & 2 deletions contrib/sax_benchmark.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
}.strip.gsub(/>\s+</, '><')

class OxHandler < Ox::Sax
attr :root
attr_reader :root

def initialize
super
Expand Down Expand Up @@ -70,7 +70,7 @@ def text(val)
end

class NokogiriHandler < Nokogiri::XML::SAX::Document
attr :root
attr_reader :root

def characters(val)
(@node['__content__'] ||= '') << val
Expand Down
58 changes: 45 additions & 13 deletions ext/ox/hash_load.c
Original file line number Diff line number Diff line change
Expand Up @@ -93,14 +93,26 @@ static void add_str(PInfo pi, VALUE s) {
}

static void add_text(PInfo pi, char *text, int closed) {
add_str(pi, rb_str_new2(text));
VALUE s = rb_str_new2(text);
if (0 != pi->options->rb_enc) {
rb_enc_associate(s, pi->options->rb_enc);
}
add_str(pi, s);
}

static void add_cdata(PInfo pi, const char *text, size_t len) {
add_str(pi, rb_str_new(text, len));
VALUE s = rb_str_new2(text);
if (0 != pi->options->rb_enc) {
rb_enc_associate(s, pi->options->rb_enc);
}
add_str(pi, s);
}

static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
VALUE s = rb_str_new2(ename);
if (0 != pi->options->rb_enc) {
rb_enc_associate(s, pi->options->rb_enc);
}
if (helper_stack_empty(&pi->helpers)) {
create_top(pi);
}
Expand All @@ -111,12 +123,14 @@ static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren
volatile VALUE a;

for (; 0 != attrs->name; attrs++) {
key = rb_str_new2(attrs->name);
if (0 != pi->options->rb_enc) {
rb_enc_associate(key, pi->options->rb_enc);
}
if (Qnil != pi->options->attr_key_mod) {
key = rb_funcall(pi->options->attr_key_mod, ox_call_id, 1, rb_str_new2(attrs->name));
key = rb_funcall(pi->options->attr_key_mod, ox_call_id, 1, key);
} else if (Yes == pi->options->sym_keys) {
key = rb_id2sym(rb_intern(attrs->name));
} else {
key = rb_str_new2(attrs->name);
key = rb_id2sym(rb_intern_str(key));
}
val = rb_str_new2(attrs->value);
if (0 != pi->options->rb_enc) {
Expand All @@ -127,17 +141,21 @@ static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren
a = rb_ary_new();
rb_ary_push(a, h);
mark_value(pi, a);
helper_stack_push(&pi->helpers, rb_intern(ename), a, ArrayCode);
helper_stack_push(&pi->helpers, rb_intern_str(s), a, ArrayCode);
} else {
helper_stack_push(&pi->helpers, rb_intern(ename), Qnil, NoCode);
helper_stack_push(&pi->helpers, rb_intern_str(s), Qnil, NoCode);
}
}

static void add_element_no_attrs(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
VALUE s = rb_str_new2(ename);
if (0 != pi->options->rb_enc) {
rb_enc_associate(s, pi->options->rb_enc);
}
if (helper_stack_empty(&pi->helpers)) {
create_top(pi);
}
helper_stack_push(&pi->helpers, rb_intern(ename), Qnil, NoCode);
helper_stack_push(&pi->helpers, rb_intern_str(s), Qnil, NoCode);
}

static int umark_hash_cb(VALUE key, VALUE value, VALUE x) {
Expand Down Expand Up @@ -224,8 +242,22 @@ static void finish(PInfo pi) {
xfree(pi->marked);
}

static void set_encoding_from_instruct(PInfo pi, Attr attrs) {
for (; 0 != attrs->name; attrs++) {
if (0 == strcmp("encoding", attrs->name)) {
pi->options->rb_enc = rb_enc_find(attrs->value);
}
}
}

static void instruct(PInfo pi, const char *target, Attr attrs, const char *content) {
if (0 == strcmp("xml", target)) {
set_encoding_from_instruct(pi, attrs);
}
}

struct _parseCallbacks _ox_hash_callbacks = {
NULL,
instruct,
NULL,
NULL,
NULL,
Expand All @@ -238,7 +270,7 @@ struct _parseCallbacks _ox_hash_callbacks = {
ParseCallbacks ox_hash_callbacks = &_ox_hash_callbacks;

struct _parseCallbacks _ox_hash_cdata_callbacks = {
NULL,
instruct,
NULL,
NULL,
add_cdata,
Expand All @@ -251,7 +283,7 @@ struct _parseCallbacks _ox_hash_cdata_callbacks = {
ParseCallbacks ox_hash_cdata_callbacks = &_ox_hash_cdata_callbacks;

struct _parseCallbacks _ox_hash_no_attrs_callbacks = {
NULL,
instruct,
NULL,
NULL,
NULL,
Expand All @@ -264,7 +296,7 @@ struct _parseCallbacks _ox_hash_no_attrs_callbacks = {
ParseCallbacks ox_hash_no_attrs_callbacks = &_ox_hash_no_attrs_callbacks;

struct _parseCallbacks _ox_hash_no_attrs_cdata_callbacks = {
NULL,
instruct,
NULL,
NULL,
add_cdata,
Expand Down
2 changes: 1 addition & 1 deletion ext/ox/ox.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ struct _pInfo {
VALUE *marked;
int mark_size; // allocated size
int mark_cnt;
char last; // last character read, rarely set
char last; // last character read, rarely set
};

extern VALUE ox_parse(char *xml, size_t len, ParseCallbacks pcb, char **endp, Options options, Err err);
Expand Down
2 changes: 1 addition & 1 deletion ext/ox/parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ ox_parse(char *xml, size_t len, ParseCallbacks pcb, char **endp, Options options
helper_stack_cleanup(&pi.helpers);
return Qnil;
}
pi.s++; // past <
pi.s++; // past <
switch (*pi.s) {
case '?': // processing instruction
pi.s++;
Expand Down
2 changes: 1 addition & 1 deletion ext/ox/sax_buf.c
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ int ox_sax_buf_read(Buf buf) {
} else {
shift = buf->pro - buf->head - 1; // leave one character so we cab backup one
}
if (0 >= shift) { /* no space left so allocate more */
if (0 >= shift) { /* no space left so allocate more */
char *old = buf->head;
size_t size = buf->end - buf->head + BUF_PAD;

Expand Down
4 changes: 2 additions & 2 deletions ext/ox/slotcache.c
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,8 @@ slot_cache_get(SlotCache cache, const char *key, VALUE **slot, const char **keyp
orig->key = form_key(key);
orig->value = Qundef;
}
} else { /* not exact match but on the path */
if (0 != cache->key) { /* there is a key/value here already */
} else { /* not exact match but on the path */
if (0 != cache->key) { /* there is a key/value here already */
if (depth == *cache->key || (255 <= depth && 0 == strncmp(cache->key, key, depth) &&
'\0' == cache->key[depth])) { /* key belongs here */
continue;
Expand Down
2 changes: 1 addition & 1 deletion lib/ox/version.rb
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
module Ox
# Current version of the module.
VERSION = '2.14.17'
VERSION = '2.14.18'
end
2 changes: 0 additions & 2 deletions ox.gemspec
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,5 @@ serialization. }

s.required_ruby_version = '>= 2.7.0'

s.add_development_dependency 'rake-compiler', '>= 1.2', '< 2.0'

s.metadata['rubygems_mfa_required'] = 'true'
end
2 changes: 1 addition & 1 deletion test/perf_mars.rb
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def initialize(v=[])
data[:Fixnum].values << ((i - 10) * 101)
data[:Float].values << ((i.to_f - 10.7) * 30.5)
data[:String].values << "String #{i}"
data[:Symbol].values << "Symbol#{i}".to_sym
data[:Symbol].values << :"Symbol#{i}"
data[:Time].values << (Time.now + i)
data[:Array].values << []
data[:Hash].values << {}
Expand Down
8 changes: 4 additions & 4 deletions test/perf_obj.rb
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@
puts 'Load Performance'
perf = Perf.new
perf.add('Ox', 'load') { Ox.load($xml, mode: :object) }
perf.add('Oj', 'load') { Oj.load($json) } unless (defined?(Oj).nil? || ox_only)
perf.add('Oj', 'load') { Oj.load($json) } unless defined?(Oj).nil? || ox_only
perf.add('Marshal', 'load') { Marshal.load($mars) } unless ox_only
perf.run($iter)
end
Expand All @@ -100,7 +100,7 @@
puts 'Dump Performance'
perf = Perf.new
perf.add('Ox', 'dump') { Ox.dump($obj, indent: $indent, circular: $circular) }
perf.add('Oj', 'dump') { Oj.dump($obj) } unless (defined?(Oj).nil? || ox_only)
perf.add('Oj', 'dump') { Oj.dump($obj) } unless defined?(Oj).nil? || ox_only
perf.add('Marshal', 'dump') { Marshal.dump($obj) } unless ox_only
perf.run($iter)
end
Expand All @@ -110,7 +110,7 @@
puts 'Read from file Performance'
perf = Perf.new
perf.add('Ox', 'load_file') { Ox.load_file('sample.xml', mode: :object) }
perf.add('Oj', 'load') { Oj.load_file('sample.json') } unless (defined?(Oj).nil? || ox_only)
perf.add('Oj', 'load') { Oj.load_file('sample.json') } unless defined?(Oj).nil? || ox_only
perf.add('Marshal', 'load') { Marshal.load(File.new('sample.marshal')) } unless ox_only
perf.run($iter)
end
Expand All @@ -120,7 +120,7 @@
puts 'Write to file Performance'
perf = Perf.new
perf.add('Ox', 'to_file') { Ox.to_file('sample.xml', $obj, indent: $indent, circular: $circular) }
perf.add('Oj', 'to_file') { Oj.to_file('sample.json', $obj) } unless (defined?(Oj).nil? || ox_only)
perf.add('Oj', 'to_file') { Oj.to_file('sample.json', $obj) } unless defined?(Oj).nil? || ox_only
perf.add('Marshal', 'dump') { Marshal.dump($obj, File.new('sample.marshal', 'w')) } unless ox_only
perf.run($iter)
end
7 changes: 3 additions & 4 deletions test/sax/sax_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1436,11 +1436,10 @@ def test_sax_html_attr
[:start_element, :html],
[:attr, :lang, 'en'],
[:start_element, :head],
[:attr, :url, "http://ohler.com?x=2"],
[:text, " "],
[:attr, :url, 'http://ohler.com?x=2'],
[:text, ' '],
[:end_element, :head],
[:end_element, :html],
[:end_element, :html]
], handler.calls)
end

end
9 changes: 9 additions & 0 deletions test/tests.rb
Original file line number Diff line number Diff line change
Expand Up @@ -938,6 +938,15 @@ def test_full_encoding
assert_equal(xml, dumped)
end

def test_full_encoding_mode_hash
Ox.default_options = $ox_generic_options
xml = %{<?xml version="1.0" encoding="UTF-8"?>
<いち name="ピーター" つま="まきえ">ピーター</いち>
}
obj = Ox.load(xml, mode: :hash)
assert_equal(obj, { :いち => [{ :name => 'ピーター', :つ => 'まきえ' }, 'ピーター'] })
end

def test_obj_encoding
Ox.default_options = $ox_object_options
if RUBY_VERSION.start_with?('1.8')
Expand Down

0 comments on commit 1d34947

Please sign in to comment.