From 3f4002af60b03e1486c5aef1b2b1cc2442aed83b Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 21 Jul 2022 16:41:00 -0400 Subject: [PATCH 1/6] ci: update vmactions/freebsd-vm job config Fixes #2600 --- .github/workflows/ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f5e9cd8ec13..50b05b092ea 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -282,12 +282,12 @@ jobs: fail-fast: false matrix: sys: ["enable", "disable"] - runs-on: macos-10.15 + runs-on: macos-12 steps: - uses: actions/checkout@v2 with: submodules: true - - uses: vmactions/freebsd-vm@v0.1.5 + - uses: vmactions/freebsd-vm@v0.2.0 with: usesh: true prepare: pkg install -y ruby devel/ruby-gems pkgconf libxml2 libxslt From 0c048067f5d5993e5617fc6c0e572dfc52d5b5a6 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 21 Jul 2022 14:31:02 -0400 Subject: [PATCH 2/6] ci: import the downstream pipeline from main --- .github/workflows/downstream.yml | 67 ++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) create mode 100644 .github/workflows/downstream.yml diff --git a/.github/workflows/downstream.yml b/.github/workflows/downstream.yml new file mode 100644 index 00000000000..5ea5371b76e --- /dev/null +++ b/.github/workflows/downstream.yml @@ -0,0 +1,67 @@ +name: downstream +concurrency: + group: "${{github.workflow}}-${{github.ref}}" + cancel-in-progress: true +on: + workflow_dispatch: + schedule: + - cron: "0 8 * * 1,3,5" # At 08:00 on Monday, Wednesday, and Friday # https://crontab.guru/#0_8_*_*_1,3,5 + push: + branches: + - main + - v*.*.x + tags: + - v*.*.* + pull_request: + types: [opened, synchronize] + branches: + - '*' + +jobs: + downstream: + name: downstream-${{matrix.name}} + strategy: + fail-fast: false + matrix: + include: + - url: https://github.com/flavorjones/loofah + name: loofah + command: "bundle exec rake test" + - url: https://github.com/rails/rails-html-sanitizer + name: rails-html-sanitizer + command: "bundle exec rake test" + - url: https://github.com/rgrove/sanitize + name: sanitize + command: "bundle exec rake test" + - url: https://github.com/ebeigarts/signer + name: signer + command: "bundle exec rake spec" + - url: https://github.com/WinRb/Viewpoint + name: viewpoint + command: "bundle exec rspec spec" + - url: https://github.com/rails/rails + name: xmlmini + command: "cd activesupport && bundle exec rake test TESTOPTS=-n/XmlMini/" + runs-on: ubuntu-latest + container: + image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 + steps: + - uses: actions/checkout@v2 + with: + submodules: true + - uses: actions/cache@v2 + with: + path: ports + key: ports-ubuntu-${{hashFiles('dependencies.yml', 'patches/**/*.patch')}} + - run: bundle install --local || bundle install + - run: bundle exec rake compile + - run: | + git clone --depth=1 ${{matrix.url}} ${{matrix.name}} + cd ${{matrix.name}} + if grep nokogiri Gemfile ; then + sed -i 's/\(.*nokogiri.*\)/\1, path: ".."/' Gemfile + else + echo "gem 'nokogiri', path: '..'" >> Gemfile + fi + bundle install --local || bundle install + ${{matrix.command}} From 193a07d3cb2c80ab5f5739d0761479aa4fa5e807 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 21 Jul 2022 12:36:05 -0400 Subject: [PATCH 3/6] ci: add creek to the downstream pipeline --- .github/workflows/downstream.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/downstream.yml b/.github/workflows/downstream.yml index 5ea5371b76e..7278f3f5462 100644 --- a/.github/workflows/downstream.yml +++ b/.github/workflows/downstream.yml @@ -42,6 +42,9 @@ jobs: - url: https://github.com/rails/rails name: xmlmini command: "cd activesupport && bundle exec rake test TESTOPTS=-n/XmlMini/" + - url: https://github.com/pythonicrubyist/creek + name: creek + command: "bundle exec rake spec" runs-on: ubuntu-latest container: image: ghcr.io/sparklemotion/nokogiri-test:mri-3.1 @@ -63,5 +66,8 @@ jobs: else echo "gem 'nokogiri', path: '..'" >> Gemfile fi + if egrep "add_development_dependency.*\bbundler\b" *gemspec ; then + sed -i 's/.*add_development_dependency.*\bbundler\b.*//' *gemspec + fi bundle install --local || bundle install ${{matrix.command}} From 12874a7a6b43db2f75be96d9cc77fe426d5ec433 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 21 Jul 2022 12:21:45 -0400 Subject: [PATCH 4/6] feat: Reader#attribute_hash which is now being used to implement Reader#attributes fix: Reader#namespaces on JRuby now returns an empty hash when no attributes are present (previously returned nil) --- ext/java/nokogiri/XmlReader.java | 7 +++ ext/java/nokogiri/internals/ReaderNode.java | 5 +- ext/nokogiri/xml_reader.c | 43 ++++++++++++++++ lib/nokogiri/xml/reader.rb | 7 +-- test/xml/test_reader.rb | 56 +++++++++++++++++---- 5 files changed, 101 insertions(+), 17 deletions(-) diff --git a/ext/java/nokogiri/XmlReader.java b/ext/java/nokogiri/XmlReader.java index f8b01d550a7..615eb85d3e3 100644 --- a/ext/java/nokogiri/XmlReader.java +++ b/ext/java/nokogiri/XmlReader.java @@ -144,6 +144,13 @@ public class XmlReader extends RubyObject return currentNode().getAttributesNodes(); } + @JRubyMethod + public IRubyObject + attribute_hash(ThreadContext context) + { + return currentNode().getAttributes(context); + } + @JRubyMethod(name = "attributes?") public IRubyObject attributes_p(ThreadContext context) diff --git a/ext/java/nokogiri/internals/ReaderNode.java b/ext/java/nokogiri/internals/ReaderNode.java index f671075db9f..4dc17f62e4d 100644 --- a/ext/java/nokogiri/internals/ReaderNode.java +++ b/ext/java/nokogiri/internals/ReaderNode.java @@ -112,9 +112,10 @@ public abstract class ReaderNode getAttributes(ThreadContext context) { final Ruby runtime = context.runtime; - if (attributeList == null) { return runtime.getNil(); } RubyHash hash = RubyHash.newHash(runtime); + if (attributeList == null) { return hash; } for (int i = 0; i < attributeList.length; i++) { + if (isNamespace(attributeList.names.get(i))) { continue; } IRubyObject k = stringOrBlank(runtime, attributeList.names.get(i)); IRubyObject v = stringOrBlank(runtime, attributeList.values.get(i)); hash.fastASetCheckString(runtime, k, v); // hash.op_aset(context, k, v) @@ -150,8 +151,8 @@ public abstract class ReaderNode getNamespaces(ThreadContext context) { final Ruby runtime = context.runtime; - if (namespaces == null) { return runtime.getNil(); } RubyHash hash = RubyHash.newHash(runtime); + if (namespaces == null) { return hash; } for (Map.Entry entry : namespaces.entrySet()) { IRubyObject k = stringOrBlank(runtime, entry.getKey()); IRubyObject v = stringOrBlank(runtime, entry.getValue()); diff --git a/ext/nokogiri/xml_reader.c b/ext/nokogiri/xml_reader.c index 4f87e18f144..81f4bdc2755 100644 --- a/ext/nokogiri/xml_reader.c +++ b/ext/nokogiri/xml_reader.c @@ -31,6 +31,7 @@ has_attributes(xmlTextReaderPtr reader) return (0); } +// TODO: merge this function into the `namespaces` method implementation static void Nokogiri_xml_node_namespaces(xmlNodePtr node, VALUE attr_hash) { @@ -181,6 +182,47 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader) return attr_nodes; } +/* + :call-seq: attribute_hash() → Hash + + Get the attributes of the current node as a Hash of names and values. + + See related: #attributes and #namespaces + */ +static VALUE +rb_xml_reader_attribute_hash(VALUE rb_reader) +{ + VALUE rb_attributes = rb_hash_new(); + xmlTextReaderPtr c_reader; + xmlNodePtr c_node; + xmlAttrPtr c_property; + + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); + + if (!has_attributes(c_reader)) { + return rb_attributes; + } + + c_node = xmlTextReaderExpand(c_reader); + c_property = c_node->properties; + while (c_property != NULL) { + VALUE rb_name = NOKOGIRI_STR_NEW2(c_property->name); + VALUE rb_value = Qnil; + xmlChar *c_value = xmlNodeGetContent((xmlNode *)c_property); + + if (c_value) { + rb_value = NOKOGIRI_STR_NEW2(c_value); + xmlFree(c_value); + } + + rb_hash_aset(rb_attributes, rb_name, rb_value); + + c_property = c_property->next; + } + + return rb_attributes; +} + /* * call-seq: * attribute_at(index) @@ -696,6 +738,7 @@ noko_init_xml_reader() rb_define_method(cNokogiriXmlReader, "attribute_at", attribute_at, 1); rb_define_method(cNokogiriXmlReader, "attribute_count", attribute_count, 0); rb_define_method(cNokogiriXmlReader, "attribute_nodes", rb_xml_reader_attribute_nodes, 0); + rb_define_method(cNokogiriXmlReader, "attribute_hash", rb_xml_reader_attribute_hash, 0); rb_define_method(cNokogiriXmlReader, "attributes?", attributes_eh, 0); rb_define_method(cNokogiriXmlReader, "base_uri", rb_xml_reader_base_uri, 0); rb_define_method(cNokogiriXmlReader, "default?", default_eh, 0); diff --git a/lib/nokogiri/xml/reader.rb b/lib/nokogiri/xml/reader.rb index df13216afed..1a007963598 100644 --- a/lib/nokogiri/xml/reader.rb +++ b/lib/nokogiri/xml/reader.rb @@ -87,12 +87,7 @@ def initialize(source, url = nil, encoding = nil) # :nodoc: # # [Returns] (Hash) Attribute names and values def attributes - attrs_hash = attribute_nodes.each_with_object({}) do |node, hash| - hash[node.name] = node.to_s - end - ns = namespaces - attrs_hash.merge!(ns) if ns - attrs_hash + attribute_hash.merge(namespaces) end ### diff --git a/test/xml/test_reader.rb b/test/xml/test_reader.rb index b3019691e6a..5b7e888203a 100644 --- a/test/xml/test_reader.rb +++ b/test/xml/test_reader.rb @@ -228,23 +228,61 @@ def test_attributes? reader.map(&:attributes?)) end - def test_attributes - reader = Nokogiri::XML::Reader.from_memory(<<-eoxml) - - snuggles! - - eoxml + def test_reader_attributes + reader = Nokogiri::XML::Reader.from_memory(<<~XML) + + snuggles! + + XML assert_empty(reader.attributes) assert_equal([{ "xmlns:tenderlove" => "http://tenderlovemaking.com/", "xmlns" => "http://mothership.connection.com/", }, - {}, { "awesome" => "true" }, {}, { "awesome" => "true" }, {}, + {}, + { "awesome" => "true" }, + {}, + { "awesome" => "true" }, + {}, { "xmlns:tenderlove" => "http://tenderlovemaking.com/", "xmlns" => "http://mothership.connection.com/", },], reader.map(&:attributes)) end + def test_reader_attributes_hash + reader = Nokogiri::XML::Reader.from_memory(<<~XML) + + snuggles! + + XML + assert_empty(reader.attribute_hash) + assert_equal([{}, + {}, + { "awesome" => "true" }, + {}, + { "awesome" => "true" }, + {}, + {},], + reader.map(&:attribute_hash)) + end + + def test_reader_namespaces + reader = Nokogiri::XML::Reader.from_memory(<<~XML) + + snuggles! + + XML + assert_empty(reader.namespaces) + assert_equal([{ "xmlns:tenderlove" => "http://tenderlovemaking.com/", + "xmlns" => "http://mothership.connection.com/", }, + {}, + {}, + {}, + {}, + {}, + { "xmlns:tenderlove" => "http://tenderlovemaking.com/", + "xmlns" => "http://mothership.connection.com/", },], + reader.map(&:namespaces)) + end + def test_attribute_roundtrip reader = Nokogiri::XML::Reader.from_memory(<<-eoxml) Date: Thu, 21 Jul 2022 10:38:32 -0400 Subject: [PATCH 5/6] dev: introduce NOKO_WARN_DEPRECATION macro which will use rb_category_warning if it's available, and rb_warning if it's not --- ext/nokogiri/extconf.rb | 1 + ext/nokogiri/nokogiri.h | 6 ++++++ ext/nokogiri/xml_node.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/ext/nokogiri/extconf.rb b/ext/nokogiri/extconf.rb index 333155eea7e..46593b9cae9 100644 --- a/ext/nokogiri/extconf.rb +++ b/ext/nokogiri/extconf.rb @@ -974,6 +974,7 @@ def compile have_func("xmlSchemaSetValidStructuredErrors") # introduced in libxml 2.6.23 have_func("xmlSchemaSetParserStructuredErrors") # introduced in libxml 2.6.23 have_func("rb_gc_location") # introduced in Ruby 2.7 +have_func("rb_category_warning") # introduced in Ruby 3.0 have_func("vasprintf") diff --git a/ext/nokogiri/nokogiri.h b/ext/nokogiri/nokogiri.h index 76394f32e25..51ddbf8d1eb 100644 --- a/ext/nokogiri/nokogiri.h +++ b/ext/nokogiri/nokogiri.h @@ -202,6 +202,12 @@ NOKOPUBFUN VALUE Nokogiri_wrap_xml_document(VALUE klass, #define DISCARD_CONST_QUAL(t, v) ((t)(uintptr_t)(v)) #define DISCARD_CONST_QUAL_XMLCHAR(v) DISCARD_CONST_QUAL(xmlChar *, v) +#if HAVE_RB_CATEGORY_WARNING +# define NOKO_WARN_DEPRECATION(message) rb_category_warning(RB_WARN_CATEGORY_DEPRECATED, message) +#else +# define NOKO_WARN_DEPRECATION(message) rb_warning(message) +#endif + void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state); void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state, void *user_data, xmlStructuredErrorFunc handler); diff --git a/ext/nokogiri/xml_node.c b/ext/nokogiri/xml_node.c index 2507f577f12..c80d2d4247c 100644 --- a/ext/nokogiri/xml_node.c +++ b/ext/nokogiri/xml_node.c @@ -1806,7 +1806,7 @@ rb_xml_node_new(int argc, VALUE *argv, VALUE klass) } if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) { // TODO: deprecate allowing Node - rb_warn("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri."); + NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri."); } Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node); From 80e888c4034756d25c6388fcfa03c9606576dd85 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Thu, 21 Jul 2022 12:28:21 -0400 Subject: [PATCH 6/6] deprecate: Reader#attribute_nodes This method will be removed in a future version --- ext/java/nokogiri/XmlReader.java | 1 + ext/nokogiri/xml_reader.c | 10 +++++++++- lib/nokogiri/xml/reader.rb | 7 +++++-- test/xml/test_reader.rb | 25 ++++++++++++++++--------- 4 files changed, 31 insertions(+), 12 deletions(-) diff --git a/ext/java/nokogiri/XmlReader.java b/ext/java/nokogiri/XmlReader.java index 615eb85d3e3..dff65b4a1eb 100644 --- a/ext/java/nokogiri/XmlReader.java +++ b/ext/java/nokogiri/XmlReader.java @@ -141,6 +141,7 @@ public class XmlReader extends RubyObject public IRubyObject attribute_nodes(ThreadContext context) { + context.runtime.getWarnings().warn("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead."); return currentNode().getAttributesNodes(); } diff --git a/ext/nokogiri/xml_reader.c b/ext/nokogiri/xml_reader.c index 81f4bdc2755..022f8ff5310 100644 --- a/ext/nokogiri/xml_reader.c +++ b/ext/nokogiri/xml_reader.c @@ -151,7 +151,11 @@ namespaces(VALUE self) /* :call-seq: attribute_nodes() → Array - Get the attributes of the current node as an Array of Attr + Get the attributes of the current node as an Array of XML:Attr + + ⚠ This method is deprecated and unsafe to use. It will be removed in a future version of Nokogiri. + + See related: #attribute_hash, #attributes */ static VALUE rb_xml_reader_attribute_nodes(VALUE rb_reader) @@ -161,6 +165,10 @@ rb_xml_reader_attribute_nodes(VALUE rb_reader) VALUE attr_nodes; int j; + // TODO: deprecated, remove in Nokogiri v1.15, see https://github.com/sparklemotion/nokogiri/issues/2598 + // After removal, we can also remove all the "node_has_a_document" special handling from xml_node.c + NOKO_WARN_DEPRECATION("Reader#attribute_nodes is deprecated and will be removed in a future version of Nokogiri. Please use Reader#attribute_hash instead."); + Data_Get_Struct(rb_reader, xmlTextReader, c_reader); if (! has_attributes(c_reader)) { diff --git a/lib/nokogiri/xml/reader.rb b/lib/nokogiri/xml/reader.rb index 1a007963598..5f637cf9535 100644 --- a/lib/nokogiri/xml/reader.rb +++ b/lib/nokogiri/xml/reader.rb @@ -83,9 +83,12 @@ def initialize(source, url = nil, encoding = nil) # :nodoc: end private :initialize - # Get the attributes of the current node as a Hash + # Get the attributes and namespaces of the current node as a Hash. # - # [Returns] (Hash) Attribute names and values + # This is the union of Reader#attribute_hash and Reader#namespaces + # + # [Returns] + # (Hash) Attribute names and values, and namespace prefixes and hrefs. def attributes attribute_hash.merge(namespaces) end diff --git a/test/xml/test_reader.rb b/test/xml/test_reader.rb index 5b7e888203a..ab119656c7a 100644 --- a/test/xml/test_reader.rb +++ b/test/xml/test_reader.rb @@ -472,7 +472,9 @@ def test_reader_node_attributes_keep_a_reference_to_the_reader reader = Nokogiri::XML::Reader.from_memory(xml) reader.each do |element| - attribute_nodes += element.attribute_nodes + assert_output(nil, /Reader#attribute_nodes is deprecated/) do + attribute_nodes += element.attribute_nodes + end end end @@ -489,7 +491,9 @@ def test_namespaced_attributes eoxml attr_ns = [] while reader.read - if reader.node_type == Nokogiri::XML::Node::ELEMENT_NODE + next unless reader.node_type == Nokogiri::XML::Node::ELEMENT_NODE + + assert_output(nil, /Reader#attribute_nodes is deprecated/) do reader.attribute_nodes.each { |attr| attr_ns << (attr.namespace.nil? ? nil : attr.namespace.prefix) } end end @@ -593,14 +597,17 @@ def test_read_from_memory end def test_large_document_smoke_test - # simply run on a large document to verify that there no GC issues - xml = [] - xml << "" - 10000.times { |j| xml << "" } - xml << "" - xml = xml.join("\n") + skip_unless_libxml2("valgrind tests should only run with libxml2") - Nokogiri::XML::Reader.from_memory(xml).each(&:attributes) + refute_valgrind_errors do + xml = [] + xml << "" + 10000.times { |j| xml << "" } + xml << "" + xml = xml.join("\n") + + Nokogiri::XML::Reader.from_memory(xml).each(&:attributes) + end end def test_correct_outer_xml_inclusion