Skip to content

Commit

Permalink
refactor: move namespaces into the XPathVisitor
Browse files Browse the repository at this point in the history
Previously this was passed into CSS::Parser.new and used during the
parsing phase. Now it's passed into the visitor and we do namespace
munging there, where it should be.

Note the namespaces has also been moved out of the top-level parser
cache key into XPathVisitor#config.

Finally, note that namespaces is nil by default (instead of being an
empty Hash) which should prevent some unnecessary object creation in
the most common use cases.
  • Loading branch information
flavorjones committed Jun 11, 2024
1 parent a84ad7f commit 179d74d
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 37 deletions.
18 changes: 11 additions & 7 deletions lib/nokogiri/css.rb
Original file line number Diff line number Diff line change
Expand Up @@ -83,16 +83,20 @@ def xpath_for(
selector = selector.to_str
raise(Nokogiri::CSS::SyntaxError, "empty CSS selector") if selector.empty?

raise ArgumentError, "cannot provide both :prefix and :visitor" if prefix && visitor
if visitor
raise ArgumentError, "cannot provide both :prefix and :visitor" if prefix
raise ArgumentError, "cannot provide both :ns and :visitor" if ns
end

visitor ||= begin
visitor_kw = {}
visitor_kw[:prefix] = prefix if prefix
visitor_kw[:namespaces] = ns if ns

ns ||= {}
visitor ||= if prefix
Nokogiri::CSS::XPathVisitor.new(prefix: prefix)
else
Nokogiri::CSS::XPathVisitor.new
Nokogiri::CSS::XPathVisitor.new(**visitor_kw)
end

Parser.new(ns).xpath_for(selector, visitor)
Parser.new.xpath_for(selector, visitor)
end
end
end
Expand Down
6 changes: 3 additions & 3 deletions lib/nokogiri/css/parser.rb
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# frozen_string_literal: true
#
# DO NOT MODIFY!!!!
# This file is automatically generated by Racc 1.7.3
# This file is automatically generated by Racc 1.8.0
# from Racc grammar file "parser.y".
#

Expand Down Expand Up @@ -470,12 +470,12 @@ def _reduce_23(val, _values, result)
end

def _reduce_24(val, _values, result)
result = Node.new(:ELEMENT_NAME, [[val[0], val[2]].compact.join(':')])
result = Node.new(:ELEMENT_NAME, [val[0], val[2]])
result
end

def _reduce_25(val, _values, result)
name = @namespaces.key?('xmlns') ? "xmlns:#{val[0]}" : val[0]
name = val[0]
result = Node.new(:ELEMENT_NAME, [name])

result
Expand Down
4 changes: 2 additions & 2 deletions lib/nokogiri/css/parser.y
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,9 @@ rule
;

namespaced_ident:
namespace '|' IDENT { result = Node.new(:ELEMENT_NAME, [[val[0], val[2]].compact.join(':')]) }
namespace '|' IDENT { result = Node.new(:ELEMENT_NAME, [val[0], val[2]]) }
| IDENT {
name = @namespaces.key?('xmlns') ? "xmlns:#{val[0]}" : val[0]
name = val[0]
result = Node.new(:ELEMENT_NAME, [name])
}
;
Expand Down
8 changes: 3 additions & 5 deletions lib/nokogiri/css/parser_extras.rb
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,9 @@ def without_cache(&block)
end
end

# Create a new CSS parser with respect to +namespaces+
def initialize(namespaces = {})
def initialize
@tokenizer = Tokenizer.new
@namespaces = namespaces
super()
super
end

def parse(string)
Expand Down Expand Up @@ -88,7 +86,7 @@ def on_error(error_token_id, error_value, value_stack)

def cache_key(query, visitor)
if self.class.cache_on?
[query, @namespaces, visitor.config]
[query, visitor.config]
end
end
end
Expand Down
25 changes: 19 additions & 6 deletions lib/nokogiri/css/xpath_visitor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ module DoctypeConfig
# The visitor configuration set via the +prefix:+ keyword argument to XPathVisitor.new.
attr_reader :prefix

# The visitor configuration set via the +namespaces:+ keyword argument to XPathVisitor.new.
attr_reader :namespaces

# :call-seq:
# new() → XPathVisitor
# new(builtins:, doctype:) → XPathVisitor
Expand All @@ -66,7 +69,8 @@ module DoctypeConfig
def initialize(
builtins: BuiltinsConfig::NEVER,
doctype: DoctypeConfig::XML,
prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX
prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
namespaces: nil
)
unless BuiltinsConfig::VALUES.include?(builtins)
raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter")
Expand All @@ -78,6 +82,7 @@ def initialize(
@builtins = builtins
@doctype = doctype
@prefix = prefix
@namespaces = namespaces
end

# :call-seq: config() → Hash
Expand All @@ -86,7 +91,7 @@ def initialize(
# a Hash representing the configuration of the XPathVisitor, suitable for use as
# part of the CSS cache key.
def config
{ builtins: @builtins, doctype: @doctype, prefix: @prefix }
{ builtins: @builtins, doctype: @doctype, prefix: @prefix, namespaces: @namespaces }
end

# :stopdoc:
Expand Down Expand Up @@ -272,6 +277,14 @@ def visit_element_name(node)
else
"*[local-name()='#{node.value.first}']"
end
elsif node.value.length == 2 # has a namespace prefix
if node.value.first.nil? # namespace prefix is empty
node.value.last
else
node.value.join(":")
end
elsif @namespaces&.key?("xmlns") # apply the default namespace if it's declared
"xmlns:#{node.value.first}"
else
node.value.first
end
Expand All @@ -294,10 +307,10 @@ def validate_xpath_function_name(name)
end

def html5_element_name_needs_namespace_handling(node)
# if this is the wildcard selector "*", use it as normal
node.value.first != "*" &&
# if there is already a namespace (i.e., it is a prefixed QName), use it as normal
!node.value.first.include?(":")
# if there is already a namespace (i.e., it is a prefixed QName), use it as normal
node.value.length == 1 &&
# if this is the wildcard selector "*", use it as normal
node.value.first != "*"
end

def nth(node, options = {})
Expand Down
3 changes: 2 additions & 1 deletion lib/nokogiri/xml/searchable.rb
Original file line number Diff line number Diff line change
Expand Up @@ -248,8 +248,9 @@ def xpath_query_from_css_rule(rule, ns)
builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
doctype: document.xpath_doctype,
prefix: implied_xpath_context,
namespaces: ns,
)
CSS.xpath_for(rule.to_s, ns: ns, visitor: visitor)
CSS.xpath_for(rule.to_s, visitor: visitor)
end.join(" | ")
end

Expand Down
4 changes: 2 additions & 2 deletions test/css/test_parser_cache.rb
Original file line number Diff line number Diff line change
Expand Up @@ -134,19 +134,19 @@ def test_cache_key_on_ns_prefix_and_visitor_config
Nokogiri::CSS.xpath_for("foo", prefix: ".//", ns: { "example" => "http://example.com/" })
Nokogiri::CSS.xpath_for(
"foo",
ns: { "example" => "http://example.com/" },
visitor: Nokogiri::CSS::XPathVisitor.new(
builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::ALWAYS,
prefix: ".//",
namespaces: { "example" => "http://example.com/" },
),
)
Nokogiri::CSS.xpath_for(
"foo",
ns: { "example" => "http://example.com/" },
visitor: Nokogiri::CSS::XPathVisitor.new(
builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::ALWAYS,
doctype: Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML5,
prefix: ".//",
namespaces: { "example" => "http://example.com/" },
),
)
assert_equal(5, cache.length)
Expand Down
40 changes: 29 additions & 11 deletions test/css/test_xpath_visitor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,6 @@

describe Nokogiri::CSS::XPathVisitor do
let(:parser) { Nokogiri::CSS::Parser.new }

let(:parser_with_ns) do
Nokogiri::CSS::Parser.new({
"xmlns" => "http://default.example.com/",
"hoge" => "http://hoge.example.com/",
})
end

let(:visitor) { Nokogiri::CSS::XPathVisitor.new }

def assert_xpath(expecteds, asts)
Expand Down Expand Up @@ -50,15 +42,23 @@ def assert_xpath(expecteds, asts)
Nokogiri::CSS::XPathVisitor.new(doctype: Nokogiri::CSS::XPathVisitor::DoctypeConfig::HTML5).doctype,
)
assert_raises(ArgumentError) { Nokogiri::CSS::XPathVisitor.new(doctype: :not_valid) }

assert_equal({ "foo": "bar" }, Nokogiri::CSS::XPathVisitor.new(namespaces: { "foo": "bar" }).namespaces)

assert_equal("xxx", Nokogiri::CSS::XPathVisitor.new(prefix: "xxx").prefix)
end

it "exposes its configuration" do
expected = {
builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::NEVER,
doctype: Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML,
prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
namespaces: nil,
}
assert_equal(expected, visitor.config)

assert_nil(visitor.namespaces)
assert_equal(Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX, visitor.prefix)
end

it "raises an exception on single quote" do
Expand Down Expand Up @@ -137,9 +137,25 @@ def assert_xpath(expecteds, asts)
assert_xpath("//a[@href]", parser.parse("a[|href]"))
assert_xpath("//*[@flavorjones:href]", parser.parse("*[flavorjones|href]"))

## Default namespace is not applied to attributes, so this is @class, not @xmlns:class.
assert_xpath("//xmlns:a[@class='bar']", parser_with_ns.parse("a[class='bar']"))
assert_xpath("//xmlns:a[@hoge:class='bar']", parser_with_ns.parse("a[hoge|class='bar']"))
ns = {
"xmlns" => "http://default.example.com/",
"hoge" => "http://hoge.example.com/",
}

# An intentionally-empty namespace means "don't use the default xmlns"
assert_equal(["//a"], Nokogiri::CSS.xpath_for("|a", ns: ns))

# The default namespace is not applied to attributes (just elements)
assert_equal(
["//xmlns:a[@class='bar']"],
Nokogiri::CSS.xpath_for("a[class='bar']", ns: ns),
)

# We can explicitly apply a namespace to an attribue
assert_equal(
["//xmlns:a[@hoge:class='bar']"],
Nokogiri::CSS.xpath_for("a[hoge|class='bar']", ns: ns),
)
end

it "rhs with quotes" do
Expand Down Expand Up @@ -543,6 +559,7 @@ def visit_pseudo_class_aaron(node)
builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::ALWAYS,
doctype: Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML,
prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
namespaces: nil,
}
assert_equal(expected, visitor.config)
end
Expand Down Expand Up @@ -606,6 +623,7 @@ def visit_pseudo_class_aaron(node)
builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
doctype: Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML,
prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
namespaces: nil,
}
assert_equal(expected, visitor.config)
end
Expand Down

0 comments on commit 179d74d

Please sign in to comment.