From 106647545b8be70c1c5bb6c5656af17c0b0b2e4f Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 11 Jan 2022 11:27:11 -0500 Subject: [PATCH 1/8] test: wrap CSS integration tests in a describe so it's easier to do focused test runs --- test/css/test_css_integration.rb | 614 ++++++++++++++++--------------- 1 file changed, 308 insertions(+), 306 deletions(-) diff --git a/test/css/test_css_integration.rb b/test/css/test_css_integration.rb index c7f2f26e312..292cfe69a43 100644 --- a/test/css/test_css_integration.rb +++ b/test/css/test_css_integration.rb @@ -3,363 +3,365 @@ require "helper" class TestNokogiriCssIntegration < Nokogiri::TestCase - let(:subject) do - subject_class.parse(<<~HTML) - - - - - - - - - - - - - - - - -
row1
row2
row3
row4
row5
row6
row7
row8
row9
row10
row11
row12
row13
row14
-
- bold1 - italic1 - bold2 - emphasis1 - italic2 -

para1

- bold3 -
-
- italic3 - emphasis2 - italic4 - emphasis3 - italic5 - italic6 - italic7 -
-
-

para2

-

para3

-
-
-

para4

-
- -
-

-

header1

-

-
-
-

header2

-

header3

-
-
-

header4

-
- -

-

- - HTML - end + describe "CSS integration tests" do + let(:subject) do + subject_class.parse(<<~HTML) + + + + + + + + + + + + + + + + +
row1
row2
row3
row4
row5
row6
row7
row8
row9
row10
row11
row12
row13
row14
+
+ bold1 + italic1 + bold2 + emphasis1 + italic2 +

para1

+ bold3 +
+
+ italic3 + emphasis2 + italic4 + emphasis3 + italic5 + italic6 + italic7 +
+
+

para2

+

para3

+
+
+

para4

+
+ +
+

+

header1

+

+
+
+

header2

+

header3

+
+
+

header4

+
+ +

+

+ + HTML + end - let(:nested) do - subject_class.parse(<<~HTML) - -
- bold -

para

-
- -
- bold - ... - ... -

para

-
- -
-
+ let(:nested) do + subject_class.parse(<<~HTML) + +
bold

para

-
+
bold ... ...

para

-
-
- bold -
+
+
+ bold +

para

+
+ +
+ bold + ... + ... +

para

+
+
-
-

para

-
- HTML - end +
+ bold +
- def assert_result_rows(intarray, result, word = "row") - assert_equal(intarray.size, result.size, - "unexpected number of rows returned: '#{result.inner_text}'") - assert_equal(intarray.map { |j| "#{word}#{j}" }.join(" "), result.inner_text.strip, - result.inner_text) - end +
+

para

+
+ HTML + end - doctypes = [Nokogiri::XML::Document, Nokogiri::HTML4::Document] - doctypes << Nokogiri::HTML5::Document if defined?(Nokogiri::HTML5::Document) + def assert_result_rows(intarray, result, word = "row") + assert_equal(intarray.size, result.size, + "unexpected number of rows returned: '#{result.inner_text}'") + assert_equal(intarray.map { |j| "#{word}#{j}" }.join(" "), result.inner_text.strip, + result.inner_text) + end - doctypes.each do |doctype| - describe doctype do - let(:subject_class) { doctype } + doctypes = [Nokogiri::XML::Document, Nokogiri::HTML4::Document] + doctypes << Nokogiri::HTML5::Document if defined?(Nokogiri::HTML5::Document) - it "selects even" do - assert_result_rows([2, 4, 6, 8, 10, 12, 14], subject.search("table//tr:nth(even)")) - end + doctypes.each do |doctype| + describe doctype do + let(:subject_class) { doctype } - it "selects odd" do - assert_result_rows([1, 3, 5, 7, 9, 11, 13], subject.search("table//tr:nth(odd)")) - end + it "selects even" do + assert_result_rows([2, 4, 6, 8, 10, 12, 14], subject.search("table//tr:nth(even)")) + end - it "selects n" do - assert_result_rows((1..14).to_a, subject.search("table//tr:nth(n)")) - end + it "selects odd" do + assert_result_rows([1, 3, 5, 7, 9, 11, 13], subject.search("table//tr:nth(odd)")) + end - it "selects 2n" do - assert_equal(subject.search("table//tr:nth(even)").inner_text, subject.search("table//tr:nth(2n)").inner_text) - end + it "selects n" do + assert_result_rows((1..14).to_a, subject.search("table//tr:nth(n)")) + end - it "selects 2np1" do - assert_equal(subject.search("table//tr:nth(odd)").inner_text, subject.search("table//tr:nth(2n+1)").inner_text) - end + it "selects 2n" do + assert_equal(subject.search("table//tr:nth(even)").inner_text, subject.search("table//tr:nth(2n)").inner_text) + end - it "selects 4np3" do - assert_result_rows([3, 7, 11], subject.search("table//tr:nth(4n+3)")) - end + it "selects 2np1" do + 
assert_equal(subject.search("table//tr:nth(odd)").inner_text, subject.search("table//tr:nth(2n+1)").inner_text) + end - it "selects 3np4" do - assert_result_rows([4, 7, 10, 13], subject.search("table//tr:nth(3n+4)")) - end + it "selects 4np3" do + assert_result_rows([3, 7, 11], subject.search("table//tr:nth(4n+3)")) + end - it "selects mnp3" do - assert_result_rows([1, 2, 3], subject.search("table//tr:nth(-n+3)")) - end + it "selects 3np4" do + assert_result_rows([4, 7, 10, 13], subject.search("table//tr:nth(3n+4)")) + end - it "selects 4nm1" do - assert_result_rows([3, 7, 11], subject.search("table//tr:nth(4n-1)")) - end + it "selects mnp3" do + assert_result_rows([1, 2, 3], subject.search("table//tr:nth(-n+3)")) + end - it "selects np3" do - assert_result_rows([3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], subject.search("table//tr:nth(n+3)")) - end + it "selects 4nm1" do + assert_result_rows([3, 7, 11], subject.search("table//tr:nth(4n-1)")) + end - it "selects first" do - assert_result_rows([1], subject.search("table//tr:first")) - assert_result_rows([1], subject.search("table//tr:first()")) - end + it "selects np3" do + assert_result_rows([3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], subject.search("table//tr:nth(n+3)")) + end - it "selects last" do - assert_result_rows([14], subject.search("table//tr:last")) - assert_result_rows([14], subject.search("table//tr:last()")) - end + it "selects first" do + assert_result_rows([1], subject.search("table//tr:first")) + assert_result_rows([1], subject.search("table//tr:first()")) + end - it "selects first_child" do - assert_result_rows([1], subject.search("div/b:first-child"), "bold") - assert_result_rows([1], subject.search("table//tr:first-child")) - assert_result_rows([2, 4], subject.search("div/h1.c:first-child"), "header") - end + it "selects last" do + assert_result_rows([14], subject.search("table//tr:last")) + assert_result_rows([14], subject.search("table//tr:last()")) + end - it "selects last_child" do - 
assert_result_rows([3], subject.search("div/b:last-child"), "bold") - assert_result_rows([14], subject.search("table//tr:last-child")) - assert_result_rows([3, 4], subject.search("div/h1.c:last-child"), "header") - end + it "selects first_child" do + assert_result_rows([1], subject.search("div/b:first-child"), "bold") + assert_result_rows([1], subject.search("table//tr:first-child")) + assert_result_rows([2, 4], subject.search("div/h1.c:first-child"), "header") + end - it "selects nth_child" do - assert_result_rows([2], subject.search("div/b:nth-child(3)"), "bold") - assert_result_rows([5], subject.search("table//tr:nth-child(5)")) - assert_result_rows([1, 3], subject.search("div/h1.c:nth-child(2)"), "header") - assert_result_rows([3, 4], subject.search("div/i.b:nth-child(2n+1)"), "italic") - end + it "selects last_child" do + assert_result_rows([3], subject.search("div/b:last-child"), "bold") + assert_result_rows([14], subject.search("table//tr:last-child")) + assert_result_rows([3, 4], subject.search("div/h1.c:last-child"), "header") + end - it "selects first_of_type" do - assert_result_rows([1], subject.search("table//tr:first-of-type")) - assert_result_rows([1], subject.search("div/b:first-of-type"), "bold") - assert_result_rows([2], subject.search("div/b.a:first-of-type"), "bold") - assert_result_rows([3], subject.search("div/i.b:first-of-type"), "italic") - end + it "selects nth_child" do + assert_result_rows([2], subject.search("div/b:nth-child(3)"), "bold") + assert_result_rows([5], subject.search("table//tr:nth-child(5)")) + assert_result_rows([1, 3], subject.search("div/h1.c:nth-child(2)"), "header") + assert_result_rows([3, 4], subject.search("div/i.b:nth-child(2n+1)"), "italic") + end - it "selects last_of_type" do - assert_result_rows([14], subject.search("table//tr:last-of-type")) - assert_result_rows([3], subject.search("div/b:last-of-type"), "bold") - assert_result_rows([2, 7], subject.search("div/i:last-of-type"), "italic") - assert_result_rows([2, 
6, 7], subject.search("div i:last-of-type"), "italic") - assert_result_rows([4], subject.search("div/i.b:last-of-type"), "italic") - end + it "selects first_of_type" do + assert_result_rows([1], subject.search("table//tr:first-of-type")) + assert_result_rows([1], subject.search("div/b:first-of-type"), "bold") + assert_result_rows([2], subject.search("div/b.a:first-of-type"), "bold") + assert_result_rows([3], subject.search("div/i.b:first-of-type"), "italic") + end - it "selects nth_of_type" do - assert_result_rows([1], subject.search("div/b:nth-of-type(1)"), "bold") - assert_result_rows([2], subject.search("div/b:nth-of-type(2)"), "bold") - assert_result_rows([2], subject.search("div/.a:nth-of-type(1)"), "bold") - assert_result_rows([2, 4, 7], subject.search("div i:nth-of-type(2n)"), "italic") - assert_result_rows([1, 3, 5, 6], subject.search("div i:nth-of-type(2n+1)"), "italic") - assert_result_rows([1], subject.search("div .a:nth-of-type(2n)"), "emphasis") - assert_result_rows([2, 3], subject.search("div .a:nth-of-type(2n+1)"), "bold") - end + it "selects last_of_type" do + assert_result_rows([14], subject.search("table//tr:last-of-type")) + assert_result_rows([3], subject.search("div/b:last-of-type"), "bold") + assert_result_rows([2, 7], subject.search("div/i:last-of-type"), "italic") + assert_result_rows([2, 6, 7], subject.search("div i:last-of-type"), "italic") + assert_result_rows([4], subject.search("div/i.b:last-of-type"), "italic") + end - it "selects nth_last_of_type" do - assert_result_rows([14], subject.search("table//tr:nth-last-of-type(1)")) - assert_result_rows([12], subject.search("table//tr:nth-last-of-type(3)")) - assert_result_rows([2, 6, 7], subject.search("div i:nth-last-of-type(1)"), "italic") - assert_result_rows([1, 5], subject.search("div i:nth-last-of-type(2)"), "italic") - assert_result_rows([4], subject.search("div/i.b:nth-last-of-type(1)"), "italic") - assert_result_rows([3], subject.search("div/i.b:nth-last-of-type(2)"), "italic") - 
end + it "selects nth_of_type" do + assert_result_rows([1], subject.search("div/b:nth-of-type(1)"), "bold") + assert_result_rows([2], subject.search("div/b:nth-of-type(2)"), "bold") + assert_result_rows([2], subject.search("div/.a:nth-of-type(1)"), "bold") + assert_result_rows([2, 4, 7], subject.search("div i:nth-of-type(2n)"), "italic") + assert_result_rows([1, 3, 5, 6], subject.search("div i:nth-of-type(2n+1)"), "italic") + assert_result_rows([1], subject.search("div .a:nth-of-type(2n)"), "emphasis") + assert_result_rows([2, 3], subject.search("div .a:nth-of-type(2n+1)"), "bold") + end - it "selects only_of_type" do - assert_result_rows([1, 4], subject.search("div/p:only-of-type"), "para") - assert_result_rows([5], subject.search("div/i.c:only-of-type"), "italic") - end + it "selects nth_last_of_type" do + assert_result_rows([14], subject.search("table//tr:nth-last-of-type(1)")) + assert_result_rows([12], subject.search("table//tr:nth-last-of-type(3)")) + assert_result_rows([2, 6, 7], subject.search("div i:nth-last-of-type(1)"), "italic") + assert_result_rows([1, 5], subject.search("div i:nth-last-of-type(2)"), "italic") + assert_result_rows([4], subject.search("div/i.b:nth-last-of-type(1)"), "italic") + assert_result_rows([3], subject.search("div/i.b:nth-last-of-type(2)"), "italic") + end - it "selects only_child" do - assert_result_rows([4], subject.search("div/p:only-child"), "para") - assert_result_rows([4], subject.search("div/h1.c:only-child"), "header") - end + it "selects only_of_type" do + assert_result_rows([1, 4], subject.search("div/p:only-of-type"), "para") + assert_result_rows([5], subject.search("div/i.c:only-of-type"), "italic") + end - it "selects empty" do - result = subject.search("p:empty") - assert_equal(1, result.size, "unexpected number of rows returned: '#{result.inner_text}'") - assert_equal("empty", result.first["class"]) - end + it "selects only_child" do + assert_result_rows([4], subject.search("div/p:only-child"), "para") + 
assert_result_rows([4], subject.search("div/h1.c:only-child"), "header") + end - it "selects parent" do - result = subject.search("p:parent") - assert_equal(5, result.size) - 0.upto(3) do |j| - assert_equal("para#{j + 1} ", result[j].inner_text) + it "selects empty" do + result = subject.search("p:empty") + assert_equal(1, result.size, "unexpected number of rows returned: '#{result.inner_text}'") + assert_equal("empty", result.first["class"]) end - assert_equal("not-empty", result[4]["class"]) - end - it "selects siblings" do - html = <<~HTML -
-

p1

-

p2

-

p3

-

p4

-

p5

- HTML - doc = subject_class.parse(html) - assert_equal(2, doc.search("#3 ~ p").size) - assert_equal("p4 p5 ", doc.search("#3 ~ p").inner_text) - assert_equal(0, doc.search("#5 ~ p").size) - - assert_equal(1, doc.search("#3 + p").size) - assert_equal("p4 ", doc.search("#3 + p").inner_text) - assert_equal(0, doc.search("#5 + p").size) - end + it "selects parent" do + result = subject.search("p:parent") + assert_equal(5, result.size) + 0.upto(3) do |j| + assert_equal("para#{j + 1} ", result[j].inner_text) + end + assert_equal("not-empty", result[4]["class"]) + end - it "selects has_a" do - result = nested.css("div:has(b)") - expected = [ - nested.at_css(".unnested.direct"), - nested.at_css(".unnested.indirect"), - nested.at_css(".nested-parent"), - nested.at_css(".nested-child.direct"), - nested.at_css(".nested-child.indirect"), - nested.at_css(".has-bold"), - ] - assert_equal(expected, result.to_a) - end + it "selects siblings" do + html = <<~HTML +
+

p1

+

p2

+

p3

+

p4

+

p5

+ HTML + doc = subject_class.parse(html) + assert_equal(2, doc.search("#3 ~ p").size) + assert_equal("p4 p5 ", doc.search("#3 ~ p").inner_text) + assert_equal(0, doc.search("#5 ~ p").size) + + assert_equal(1, doc.search("#3 + p").size) + assert_equal("p4 ", doc.search("#3 + p").inner_text) + assert_equal(0, doc.search("#5 + p").size) + end - it "selects has_a_gt_b" do - result = nested.css("body *:has(div > b)") - expected = [ - nested.at_css(".nested-parent"), - ] - assert_equal(expected, result.to_a) - end + it "selects has_a" do + result = nested.css("div:has(b)") + expected = [ + nested.at_css(".unnested.direct"), + nested.at_css(".unnested.indirect"), + nested.at_css(".nested-parent"), + nested.at_css(".nested-child.direct"), + nested.at_css(".nested-child.indirect"), + nested.at_css(".has-bold"), + ] + assert_equal(expected, result.to_a) + end - it "selects has_gt_b" do - result = nested.css("body *:has(> b)") - expected = [ - nested.at_css(".unnested.direct"), - nested.at_css(".unnested.indirect"), - nested.at_css(".nested-child.direct"), - nested.at_css(".nested-child.indirect"), - nested.at_css(".has-bold"), - ] - assert_equal(expected, result.to_a) - end + it "selects has_a_gt_b" do + result = nested.css("body *:has(div > b)") + expected = [ + nested.at_css(".nested-parent"), + ] + assert_equal(expected, result.to_a) + end - it "selects has_a_plus_b" do - result = nested.css("div:has(b + p)") - expected = [ - nested.at_css(".unnested.direct"), - nested.at_css(".nested-parent"), - nested.at_css(".nested-child.direct"), - ] - assert_equal(expected, result.to_a) - end + it "selects has_gt_b" do + result = nested.css("body *:has(> b)") + expected = [ + nested.at_css(".unnested.direct"), + nested.at_css(".unnested.indirect"), + nested.at_css(".nested-child.direct"), + nested.at_css(".nested-child.indirect"), + nested.at_css(".has-bold"), + ] + assert_equal(expected, result.to_a) + end - it "selects has_plus_b" do - result = nested.css("b:has(+ p)") - expected 
= [ - nested.at_css(".unnested.direct b"), - nested.at_css(".nested-child.direct b"), - ] - assert_equal(expected, result.to_a) - end + it "selects has_a_plus_b" do + result = nested.css("div:has(b + p)") + expected = [ + nested.at_css(".unnested.direct"), + nested.at_css(".nested-parent"), + nested.at_css(".nested-child.direct"), + ] + assert_equal(expected, result.to_a) + end - it "selects has_a_tilde_b" do - result = nested.css("div:has(b ~ p)") - expected = [ - nested.at_css(".unnested.direct"), - nested.at_css(".unnested.indirect"), - nested.at_css(".nested-parent"), - nested.at_css(".nested-child.direct"), - nested.at_css(".nested-child.indirect"), - ] - assert_equal(expected, result.to_a) - end + it "selects has_plus_b" do + result = nested.css("b:has(+ p)") + expected = [ + nested.at_css(".unnested.direct b"), + nested.at_css(".nested-child.direct b"), + ] + assert_equal(expected, result.to_a) + end - it "selects has_tilde_b" do - result = nested.css("b:has(~ p)") - expected = [ - nested.at_css(".unnested.direct b"), - nested.at_css(".unnested.indirect b"), - nested.at_css(".nested-child.direct b"), - nested.at_css(".nested-child.indirect b"), - ].flatten - assert_equal(expected, result.to_a) - end + it "selects has_a_tilde_b" do + result = nested.css("div:has(b ~ p)") + expected = [ + nested.at_css(".unnested.direct"), + nested.at_css(".unnested.indirect"), + nested.at_css(".nested-parent"), + nested.at_css(".nested-child.direct"), + nested.at_css(".nested-child.indirect"), + ] + assert_equal(expected, result.to_a) + end - it "selects class_attr_selector" do - doc = subject_class.parse(<<~HTML) - -
space-delimited
-
tab-delimited
-
newline-delimited
-
carriage-return-delimited
- - HTML + it "selects has_tilde_b" do + result = nested.css("b:has(~ p)") + expected = [ + nested.at_css(".unnested.direct b"), + nested.at_css(".unnested.indirect b"), + nested.at_css(".nested-child.direct b"), + nested.at_css(".nested-child.indirect b"), + ].flatten + assert_equal(expected, result.to_a) + end - result = doc.css("div[class~='asdf']") - assert_equal(4, result.length) + it "selects class_attr_selector" do + doc = subject_class.parse(<<~HTML) + +
space-delimited
+
tab-delimited
+
newline-delimited
+
carriage-return-delimited
+ + HTML - result = doc.css("div[@class~='asdf']") - assert_equal(4, result.length) + result = doc.css("div[class~='asdf']") + assert_equal(4, result.length) - expected = doc.css("div") - assert_equal(expected, result) + result = doc.css("div[@class~='asdf']") + assert_equal(4, result.length) + + expected = doc.css("div") + assert_equal(expected, result) + end end end end From 461811a8e095b1f2b8059f61deead68d88b0a744 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Tue, 11 Jan 2022 21:06:17 -0500 Subject: [PATCH 2/8] style: reformat parser.y --- lib/nokogiri/css/parser.rb | 193 +++++++------- lib/nokogiri/css/parser.y | 506 +++++++++++++++++++------------------ 2 files changed, 356 insertions(+), 343 deletions(-) diff --git a/lib/nokogiri/css/parser.rb b/lib/nokogiri/css/parser.rb index 5cdc80d44ba..5d67e4f16ab 100644 --- a/lib/nokogiri/css/parser.rb +++ b/lib/nokogiri/css/parser.rb @@ -1,4 +1,3 @@ -# frozen_string_literal: true # # DO NOT MODIFY!!!! # This file is automatically generated by Racc 1.6.0 @@ -341,7 +340,7 @@ def unescape_css_string(str) # reduce 0 omitted def _reduce_1(val, _values, result) - result = [val.first, val.last].flatten + result = [val[0], val[2]].flatten result end @@ -352,7 +351,7 @@ def _reduce_2(val, _values, result) end def _reduce_3(val, _values, result) - result = [val.last].flatten + result = [val[1]].flatten result end @@ -382,11 +381,11 @@ def _reduce_8(val, _values, result) end def _reduce_9(val, _values, result) - result = if val[1].nil? - val.first - else - Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]]) - end + result = if val[1].nil? 
+ val[0] + else + Node.new(:CONDITIONAL_SELECTOR, [val[0], val[1]]) + end result end @@ -394,39 +393,39 @@ def _reduce_9(val, _values, result) # reduce 10 omitted def _reduce_11(val, _values, result) - result = Node.new(:CONDITIONAL_SELECTOR, val) + result = Node.new(:CONDITIONAL_SELECTOR, val) result end def _reduce_12(val, _values, result) - result = Node.new(:CONDITIONAL_SELECTOR, val) + result = Node.new(:CONDITIONAL_SELECTOR, val) result end def _reduce_13(val, _values, result) - result = Node.new(:CONDITIONAL_SELECTOR, - [Node.new(:ELEMENT_NAME, ['*']), val.first] - ) + result = Node.new(:CONDITIONAL_SELECTOR, + [Node.new(:ELEMENT_NAME, ['*']), val[0]] + ) result end def _reduce_14(val, _values, result) - result = Node.new(val.first, [nil, val.last]) + result = Node.new(val[0], [nil, val[1]]) result end def _reduce_15(val, _values, result) - result = Node.new(val[1], [val.first, val.last]) + result = Node.new(val[1], [val[0], val[2]]) result end def _reduce_16(val, _values, result) - result = Node.new(:DESCENDANT_SELECTOR, [val.first, val.last]) + result = Node.new(:DESCENDANT_SELECTOR, [val[0], val[2]]) result end @@ -446,16 +445,16 @@ def _reduce_20(val, _values, result) end def _reduce_21(val, _values, result) - result = Node.new(:ELEMENT_NAME, - [[val.first, val.last].compact.join(':')] - ) + result = Node.new(:ELEMENT_NAME, + [[val[0], val[2]].compact.join(':')] + ) result end def _reduce_22(val, _values, result) - name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first - result = Node.new(:ELEMENT_NAME, [name]) + name = @namespaces.key?('xmlns') ? 
"xmlns:#{val[0]}" : val[0] + result = Node.new(:ELEMENT_NAME, [name]) result end @@ -468,88 +467,88 @@ def _reduce_23(val, _values, result) # reduce 24 omitted def _reduce_25(val, _values, result) - result = Node.new(:ATTRIBUTE_CONDITION, - [val[1]] + (val[2] || []) - ) + result = Node.new(:ATTRIBUTE_CONDITION, + [val[1]] + (val[2] || []) + ) result end def _reduce_26(val, _values, result) - result = Node.new(:ATTRIBUTE_CONDITION, - [val[1]] + (val[2] || []) - ) + result = Node.new(:ATTRIBUTE_CONDITION, + [val[1]] + (val[2] || []) + ) result end def _reduce_27(val, _values, result) - # non-standard, from hpricot - result = Node.new(:PSEUDO_CLASS, - [Node.new(:FUNCTION, ['nth-child(', val[1]])] - ) + # non-standard, from hpricot + result = Node.new(:PSEUDO_CLASS, + [Node.new(:FUNCTION, ['nth-child(', val[1]])] + ) result end def _reduce_28(val, _values, result) - result = Node.new(:ATTRIB_NAME, - [[val.first, val.last].compact.join(':')] - ) + result = Node.new(:ATTRIB_NAME, + [[val[0], val[2]].compact.join(':')] + ) result end def _reduce_29(val, _values, result) - # Default namespace is not applied to attributes. - # So we don't add prefix "xmlns:" as in namespaced_ident. - result = Node.new(:ATTRIB_NAME, [val.first]) + # Default namespace is not applied to attributes. + # So we don't add prefix "xmlns:" as in namespaced_ident. 
+ result = Node.new(:ATTRIB_NAME, [val[0]]) result end def _reduce_30(val, _values, result) - result = Node.new(:FUNCTION, [val.first.strip]) + result = Node.new(:FUNCTION, [val[0].strip]) result end def _reduce_31(val, _values, result) - result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten) + result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) result end def _reduce_32(val, _values, result) - result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten) + result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) result end def _reduce_33(val, _values, result) - result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten) + result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) result end def _reduce_34(val, _values, result) - result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten) + result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) result end def _reduce_35(val, _values, result) - result = [val.first, val.last] + result = [val[0], val[2]] result end def _reduce_36(val, _values, result) - result = [val.first, val.last] + result = [val[0], val[2]] result end def _reduce_37(val, _values, result) - result = [val.first, val.last] + result = [val[0], val[2]] result end @@ -558,72 +557,68 @@ def _reduce_37(val, _values, result) # reduce 39 omitted def _reduce_40(val, _values, result) - case val[0] - when 'even' - result = Node.new(:NTH, ['2','n','+','0']) - when 'odd' - result = Node.new(:NTH, ['2','n','+','1']) - when 'n' - result = Node.new(:NTH, ['1','n','+','0']) - else - # non-standard to support custom functions: - # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)')) - # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)')) - # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)')) - result = val - end + case val[0] + when 'even' + result = Node.new(:NTH, ['2','n','+','0']) + when 'odd' + result = Node.new(:NTH, ['2','n','+','1']) + when 'n' + result = Node.new(:NTH, 
['1','n','+','0']) + else + result = val + end result end def _reduce_41(val, _values, result) - if val[1] == 'n' - result = Node.new(:NTH, val) - else - raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" - end + if val[1] == 'n' + result = Node.new(:NTH, val) + else + raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" + end result end def _reduce_42(val, _values, result) # n+3, -n+3 - if val[0] == 'n' - val.unshift("1") - result = Node.new(:NTH, val) - elsif val[0] == '-n' - val[0] = 'n' - val.unshift("-1") - result = Node.new(:NTH, val) - else - raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" - end + if val[0] == 'n' + val.unshift("1") + result = Node.new(:NTH, val) + elsif val[0] == '-n' + val[0] = 'n' + val.unshift("-1") + result = Node.new(:NTH, val) + else + raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" + end result end def _reduce_43(val, _values, result) # 5n, -5n, 10n-1 - n = val[1] - if n[0, 2] == 'n-' - val[1] = 'n' - val << "-" - # b is contained in n as n is the string "n-b" - val << n[2, n.size] - result = Node.new(:NTH, val) - elsif n == 'n' - val << "+" - val << "0" - result = Node.new(:NTH, val) - else - raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" - end + n = val[1] + if n[0, 2] == 'n-' + val[1] = 'n' + val << "-" + # b is contained in n as n is the string "n-b" + val << n[2, n.size] + result = Node.new(:NTH, val) + elsif n == 'n' + val << "+" + val << "0" + result = Node.new(:NTH, val) + else + raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" + end result end def _reduce_44(val, _values, result) - result = Node.new(:PSEUDO_CLASS, [val[1]]) + result = Node.new(:PSEUDO_CLASS, [val[1]]) result end @@ -638,31 +633,31 @@ def _reduce_45(val, _values, result) # reduce 47 omitted def _reduce_48(val, _values, result) - result = Node.new(:COMBINATOR, val) + result = Node.new(:COMBINATOR, val) result end def _reduce_49(val, _values, result) - result = Node.new(:COMBINATOR, val) + result = 
Node.new(:COMBINATOR, val) result end def _reduce_50(val, _values, result) - result = Node.new(:COMBINATOR, val) + result = Node.new(:COMBINATOR, val) result end def _reduce_51(val, _values, result) - result = Node.new(:COMBINATOR, val) + result = Node.new(:COMBINATOR, val) result end def _reduce_52(val, _values, result) - result = Node.new(:COMBINATOR, val) + result = Node.new(:COMBINATOR, val) result end @@ -678,22 +673,22 @@ def _reduce_52(val, _values, result) # reduce 57 omitted def _reduce_58(val, _values, result) - result = Node.new(:ID, [unescape_css_identifier(val.first)]) + result = Node.new(:ID, [unescape_css_identifier(val[0])]) result end def _reduce_59(val, _values, result) - result = [val.first, unescape_css_identifier(val[1])] + result = [val[0], unescape_css_identifier(val[1])] result end def _reduce_60(val, _values, result) - result = [val.first, unescape_css_string(val[1])] + result = [val[0], unescape_css_string(val[1])] result end def _reduce_61(val, _values, result) - result = [val.first, val[1]] + result = [val[0], val[1]] result end @@ -735,7 +730,7 @@ def _reduce_69(val, _values, result) end def _reduce_70(val, _values, result) - result = Node.new(:NOT, [val[1]]) + result = Node.new(:NOT, [val[1]]) result end diff --git a/lib/nokogiri/css/parser.y b/lib/nokogiri/css/parser.y index d5c4af1f6c1..6f1733ffe9d 100644 --- a/lib/nokogiri/css/parser.y +++ b/lib/nokogiri/css/parser.y @@ -5,250 +5,268 @@ token COMMA NUMBER PREFIXMATCH SUFFIXMATCH SUBSTRINGMATCH TILDE NOT_EQUAL token SLASH DOUBLESLASH NOT EQUAL RPAREN LSQUARE RSQUARE HAS rule - selector - : selector COMMA simple_selector_1toN { - result = [val.first, val.last].flatten - } - | prefixless_combinator_selector { result = val.flatten } - | optional_S simple_selector_1toN { result = [val.last].flatten } - ; - combinator - : PLUS { result = :DIRECT_ADJACENT_SELECTOR } - | GREATER { result = :CHILD_SELECTOR } - | TILDE { result = :FOLLOWING_SELECTOR } - | DOUBLESLASH { result = 
:DESCENDANT_SELECTOR } - | SLASH { result = :CHILD_SELECTOR } - ; - simple_selector - : element_name hcap_0toN { - result = if val[1].nil? - val.first - else - Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]]) - end - } - | function - | function pseudo { - result = Node.new(:CONDITIONAL_SELECTOR, val) - } - | function attrib { - result = Node.new(:CONDITIONAL_SELECTOR, val) - } - | hcap_1toN { - result = Node.new(:CONDITIONAL_SELECTOR, - [Node.new(:ELEMENT_NAME, ['*']), val.first] - ) - } - ; - prefixless_combinator_selector - : combinator simple_selector_1toN { - result = Node.new(val.first, [nil, val.last]) - } - ; - simple_selector_1toN - : simple_selector combinator simple_selector_1toN { - result = Node.new(val[1], [val.first, val.last]) - } - | simple_selector S simple_selector_1toN { - result = Node.new(:DESCENDANT_SELECTOR, [val.first, val.last]) - } - | simple_selector - ; - class - : '.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) } - ; - element_name - : namespaced_ident - | '*' { result = Node.new(:ELEMENT_NAME, val) } - ; - namespaced_ident - : namespace '|' IDENT { - result = Node.new(:ELEMENT_NAME, - [[val.first, val.last].compact.join(':')] - ) - } - | IDENT { - name = @namespaces.key?('xmlns') ? 
"xmlns:#{val.first}" : val.first - result = Node.new(:ELEMENT_NAME, [name]) - } - ; - namespace - : IDENT { result = val[0] } - | - ; - attrib - : LSQUARE attrib_name attrib_val_0or1 RSQUARE { - result = Node.new(:ATTRIBUTE_CONDITION, - [val[1]] + (val[2] || []) - ) - } - | LSQUARE function attrib_val_0or1 RSQUARE { - result = Node.new(:ATTRIBUTE_CONDITION, - [val[1]] + (val[2] || []) - ) - } - | LSQUARE NUMBER RSQUARE { - # non-standard, from hpricot - result = Node.new(:PSEUDO_CLASS, - [Node.new(:FUNCTION, ['nth-child(', val[1]])] - ) - } - ; - attrib_name - : namespace '|' IDENT { - result = Node.new(:ATTRIB_NAME, - [[val.first, val.last].compact.join(':')] - ) - } - | IDENT { - # Default namespace is not applied to attributes. - # So we don't add prefix "xmlns:" as in namespaced_ident. - result = Node.new(:ATTRIB_NAME, [val.first]) - } - ; - function - : FUNCTION RPAREN { - result = Node.new(:FUNCTION, [val.first.strip]) - } - | FUNCTION expr RPAREN { - result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten) - } - | FUNCTION nth RPAREN { - result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten) - } - | NOT expr RPAREN { - result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten) - } - | HAS selector RPAREN { - result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten) - } - ; - expr - : NUMBER COMMA expr { result = [val.first, val.last] } - | STRING COMMA expr { result = [val.first, val.last] } - | IDENT COMMA expr { result = [val.first, val.last] } - | NUMBER - | STRING - | IDENT # even, odd - { - case val[0] - when 'even' - result = Node.new(:NTH, ['2','n','+','0']) - when 'odd' - result = Node.new(:NTH, ['2','n','+','1']) - when 'n' - result = Node.new(:NTH, ['1','n','+','0']) - else - # non-standard to support custom functions: - # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)')) - # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)')) - # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 
10)')) - result = val - end - } - ; - nth - : NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3 - { - if val[1] == 'n' - result = Node.new(:NTH, val) - else - raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" - end - } - | IDENT PLUS NUMBER { # n+3, -n+3 - if val[0] == 'n' - val.unshift("1") - result = Node.new(:NTH, val) - elsif val[0] == '-n' - val[0] = 'n' - val.unshift("-1") - result = Node.new(:NTH, val) - else - raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" - end - } - | NUMBER IDENT { # 5n, -5n, 10n-1 - n = val[1] - if n[0, 2] == 'n-' - val[1] = 'n' - val << "-" - # b is contained in n as n is the string "n-b" - val << n[2, n.size] - result = Node.new(:NTH, val) - elsif n == 'n' - val << "+" - val << "0" - result = Node.new(:NTH, val) - else - raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" - end - } - ; - pseudo - : ':' function { - result = Node.new(:PSEUDO_CLASS, [val[1]]) - } - | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) } - ; - hcap_0toN - : hcap_1toN - | - ; - hcap_1toN - : attribute_id hcap_1toN { - result = Node.new(:COMBINATOR, val) - } - | class hcap_1toN { - result = Node.new(:COMBINATOR, val) - } - | attrib hcap_1toN { - result = Node.new(:COMBINATOR, val) - } - | pseudo hcap_1toN { - result = Node.new(:COMBINATOR, val) - } - | negation hcap_1toN { - result = Node.new(:COMBINATOR, val) - } - | attribute_id - | class - | attrib - | pseudo - | negation - ; - attribute_id - : HASH { result = Node.new(:ID, [unescape_css_identifier(val.first)]) } - ; - attrib_val_0or1 - : eql_incl_dash IDENT { result = [val.first, unescape_css_identifier(val[1])] } - | eql_incl_dash STRING { result = [val.first, unescape_css_string(val[1])] } - | eql_incl_dash NUMBER { result = [val.first, val[1]] } - | - ; - eql_incl_dash - : EQUAL { result = :equal } - | PREFIXMATCH { result = :prefix_match } - | SUFFIXMATCH { result = :suffix_match } - | SUBSTRINGMATCH { result = :substring_match } - | NOT_EQUAL { result = :not_equal } - | INCLUDES 
{ result = :includes } - | DASHMATCH { result = :dash_match } - ; - negation - : NOT negation_arg RPAREN { - result = Node.new(:NOT, [val[1]]) - } - ; - negation_arg - : element_name - | element_name hcap_1toN - | hcap_1toN - ; - optional_S - : S - | - ; + selector: + selector COMMA simple_selector_1toN { + result = [val[0], val[2]].flatten + } + | prefixless_combinator_selector { result = val.flatten } + | optional_S simple_selector_1toN { result = [val[1]].flatten } + ; + + combinator: + PLUS { result = :DIRECT_ADJACENT_SELECTOR } + | GREATER { result = :CHILD_SELECTOR } + | TILDE { result = :FOLLOWING_SELECTOR } + | DOUBLESLASH { result = :DESCENDANT_SELECTOR } + | SLASH { result = :CHILD_SELECTOR } + ; + + simple_selector: + element_name hcap_0toN { + result = if val[1].nil? + val[0] + else + Node.new(:CONDITIONAL_SELECTOR, [val[0], val[1]]) + end + } + | function + | function pseudo { + result = Node.new(:CONDITIONAL_SELECTOR, val) + } + | function attrib { + result = Node.new(:CONDITIONAL_SELECTOR, val) + } + | hcap_1toN { + result = Node.new(:CONDITIONAL_SELECTOR, + [Node.new(:ELEMENT_NAME, ['*']), val[0]] + ) + } + ; + + prefixless_combinator_selector: + combinator simple_selector_1toN { + result = Node.new(val[0], [nil, val[1]]) + } + ; + + simple_selector_1toN: + simple_selector combinator simple_selector_1toN { + result = Node.new(val[1], [val[0], val[2]]) + } + | simple_selector S simple_selector_1toN { + result = Node.new(:DESCENDANT_SELECTOR, [val[0], val[2]]) + } + | simple_selector + ; + + class: + '.' IDENT { result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) } + ; + + element_name: + namespaced_ident + | '*' { result = Node.new(:ELEMENT_NAME, val) } + ; + + namespaced_ident: + namespace '|' IDENT { + result = Node.new(:ELEMENT_NAME, + [[val[0], val[2]].compact.join(':')] + ) + } + | IDENT { + name = @namespaces.key?('xmlns') ? 
"xmlns:#{val[0]}" : val[0] + result = Node.new(:ELEMENT_NAME, [name]) + } + ; + + namespace: + IDENT { result = val[0] } + | + ; + + attrib: + LSQUARE attrib_name attrib_val_0or1 RSQUARE { + result = Node.new(:ATTRIBUTE_CONDITION, + [val[1]] + (val[2] || []) + ) + } + | LSQUARE function attrib_val_0or1 RSQUARE { + result = Node.new(:ATTRIBUTE_CONDITION, + [val[1]] + (val[2] || []) + ) + } + | LSQUARE NUMBER RSQUARE { + # non-standard, from hpricot + result = Node.new(:PSEUDO_CLASS, + [Node.new(:FUNCTION, ['nth-child(', val[1]])] + ) + } + ; + + attrib_name: + namespace '|' IDENT { + result = Node.new(:ATTRIB_NAME, + [[val[0], val[2]].compact.join(':')] + ) + } + | IDENT { + # Default namespace is not applied to attributes. + # So we don't add prefix "xmlns:" as in namespaced_ident. + result = Node.new(:ATTRIB_NAME, [val[0]]) + } + ; + + function: + FUNCTION RPAREN { + result = Node.new(:FUNCTION, [val[0].strip]) + } + | FUNCTION expr RPAREN { + result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) + } + | FUNCTION nth RPAREN { + result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) + } + | NOT expr RPAREN { + result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) + } + | HAS selector RPAREN { + result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) + } + ; + + expr: + NUMBER COMMA expr { result = [val[0], val[2]] } + | STRING COMMA expr { result = [val[0], val[2]] } + | IDENT COMMA expr { result = [val[0], val[2]] } + | NUMBER + | STRING + | IDENT { + case val[0] + when 'even' + result = Node.new(:NTH, ['2','n','+','0']) + when 'odd' + result = Node.new(:NTH, ['2','n','+','1']) + when 'n' + result = Node.new(:NTH, ['1','n','+','0']) + else + result = val + end + } + ; + + nth: + NUMBER IDENT PLUS NUMBER # 5n+3 -5n+3 + { + if val[1] == 'n' + result = Node.new(:NTH, val) + else + raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" + end + } + | IDENT PLUS NUMBER { # n+3, -n+3 + if val[0] == 'n' + val.unshift("1") + result = 
Node.new(:NTH, val) + elsif val[0] == '-n' + val[0] = 'n' + val.unshift("-1") + result = Node.new(:NTH, val) + else + raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" + end + } + | NUMBER IDENT { # 5n, -5n, 10n-1 + n = val[1] + if n[0, 2] == 'n-' + val[1] = 'n' + val << "-" + # b is contained in n as n is the string "n-b" + val << n[2, n.size] + result = Node.new(:NTH, val) + elsif n == 'n' + val << "+" + val << "0" + result = Node.new(:NTH, val) + else + raise Racc::ParseError, "parse error on IDENT '#{val[1]}'" + end + } + ; + + pseudo: + ':' function { + result = Node.new(:PSEUDO_CLASS, [val[1]]) + } + | ':' IDENT { result = Node.new(:PSEUDO_CLASS, [val[1]]) } + ; + + hcap_0toN: + hcap_1toN + | + ; + + hcap_1toN: + attribute_id hcap_1toN { + result = Node.new(:COMBINATOR, val) + } + | class hcap_1toN { + result = Node.new(:COMBINATOR, val) + } + | attrib hcap_1toN { + result = Node.new(:COMBINATOR, val) + } + | pseudo hcap_1toN { + result = Node.new(:COMBINATOR, val) + } + | negation hcap_1toN { + result = Node.new(:COMBINATOR, val) + } + | attribute_id + | class + | attrib + | pseudo + | negation + ; + + attribute_id: + HASH { result = Node.new(:ID, [unescape_css_identifier(val[0])]) } + ; + + attrib_val_0or1: + eql_incl_dash IDENT { result = [val[0], unescape_css_identifier(val[1])] } + | eql_incl_dash STRING { result = [val[0], unescape_css_string(val[1])] } + | eql_incl_dash NUMBER { result = [val[0], val[1]] } + | + ; + + eql_incl_dash: + EQUAL { result = :equal } + | PREFIXMATCH { result = :prefix_match } + | SUFFIXMATCH { result = :suffix_match } + | SUBSTRINGMATCH { result = :substring_match } + | NOT_EQUAL { result = :not_equal } + | INCLUDES { result = :includes } + | DASHMATCH { result = :dash_match } + ; + + negation: + NOT negation_arg RPAREN { + result = Node.new(:NOT, [val[1]]) + } + ; + + negation_arg: + element_name + | element_name hcap_1toN + | hcap_1toN + ; + + optional_S: + S + | + ; + end ---- header From 
f0ed1e0e8111739efee969a6b053e03ebf30fe6f Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 12 Jan 2022 08:20:52 -0500 Subject: [PATCH 3/8] fix: regression with XPath attributes in CSS selectors This commit removes "@" from the IDENT token so that we can create a new grammar rule in the parser for XPath attributes. Fixes #2419 --- lib/nokogiri/css/parser.rb | 516 +++++++++++++++++-------------- lib/nokogiri/css/parser.y | 12 + lib/nokogiri/css/tokenizer.rb | 4 +- lib/nokogiri/css/tokenizer.rex | 2 +- test/css/test_css_integration.rb | 42 +++ test/css/test_parser.rb | 8 + test/css/test_tokenizer.rb | 24 +- test/css/test_xpath_visitor.rb | 16 + 8 files changed, 377 insertions(+), 247 deletions(-) diff --git a/lib/nokogiri/css/parser.rb b/lib/nokogiri/css/parser.rb index 5d67e4f16ab..3e7f6211e48 100644 --- a/lib/nokogiri/css/parser.rb +++ b/lib/nokogiri/css/parser.rb @@ -1,3 +1,4 @@ +# frozen_string_literal: true # # DO NOT MODIFY!!!! # This file is automatically generated by Racc 1.6.0 @@ -38,190 +39,204 @@ def unescape_css_string(str) ##### State transition tables begin ### racc_action_table = [ - 24, 93, 56, 57, 33, 55, 94, 23, 24, 22, - 12, 93, 33, 27, 35, 52, 44, 22, -23, 25, - 45, 98, 23, 33, 26, 18, 20, 25, 27, -23, - 23, 24, 26, 18, 20, 33, 27, 11, 39, 24, - 22, 23, 74, 33, 18, 91, 90, 27, 22, 12, - 25, 24, -23, 23, 85, 26, 18, 20, 25, 27, - 66, 23, 24, 26, 18, 20, 33, 27, 86, 88, - 51, 22, 89, 92, 24, 26, 56, 87, 95, 60, - 90, 25, 96, 46, 23, 49, 26, 18, 20, 99, - 27, 33, 33, 51, 103, 104, 56, 58, 26, 60, - 93, 106, 33, 33, 33, 109, 39, 39, 110, 23, - 23, nil, 18, 18, 20, 27, 27, 39, 39, 39, - 23, 23, 23, 18, 18, 18, 27, 27, 27, 33, - 33, 101, 100, nil, 102, 22, 56, 87, nil, 60, - 33, nil, nil, nil, 39, 39, nil, 23, 23, nil, - 18, 18, 20, 27, 27, 39, 82, 83, 23, 56, - 87, 18, 60, nil, 27, 82, 83, 78, 79, 80, - nil, 81, nil, nil, nil, 77, 78, 79, 80, nil, - 81, 4, 5, 10, 77, 4, 5, 43, nil, 56, - 87, 6, 60, 8, 7, 6, nil, 8, 7, 4, - 5, 10, nil, nil, nil, 
nil, nil, nil, nil, 6, - nil, 8, 7 ] + 27, 11, 38, 99, 36, 12, 40, 26, 48, 25, + 49, 27, 100, 12, 30, 36, 105, 99, -26, 28, + 25, -26, 26, 27, 29, 14, 21, 23, 80, 30, + 28, 36, 72, 26, -26, 29, 14, 21, 23, 27, + 30, 91, 56, 36, 97, 96, 43, 29, 25, 26, + 27, 92, 94, 21, 36, 95, 30, 98, 28, 25, + 101, 26, 102, 29, 14, 21, 23, 96, 30, 28, + 36, 36, 26, 103, 29, 14, 21, 23, 27, 30, + 108, 107, 36, 109, 106, 43, 43, 25, 26, 26, + 27, 110, 21, 21, 111, 30, 30, 28, 99, 50, + 26, 53, 29, 14, 21, 23, 36, 30, 36, 56, + 61, 64, 113, 66, 29, 14, 116, 36, 118, 36, + nil, 43, nil, 43, 26, nil, 26, 14, 21, 23, + 21, 30, 43, 30, 43, 26, nil, 26, 36, 21, + 36, 21, 30, 25, 30, nil, nil, nil, nil, nil, + nil, 61, 62, 43, 60, 43, 26, nil, 26, nil, + 21, 23, 21, 30, 57, 30, 88, 89, 14, nil, + nil, 88, 89, nil, nil, nil, nil, 84, 85, 86, + nil, 87, 84, 85, 86, 83, 87, nil, 61, 93, + 83, 66, 61, 93, nil, 66, 61, 93, nil, 66, + 61, 93, nil, 66, nil, 14, nil, 61, 93, 14, + 66, nil, nil, 14, nil, nil, nil, 14, 4, 5, + 10, nil, nil, nil, 14, 4, 5, 47, 6, nil, + 8, 7, 4, 5, 10, 6, nil, 8, 7, nil, + nil, nil, 6, nil, 8, 7 ] racc_action_check = [ - 3, 58, 24, 24, 3, 24, 57, 15, 9, 3, - 64, 57, 9, 15, 11, 24, 18, 9, 58, 3, - 21, 64, 3, 14, 3, 3, 3, 9, 3, 22, - 9, 12, 9, 9, 9, 12, 9, 1, 14, 42, - 12, 14, 45, 42, 14, 55, 55, 14, 42, 1, - 12, 27, 46, 12, 49, 12, 12, 12, 42, 12, - 27, 42, 43, 42, 42, 42, 43, 42, 50, 53, - 27, 43, 54, 56, 23, 27, 51, 51, 59, 51, - 60, 43, 61, 23, 43, 23, 43, 43, 43, 75, - 43, 28, 25, 23, 84, 86, 25, 25, 23, 25, - 87, 91, 29, 30, 31, 94, 28, 25, 106, 28, - 25, nil, 28, 25, 25, 28, 25, 29, 30, 31, - 29, 30, 31, 29, 30, 31, 29, 30, 31, 32, - 39, 76, 76, nil, 76, 39, 90, 90, nil, 90, - 62, nil, nil, nil, 32, 39, nil, 32, 39, nil, - 32, 39, 39, 32, 39, 62, 47, 47, 62, 92, - 92, 62, 92, nil, 62, 48, 48, 47, 47, 47, - nil, 47, nil, nil, nil, 47, 48, 48, 48, nil, - 48, 0, 0, 0, 48, 17, 17, 17, nil, 93, - 93, 0, 93, 0, 0, 17, nil, 17, 17, 26, - 26, 26, nil, nil, nil, nil, 
nil, nil, nil, 26, - nil, 26, 26 ] + 3, 1, 11, 64, 3, 70, 14, 17, 21, 3, + 24, 9, 62, 1, 17, 9, 70, 62, 25, 3, + 9, 64, 3, 30, 3, 3, 3, 3, 49, 3, + 9, 16, 30, 9, 50, 9, 9, 9, 9, 12, + 9, 53, 30, 12, 60, 60, 16, 30, 12, 16, + 46, 54, 58, 16, 46, 59, 16, 61, 12, 46, + 63, 12, 65, 12, 12, 12, 12, 66, 12, 46, + 31, 32, 46, 67, 46, 46, 46, 46, 47, 46, + 82, 82, 47, 82, 81, 31, 32, 47, 31, 32, + 26, 90, 31, 32, 92, 31, 32, 47, 93, 26, + 47, 26, 47, 47, 47, 47, 28, 47, 33, 26, + 28, 28, 97, 28, 26, 26, 100, 34, 113, 35, + nil, 28, nil, 33, 28, nil, 33, 28, 28, 28, + 33, 28, 34, 33, 35, 34, nil, 35, 43, 34, + 68, 35, 34, 43, 35, nil, nil, nil, nil, nil, + nil, 27, 27, 43, 27, 68, 43, nil, 68, nil, + 43, 43, 68, 43, 27, 68, 51, 51, 27, nil, + nil, 52, 52, nil, nil, nil, nil, 51, 51, 51, + nil, 51, 52, 52, 52, 51, 52, nil, 56, 56, + 52, 56, 96, 96, nil, 96, 98, 98, nil, 98, + 99, 99, nil, 99, nil, 56, nil, 101, 101, 96, + 101, nil, nil, 98, nil, nil, nil, 99, 0, 0, + 0, nil, nil, nil, 101, 20, 20, 20, 0, nil, + 0, 0, 29, 29, 29, 20, nil, 20, 20, nil, + nil, nil, 29, nil, 29, 29 ] racc_action_pointer = [ - 174, 37, nil, -2, nil, nil, nil, nil, nil, 6, - nil, 14, 29, nil, 17, -17, nil, 178, 5, nil, - nil, -9, 0, 72, -8, 86, 192, 49, 85, 96, - 97, 98, 123, nil, nil, nil, nil, nil, nil, 124, - nil, nil, 37, 60, nil, 31, 23, 153, 162, 29, - 39, 66, nil, 46, 49, 34, 61, -1, -11, 55, - 68, 59, 134, nil, -2, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, 64, 121, nil, nil, nil, - nil, nil, nil, nil, 69, nil, 84, 88, nil, nil, - 126, 94, 149, 179, 92, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, nil, 95, nil, nil, nil, - nil ] + 211, 1, nil, -2, nil, nil, nil, nil, nil, 9, + nil, 2, 37, nil, -5, nil, 25, -17, nil, nil, + 218, -3, nil, nil, -20, -12, 88, 141, 100, 225, + 21, 64, 65, 102, 111, 113, nil, nil, nil, nil, + nil, nil, nil, 132, nil, nil, 48, 76, nil, 17, + 4, 163, 168, 16, 21, nil, 178, nil, 29, 32, + 33, 45, 5, 48, -9, 39, 55, 50, 134, nil, + -7, nil, nil, nil, 
nil, nil, nil, nil, nil, nil, + nil, 59, 70, nil, nil, nil, nil, nil, nil, nil, + 66, nil, 83, 86, nil, nil, 182, 105, 186, 190, + 103, 197, nil, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, 105, nil, nil, nil, nil, nil ] racc_action_default = [ - -75, -76, -2, -24, -4, -5, -6, -7, -8, -24, - -74, -76, -24, -3, -47, -10, -13, -17, -76, -19, - -20, -76, -22, -24, -76, -24, -75, -76, -53, -54, - -55, -56, -57, -58, -14, 111, -1, -9, -46, -24, - -11, -12, -24, -24, -18, -76, -29, -62, -62, -76, - -76, -76, -30, -76, -76, -38, -39, -40, -22, -76, - -38, -76, -71, -73, -76, -44, -45, -48, -49, -50, - -51, -52, -15, -16, -21, -76, -76, -63, -64, -65, - -66, -67, -68, -69, -76, -27, -76, -40, -31, -32, - -76, -43, -76, -76, -76, -33, -70, -72, -34, -25, - -59, -60, -61, -26, -28, -35, -76, -36, -37, -42, - -41 ] + -81, -82, -2, -27, -4, -5, -6, -7, -8, -27, + -80, -82, -27, -3, -82, -10, -53, -12, -15, -16, + -20, -82, -22, -23, -82, -25, -27, -82, -27, -81, + -82, -59, -60, -61, -62, -63, -64, -17, 119, -1, + -9, -11, -52, -27, -13, -14, -27, -27, -21, -82, + -32, -68, -68, -82, -82, -33, -82, -34, -82, -82, + -43, -44, -45, -46, -25, -82, -43, -82, -77, -79, + -82, -50, -51, -54, -55, -56, -57, -58, -18, -19, + -24, -82, -82, -69, -70, -71, -72, -73, -74, -75, + -82, -30, -82, -45, -35, -36, -82, -49, -82, -82, + -82, -82, -37, -76, -78, -38, -28, -65, -66, -67, + -29, -31, -39, -82, -40, -41, -48, -42, -47 ] racc_goto_table = [ - 53, 38, 13, 1, 42, 48, 62, 37, 34, 65, - 40, 36, 63, 75, 84, 67, 68, 69, 70, 71, - 62, 41, 50, 47, 54, nil, 63, nil, nil, 64, - nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, 72, 73, nil, nil, nil, nil, nil, nil, 97, - nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, nil, 105, nil, 107, 108 ] + 58, 42, 13, 1, 46, 52, 15, 68, 37, 71, + 55, 39, 15, 69, 41, 15, 73, 74, 75, 76, + 77, 44, 68, 81, 90, 45, 54, 51, 69, 15, + 59, nil, 70, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, 78, 79, 
nil, nil, 15, + 15, nil, nil, 104, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, 112, + nil, 114, 115, nil, 117 ] racc_goto_check = [ - 18, 12, 2, 1, 5, 9, 7, 8, 2, 9, - 10, 2, 12, 17, 17, 12, 12, 12, 12, 12, - 7, 11, 15, 16, 19, nil, 12, nil, nil, 1, - nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, 2, 2, nil, nil, nil, nil, nil, nil, 12, - nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, nil, 18, nil, 18, 18 ] + 20, 14, 2, 1, 5, 11, 6, 9, 2, 11, + 7, 2, 6, 14, 10, 6, 14, 14, 14, 14, + 14, 12, 9, 19, 19, 13, 17, 18, 14, 6, + 21, nil, 1, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, 2, 2, nil, nil, 6, + 6, nil, nil, 14, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, nil, nil, nil, nil, 20, + nil, 20, 20, nil, 20 ] racc_goto_pointer = [ - nil, 3, -1, nil, nil, -13, nil, -19, -7, -18, - -5, 6, -13, nil, nil, -1, 0, -34, -24, 0, - nil, nil, nil, nil ] + nil, 3, -1, nil, nil, -16, 3, -16, nil, -21, + -2, -21, 4, 8, -15, nil, nil, 0, 1, -28, + -27, 3, nil, nil, nil, nil ] racc_goto_default = [ - nil, nil, nil, 2, 3, 9, 17, 14, nil, 15, - 31, 30, 16, 29, 19, 21, nil, nil, 59, nil, - 28, 32, 76, 61 ] + nil, nil, nil, 2, 3, 9, 63, 19, 20, 16, + nil, 17, 34, 33, 18, 32, 22, 24, nil, nil, + 65, nil, 31, 35, 82, 67 ] racc_reduce_table = [ 0, 0, :racc_error, - 3, 32, :_reduce_1, - 1, 32, :_reduce_2, - 2, 32, :_reduce_3, - 1, 36, :_reduce_4, - 1, 36, :_reduce_5, - 1, 36, :_reduce_6, - 1, 36, :_reduce_7, - 1, 36, :_reduce_8, - 2, 37, :_reduce_9, - 1, 37, :_reduce_none, - 2, 37, :_reduce_11, - 2, 37, :_reduce_12, - 1, 37, :_reduce_13, - 2, 34, :_reduce_14, - 3, 33, :_reduce_15, - 3, 33, :_reduce_16, - 1, 33, :_reduce_none, - 2, 44, :_reduce_18, - 1, 38, :_reduce_none, - 1, 38, :_reduce_20, - 3, 45, :_reduce_21, - 1, 45, :_reduce_22, - 1, 46, :_reduce_23, - 0, 46, :_reduce_none, - 4, 42, :_reduce_25, - 4, 42, :_reduce_26, - 3, 42, :_reduce_27, - 3, 47, :_reduce_28, - 1, 47, :_reduce_29, - 
2, 40, :_reduce_30, - 3, 40, :_reduce_31, - 3, 40, :_reduce_32, - 3, 40, :_reduce_33, - 3, 40, :_reduce_34, - 3, 49, :_reduce_35, - 3, 49, :_reduce_36, - 3, 49, :_reduce_37, - 1, 49, :_reduce_none, - 1, 49, :_reduce_none, - 1, 49, :_reduce_40, - 4, 50, :_reduce_41, - 3, 50, :_reduce_42, - 2, 50, :_reduce_43, - 2, 41, :_reduce_44, - 2, 41, :_reduce_45, - 1, 39, :_reduce_none, - 0, 39, :_reduce_none, - 2, 43, :_reduce_48, - 2, 43, :_reduce_49, - 2, 43, :_reduce_50, - 2, 43, :_reduce_51, - 2, 43, :_reduce_52, - 1, 43, :_reduce_none, - 1, 43, :_reduce_none, - 1, 43, :_reduce_none, - 1, 43, :_reduce_none, - 1, 43, :_reduce_none, - 1, 51, :_reduce_58, - 2, 48, :_reduce_59, - 2, 48, :_reduce_60, - 2, 48, :_reduce_61, - 0, 48, :_reduce_none, - 1, 53, :_reduce_63, - 1, 53, :_reduce_64, - 1, 53, :_reduce_65, - 1, 53, :_reduce_66, - 1, 53, :_reduce_67, - 1, 53, :_reduce_68, - 1, 53, :_reduce_69, - 3, 52, :_reduce_70, - 1, 54, :_reduce_none, - 2, 54, :_reduce_none, - 1, 54, :_reduce_none, - 1, 35, :_reduce_none, - 0, 35, :_reduce_none ] - -racc_reduce_n = 76 - -racc_shift_n = 111 + 3, 33, :_reduce_1, + 1, 33, :_reduce_2, + 2, 33, :_reduce_3, + 1, 37, :_reduce_4, + 1, 37, :_reduce_5, + 1, 37, :_reduce_6, + 1, 37, :_reduce_7, + 1, 37, :_reduce_8, + 2, 38, :_reduce_9, + 1, 39, :_reduce_10, + 2, 40, :_reduce_11, + 1, 40, :_reduce_none, + 2, 40, :_reduce_13, + 2, 40, :_reduce_14, + 1, 40, :_reduce_15, + 1, 40, :_reduce_none, + 2, 35, :_reduce_17, + 3, 34, :_reduce_18, + 3, 34, :_reduce_19, + 1, 34, :_reduce_none, + 2, 47, :_reduce_21, + 1, 41, :_reduce_none, + 1, 41, :_reduce_23, + 3, 48, :_reduce_24, + 1, 48, :_reduce_25, + 1, 49, :_reduce_26, + 0, 49, :_reduce_none, + 4, 45, :_reduce_28, + 4, 45, :_reduce_29, + 3, 45, :_reduce_30, + 3, 50, :_reduce_31, + 1, 50, :_reduce_32, + 1, 50, :_reduce_none, + 2, 43, :_reduce_34, + 3, 43, :_reduce_35, + 3, 43, :_reduce_36, + 3, 43, :_reduce_37, + 3, 43, :_reduce_38, + 3, 52, :_reduce_39, + 3, 52, :_reduce_40, + 3, 52, :_reduce_41, + 3, 52, 
:_reduce_42, + 1, 52, :_reduce_none, + 1, 52, :_reduce_none, + 1, 52, :_reduce_45, + 1, 52, :_reduce_none, + 4, 53, :_reduce_47, + 3, 53, :_reduce_48, + 2, 53, :_reduce_49, + 2, 44, :_reduce_50, + 2, 44, :_reduce_51, + 1, 42, :_reduce_none, + 0, 42, :_reduce_none, + 2, 46, :_reduce_54, + 2, 46, :_reduce_55, + 2, 46, :_reduce_56, + 2, 46, :_reduce_57, + 2, 46, :_reduce_58, + 1, 46, :_reduce_none, + 1, 46, :_reduce_none, + 1, 46, :_reduce_none, + 1, 46, :_reduce_none, + 1, 46, :_reduce_none, + 1, 54, :_reduce_64, + 2, 51, :_reduce_65, + 2, 51, :_reduce_66, + 2, 51, :_reduce_67, + 0, 51, :_reduce_none, + 1, 56, :_reduce_69, + 1, 56, :_reduce_70, + 1, 56, :_reduce_71, + 1, 56, :_reduce_72, + 1, 56, :_reduce_73, + 1, 56, :_reduce_74, + 1, 56, :_reduce_75, + 3, 55, :_reduce_76, + 1, 57, :_reduce_none, + 2, 57, :_reduce_none, + 1, 57, :_reduce_none, + 1, 36, :_reduce_none, + 0, 36, :_reduce_none ] + +racc_reduce_n = 82 + +racc_shift_n = 119 racc_token_table = { false => 0, @@ -251,12 +266,13 @@ def unescape_css_string(str) :LSQUARE => 24, :RSQUARE => 25, :HAS => 26, - "." => 27, - "*" => 28, - "|" => 29, - ":" => 30 } + "@" => 27, + "." => 28, + "*" => 29, + "|" => 30, + ":" => 31 } -racc_nt_base = 31 +racc_nt_base = 32 racc_use_result_var = true @@ -304,6 +320,7 @@ def unescape_css_string(str) "LSQUARE", "RSQUARE", "HAS", + "\"@\"", "\".\"", "\"*\"", "\"|\"", @@ -314,6 +331,8 @@ def unescape_css_string(str) "prefixless_combinator_selector", "optional_S", "combinator", + "xpath_attribute_name", + "xpath_attribute", "simple_selector", "element_name", "hcap_0toN", @@ -381,6 +400,16 @@ def _reduce_8(val, _values, result) end def _reduce_9(val, _values, result) + result = val.join + result +end + +def _reduce_10(val, _values, result) + result = Node.new(:ATTRIB_NAME, [val[0]]) + result +end + +def _reduce_11(val, _values, result) result = if val[1].nil? 
val[0] else @@ -390,21 +419,21 @@ def _reduce_9(val, _values, result) result end -# reduce 10 omitted +# reduce 12 omitted -def _reduce_11(val, _values, result) +def _reduce_13(val, _values, result) result = Node.new(:CONDITIONAL_SELECTOR, val) result end -def _reduce_12(val, _values, result) +def _reduce_14(val, _values, result) result = Node.new(:CONDITIONAL_SELECTOR, val) result end -def _reduce_13(val, _values, result) +def _reduce_15(val, _values, result) result = Node.new(:CONDITIONAL_SELECTOR, [Node.new(:ELEMENT_NAME, ['*']), val[0]] ) @@ -412,39 +441,41 @@ def _reduce_13(val, _values, result) result end -def _reduce_14(val, _values, result) +# reduce 16 omitted + +def _reduce_17(val, _values, result) result = Node.new(val[0], [nil, val[1]]) result end -def _reduce_15(val, _values, result) +def _reduce_18(val, _values, result) result = Node.new(val[1], [val[0], val[2]]) result end -def _reduce_16(val, _values, result) +def _reduce_19(val, _values, result) result = Node.new(:DESCENDANT_SELECTOR, [val[0], val[2]]) result end -# reduce 17 omitted +# reduce 20 omitted -def _reduce_18(val, _values, result) +def _reduce_21(val, _values, result) result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])]) result end -# reduce 19 omitted +# reduce 22 omitted -def _reduce_20(val, _values, result) +def _reduce_23(val, _values, result) result = Node.new(:ELEMENT_NAME, val) result end -def _reduce_21(val, _values, result) +def _reduce_24(val, _values, result) result = Node.new(:ELEMENT_NAME, [[val[0], val[2]].compact.join(':')] ) @@ -452,21 +483,21 @@ def _reduce_21(val, _values, result) result end -def _reduce_22(val, _values, result) +def _reduce_25(val, _values, result) name = @namespaces.key?('xmlns') ? 
"xmlns:#{val[0]}" : val[0] result = Node.new(:ELEMENT_NAME, [name]) result end -def _reduce_23(val, _values, result) +def _reduce_26(val, _values, result) result = val[0] result end -# reduce 24 omitted +# reduce 27 omitted -def _reduce_25(val, _values, result) +def _reduce_28(val, _values, result) result = Node.new(:ATTRIBUTE_CONDITION, [val[1]] + (val[2] || []) ) @@ -474,7 +505,7 @@ def _reduce_25(val, _values, result) result end -def _reduce_26(val, _values, result) +def _reduce_29(val, _values, result) result = Node.new(:ATTRIBUTE_CONDITION, [val[1]] + (val[2] || []) ) @@ -482,7 +513,7 @@ def _reduce_26(val, _values, result) result end -def _reduce_27(val, _values, result) +def _reduce_30(val, _values, result) # non-standard, from hpricot result = Node.new(:PSEUDO_CLASS, [Node.new(:FUNCTION, ['nth-child(', val[1]])] @@ -491,7 +522,7 @@ def _reduce_27(val, _values, result) result end -def _reduce_28(val, _values, result) +def _reduce_31(val, _values, result) result = Node.new(:ATTRIB_NAME, [[val[0], val[2]].compact.join(':')] ) @@ -499,7 +530,7 @@ def _reduce_28(val, _values, result) result end -def _reduce_29(val, _values, result) +def _reduce_32(val, _values, result) # Default namespace is not applied to attributes. # So we don't add prefix "xmlns:" as in namespaced_ident. 
result = Node.new(:ATTRIB_NAME, [val[0]]) @@ -507,56 +538,63 @@ def _reduce_29(val, _values, result) result end -def _reduce_30(val, _values, result) +# reduce 33 omitted + +def _reduce_34(val, _values, result) result = Node.new(:FUNCTION, [val[0].strip]) result end -def _reduce_31(val, _values, result) +def _reduce_35(val, _values, result) result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) result end -def _reduce_32(val, _values, result) +def _reduce_36(val, _values, result) result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) result end -def _reduce_33(val, _values, result) +def _reduce_37(val, _values, result) result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) result end -def _reduce_34(val, _values, result) +def _reduce_38(val, _values, result) result = Node.new(:FUNCTION, [val[0].strip, val[1]].flatten) result end -def _reduce_35(val, _values, result) +def _reduce_39(val, _values, result) result = [val[0], val[2]] result end -def _reduce_36(val, _values, result) +def _reduce_40(val, _values, result) result = [val[0], val[2]] result end -def _reduce_37(val, _values, result) +def _reduce_41(val, _values, result) result = [val[0], val[2]] result end -# reduce 38 omitted +def _reduce_42(val, _values, result) + result = [val[0], val[2]] + result +end -# reduce 39 omitted +# reduce 43 omitted -def _reduce_40(val, _values, result) +# reduce 44 omitted + +def _reduce_45(val, _values, result) case val[0] when 'even' result = Node.new(:NTH, ['2','n','+','0']) @@ -571,7 +609,9 @@ def _reduce_40(val, _values, result) result end -def _reduce_41(val, _values, result) +# reduce 46 omitted + +def _reduce_47(val, _values, result) if val[1] == 'n' result = Node.new(:NTH, val) else @@ -581,7 +621,7 @@ def _reduce_41(val, _values, result) result end -def _reduce_42(val, _values, result) +def _reduce_48(val, _values, result) # n+3, -n+3 if val[0] == 'n' val.unshift("1") @@ -597,7 +637,7 @@ def _reduce_42(val, _values, result) result end -def 
_reduce_43(val, _values, result) +def _reduce_49(val, _values, result) # 5n, -5n, 10n-1 n = val[1] if n[0, 2] == 'n-' @@ -617,133 +657,133 @@ def _reduce_43(val, _values, result) result end -def _reduce_44(val, _values, result) +def _reduce_50(val, _values, result) result = Node.new(:PSEUDO_CLASS, [val[1]]) result end -def _reduce_45(val, _values, result) +def _reduce_51(val, _values, result) result = Node.new(:PSEUDO_CLASS, [val[1]]) result end -# reduce 46 omitted +# reduce 52 omitted -# reduce 47 omitted +# reduce 53 omitted -def _reduce_48(val, _values, result) +def _reduce_54(val, _values, result) result = Node.new(:COMBINATOR, val) result end -def _reduce_49(val, _values, result) +def _reduce_55(val, _values, result) result = Node.new(:COMBINATOR, val) result end -def _reduce_50(val, _values, result) +def _reduce_56(val, _values, result) result = Node.new(:COMBINATOR, val) result end -def _reduce_51(val, _values, result) +def _reduce_57(val, _values, result) result = Node.new(:COMBINATOR, val) result end -def _reduce_52(val, _values, result) +def _reduce_58(val, _values, result) result = Node.new(:COMBINATOR, val) result end -# reduce 53 omitted +# reduce 59 omitted -# reduce 54 omitted +# reduce 60 omitted -# reduce 55 omitted +# reduce 61 omitted -# reduce 56 omitted +# reduce 62 omitted -# reduce 57 omitted +# reduce 63 omitted -def _reduce_58(val, _values, result) +def _reduce_64(val, _values, result) result = Node.new(:ID, [unescape_css_identifier(val[0])]) result end -def _reduce_59(val, _values, result) +def _reduce_65(val, _values, result) result = [val[0], unescape_css_identifier(val[1])] result end -def _reduce_60(val, _values, result) +def _reduce_66(val, _values, result) result = [val[0], unescape_css_string(val[1])] result end -def _reduce_61(val, _values, result) +def _reduce_67(val, _values, result) result = [val[0], val[1]] result end -# reduce 62 omitted +# reduce 68 omitted -def _reduce_63(val, _values, result) +def _reduce_69(val, _values, 
result) result = :equal result end -def _reduce_64(val, _values, result) +def _reduce_70(val, _values, result) result = :prefix_match result end -def _reduce_65(val, _values, result) +def _reduce_71(val, _values, result) result = :suffix_match result end -def _reduce_66(val, _values, result) +def _reduce_72(val, _values, result) result = :substring_match result end -def _reduce_67(val, _values, result) +def _reduce_73(val, _values, result) result = :not_equal result end -def _reduce_68(val, _values, result) +def _reduce_74(val, _values, result) result = :includes result end -def _reduce_69(val, _values, result) +def _reduce_75(val, _values, result) result = :dash_match result end -def _reduce_70(val, _values, result) +def _reduce_76(val, _values, result) result = Node.new(:NOT, [val[1]]) result end -# reduce 71 omitted +# reduce 77 omitted -# reduce 72 omitted +# reduce 78 omitted -# reduce 73 omitted +# reduce 79 omitted -# reduce 74 omitted +# reduce 80 omitted -# reduce 75 omitted +# reduce 81 omitted def _reduce_none(val, _values, result) val[0] diff --git a/lib/nokogiri/css/parser.y b/lib/nokogiri/css/parser.y index 6f1733ffe9d..69a26e83327 100644 --- a/lib/nokogiri/css/parser.y +++ b/lib/nokogiri/css/parser.y @@ -21,6 +21,14 @@ rule | SLASH { result = :CHILD_SELECTOR } ; + xpath_attribute_name: + '@' IDENT { result = val.join } + ; + + xpath_attribute: + xpath_attribute_name { result = Node.new(:ATTRIB_NAME, [val[0]]) } + ; + simple_selector: element_name hcap_0toN { result = if val[1].nil? @@ -41,6 +49,7 @@ rule [Node.new(:ELEMENT_NAME, ['*']), val[0]] ) } + | xpath_attribute ; prefixless_combinator_selector: @@ -115,6 +124,7 @@ rule # So we don't add prefix "xmlns:" as in namespaced_ident. 
result = Node.new(:ATTRIB_NAME, [val[0]]) } + | xpath_attribute ; function: @@ -139,6 +149,7 @@ rule NUMBER COMMA expr { result = [val[0], val[2]] } | STRING COMMA expr { result = [val[0], val[2]] } | IDENT COMMA expr { result = [val[0], val[2]] } + | xpath_attribute_name COMMA expr { result = [val[0], val[2]] } | NUMBER | STRING | IDENT { @@ -153,6 +164,7 @@ rule result = val end } + | xpath_attribute_name ; nth: diff --git a/lib/nokogiri/css/tokenizer.rb b/lib/nokogiri/css/tokenizer.rb index d173bc2c387..d2a5b2e7c56 100644 --- a/lib/nokogiri/css/tokenizer.rb +++ b/lib/nokogiri/css/tokenizer.rb @@ -63,10 +63,10 @@ def _next_token when (text = @ss.scan(/has\([\s]*/)) action { [:HAS, text] } - when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/)) + when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/)) action { [:FUNCTION, text] } - when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/)) + when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/)) action { [:IDENT, text] } when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/)) diff --git a/lib/nokogiri/css/tokenizer.rex b/lib/nokogiri/css/tokenizer.rex index 7db1d0e156b..10fdf76cca0 100644 --- a/lib/nokogiri/css/tokenizer.rex +++ b/lib/nokogiri/css/tokenizer.rex @@ -13,7 +13,7 @@ macro escape {unicode}|\\[^\n\r\f0-9A-Fa-f] nmchar [_A-Za-z0-9-]|{nonascii}|{escape} nmstart [_A-Za-z]|{nonascii}|{escape} - ident [-@]?({nmstart})({nmchar})* + ident 
-?({nmstart})({nmchar})* name ({nmchar})+ string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(? +
+ +
+
+
+
+ + HTML + + result = doc.css("div > @class") + assert_equal(["first", "second", "third", "fourth"], result.map(&:to_s)) + + result = doc.css("div/@class") + assert_equal(["first", "second", "third", "fourth"], result.map(&:to_s)) + + result = doc.css("div @class") + assert_equal(["first", "child", "second", "third", "fourth"], result.map(&:to_s)) + end + + it "handles xpath functions" do + doc = subject_class.parse(<<~HTML) + +
firstchild
+
second
+
third
+
fourth
+ + HTML + + result = doc.css("div > text()") + assert_equal(["first", "second", "third", "fourth"], result.map(&:to_s)) + + result = doc.css("div/text()") + assert_equal(["first", "second", "third", "fourth"], result.map(&:to_s)) + + result = doc.css("div text()") + assert_equal(["first", "child", "second", "third", "fourth"], result.map(&:to_s)) + end end end end diff --git a/test/css/test_parser.rb b/test/css/test_parser.rb index ac2f9ae20cc..9c3175002ae 100644 --- a/test/css/test_parser.rb +++ b/test/css/test_parser.rb @@ -37,5 +37,13 @@ class TestNokogiri < Nokogiri::TestCase [:FUNCTION, ["nth-child("], ["2"]],],], asts.first.to_a ) end + + it "parses xpath attributes" do + ast = parser.parse("a/@href").first + assert_equal( + [:CHILD_SELECTOR, [:ELEMENT_NAME, ["a"]], [:ATTRIB_NAME, ["@href"]]], + ast.to_a + ) + end end end diff --git a/test/css/test_tokenizer.rb b/test/css/test_tokenizer.rb index fa915ebd830..183a322e598 100644 --- a/test/css/test_tokenizer.rb +++ b/test/css/test_tokenizer.rb @@ -19,6 +19,14 @@ def setup @scanner = Nokogiri::CSS::Tokenizer.new end + def assert_tokens(tokens, scanner) + toks = [] + while (tok = @scanner.next_token) + toks << tok + end + assert_equal(tokens, toks) + end + def test_has @scanner.scan("a:has(b)") assert_tokens( @@ -189,12 +197,16 @@ def test_significant_space [:RSQUARE, "]"],], @scanner) end - def assert_tokens(tokens, scanner) - toks = [] - while (tok = @scanner.next_token) - toks << tok - end - assert_equal(tokens, toks) + def test_xpath_attributes + @scanner.scan("a/@href") + assert_tokens([[:IDENT, "a"], [:SLASH, "/"], ["@", "@"], [:IDENT, "href"]], + @scanner) + end + + def test_xpath_functions + @scanner.scan("a/text()") + assert_tokens([[:IDENT, "a"], [:SLASH, "/"], [:FUNCTION, "text("], [:RPAREN, ")"]], + @scanner) end end end diff --git a/test/css/test_xpath_visitor.rb b/test/css/test_xpath_visitor.rb index 751afb47e8a..dd1e5dada19 100644 --- a/test/css/test_xpath_visitor.rb +++ 
b/test/css/test_xpath_visitor.rb @@ -371,6 +371,10 @@ def assert_xpath(expecteds, asts) it "handles text() (non-standard)" do assert_xpath("//a[child::text()]", parser.parse("a[text()]")) assert_xpath("//child::text()", parser.parse("text()")) + assert_xpath("//a//child::text()", parser.parse("a text()")) + assert_xpath("//a/child::text()", parser.parse("a / text()")) + assert_xpath("//a/child::text()", parser.parse("a > text()")) + assert_xpath("//a//child::text()", parser.parse("a text()")) end it "handles comment() (non-standard)" do @@ -615,6 +619,14 @@ def visit_pseudo_class_aaron(node) it "avoids the wildcard when using namespaces" do assert_xpath("//ns1:foo", parser.parse("ns1|foo")) end + + it "avoids the wildcard when using attribute selectors" do + if Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch") + assert_xpath("//*:a/@href", parser.parse("a/@href")) + else + assert_xpath("//*[nokogiri-builtin:local-name-is('a')]/@href", parser.parse("a/@href")) + end + end end describe "builtins:never" do @@ -622,6 +634,10 @@ def visit_pseudo_class_aaron(node) it "matches on the element's local-name, ignoring namespaces" do assert_xpath("//*[local-name()='foo']", parser.parse("foo")) end + + it "avoids the wildcard when using attribute selectors" do + assert_xpath("//*[local-name()='a']/@href", parser.parse("a/@href")) + end end describe "builtins:optimal" do From f7786a3334f1d525146cc8197e313d2a5ac82a4b Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 12 Jan 2022 08:28:10 -0500 Subject: [PATCH 4/8] refactor: extract xpath visitor html5 namespace handling logic --- lib/nokogiri/css/xpath_visitor.rb | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/lib/nokogiri/css/xpath_visitor.rb b/lib/nokogiri/css/xpath_visitor.rb index ef7a22b59a3..8ac7188c5c2 100644 --- a/lib/nokogiri/css/xpath_visitor.rb +++ b/lib/nokogiri/css/xpath_visitor.rb @@ -249,10 +249,7 @@ def visit_conditional_selector(node) end def 
visit_element_name(node) - if @doctype == DoctypeConfig::HTML5 && node.value.first != "*" - # if there is already a namespace, use it as normal - return node.value.first if node.value.first.include?(":") - + if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node) # HTML5 has namespaces that should be ignored in CSS queries # https://github.com/sparklemotion/nokogiri/issues/2376 if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?) @@ -279,6 +276,13 @@ def accept(node) private + def html5_element_name_needs_namespace_handling(node) + # if this is the wildcard selector "*", use it as normal + node.value.first != "*" && + # if there is already a namespace (i.e., it is a prefixed QName), use it as normal + !node.value.first.include?(":") + end + def nth(node, options = {}) raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4 From 7e876a477f2d07a11bf02931a92f5890df2d97a6 Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 12 Jan 2022 17:00:55 -0500 Subject: [PATCH 5/8] refactor: CSS parser AST uses :ATTRIB_NAME nodes consistently All attribute references end up as an :ATTRIB_NAME node without the "@" present, which simplifies the XPath visitor code. 
--- lib/nokogiri/css/parser.rb | 34 ++++++++++++------------ lib/nokogiri/css/parser.y | 6 ++--- lib/nokogiri/css/xpath_visitor.rb | 22 ++++++---------- test/css/test_parser.rb | 44 ++++++++++++++++++++++++++++--- test/css/test_xpath_visitor.rb | 2 ++ 5 files changed, 71 insertions(+), 37 deletions(-) diff --git a/lib/nokogiri/css/parser.rb b/lib/nokogiri/css/parser.rb index 3e7f6211e48..ffc6e96178b 100644 --- a/lib/nokogiri/css/parser.rb +++ b/lib/nokogiri/css/parser.rb @@ -121,32 +121,32 @@ def unescape_css_string(str) -29, -31, -39, -82, -40, -41, -48, -42, -47 ] racc_goto_table = [ - 58, 42, 13, 1, 46, 52, 15, 68, 37, 71, - 55, 39, 15, 69, 41, 15, 73, 74, 75, 76, - 77, 44, 68, 81, 90, 45, 54, 51, 69, 15, - 59, nil, 70, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, 78, 79, nil, nil, 15, - 15, nil, nil, 104, nil, nil, nil, nil, nil, nil, + 58, 42, 13, 1, 46, 52, 19, 68, 37, 71, + 41, 39, 19, 69, 44, 19, 73, 74, 75, 76, + 77, 45, 68, 81, 90, 54, 51, 59, 69, 55, + nil, nil, 70, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, 78, 79, nil, nil, 19, + 19, nil, nil, 104, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, 112, nil, 114, 115, nil, 117 ] racc_goto_check = [ - 20, 14, 2, 1, 5, 11, 6, 9, 2, 11, - 7, 2, 6, 14, 10, 6, 14, 14, 14, 14, - 14, 12, 9, 19, 19, 13, 17, 18, 14, 6, - 21, nil, 1, nil, nil, nil, nil, nil, nil, nil, - nil, nil, nil, nil, nil, 2, 2, nil, nil, 6, - 6, nil, nil, 14, nil, nil, nil, nil, nil, nil, + 20, 14, 2, 1, 5, 11, 7, 9, 2, 11, + 10, 2, 7, 14, 12, 7, 14, 14, 14, 14, + 14, 13, 9, 19, 19, 17, 18, 21, 14, 7, + nil, nil, 1, nil, nil, nil, nil, nil, nil, nil, + nil, nil, nil, nil, nil, 2, 2, nil, nil, 7, + 7, nil, nil, 14, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, nil, 20, nil, 20, 20, nil, 20 ] racc_goto_pointer = [ - nil, 3, -1, nil, nil, -16, 3, -16, nil, -21, - -2, -21, 4, 8, -15, nil, nil, 0, 1, -28, - -27, 3, nil, nil, nil, nil ] + nil, 3, -1, nil, nil, 
-16, nil, 3, nil, -21, + -6, -21, -3, 4, -15, nil, nil, -1, 0, -28, + -27, 0, nil, nil, nil, nil ] racc_goto_default = [ - nil, nil, nil, 2, 3, 9, 63, 19, 20, 16, + nil, nil, nil, 2, 3, 9, 15, 63, 20, 16, nil, 17, 34, 33, 18, 32, 22, 24, nil, nil, 65, nil, 31, 35, 82, 67 ] @@ -400,7 +400,7 @@ def _reduce_8(val, _values, result) end def _reduce_9(val, _values, result) - result = val.join + result = val[1] result end diff --git a/lib/nokogiri/css/parser.y b/lib/nokogiri/css/parser.y index 69a26e83327..af1ba6bad38 100644 --- a/lib/nokogiri/css/parser.y +++ b/lib/nokogiri/css/parser.y @@ -22,7 +22,7 @@ rule ; xpath_attribute_name: - '@' IDENT { result = val.join } + '@' IDENT { result = val[1] } ; xpath_attribute: @@ -149,7 +149,7 @@ rule NUMBER COMMA expr { result = [val[0], val[2]] } | STRING COMMA expr { result = [val[0], val[2]] } | IDENT COMMA expr { result = [val[0], val[2]] } - | xpath_attribute_name COMMA expr { result = [val[0], val[2]] } + | xpath_attribute COMMA expr { result = [val[0], val[2]] } | NUMBER | STRING | IDENT { @@ -164,7 +164,7 @@ rule result = val end } - | xpath_attribute_name + | xpath_attribute ; nth: diff --git a/lib/nokogiri/css/xpath_visitor.rb b/lib/nokogiri/css/xpath_visitor.rb index 8ac7188c5c2..2a7ab074dbb 100644 --- a/lib/nokogiri/css/xpath_visitor.rb +++ b/lib/nokogiri/css/xpath_visitor.rb @@ -128,8 +128,11 @@ def visit_function(node) is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)" ".#{"//" unless is_direct}#{node.value[1].accept(self)}" else - # non-standard. this looks like a function call. - args = ["."] + node.value[1..-1] + # xpath function call, let's marshal those arguments + args = ["."] + args += node.value[1..-1].map do |n| + n.is_a?(Nokogiri::CSS::Node) ? 
n.accept(self) : n + end "#{node.value.first}#{args.join(",")})" end end @@ -149,17 +152,8 @@ def visit_id(node) end def visit_attribute_condition(node) - attribute = if (node.value.first.type == :FUNCTION) || (node.value.first.value.first =~ /::/) - "" - else - "@" - end - attribute += node.value.first.accept(self) - - # non-standard. attributes starting with '@' - attribute.gsub!(/^@@/, "@") - - return attribute unless node.value.length == 3 + attribute = node.value.first.accept(self) + return attribute if node.value.length == 1 value = node.value.last value = "'#{value}'" unless /^['"]/.match?(value) @@ -267,7 +261,7 @@ def visit_element_name(node) end def visit_attrib_name(node) - node.value.first + "@#{node.value.first}" end def accept(node) diff --git a/test/css/test_parser.rb b/test/css/test_parser.rb index 9c3175002ae..4f5ab0fa6cb 100644 --- a/test/css/test_parser.rb +++ b/test/css/test_parser.rb @@ -24,7 +24,8 @@ class TestNokogiri < Nokogiri::TestCase [:CONDITIONAL_SELECTOR, [:ELEMENT_NAME], [:PSEUDO_CLASS, - [:FUNCTION],],], ast.to_type + [:FUNCTION],],], + ast.to_type ) end @@ -34,16 +35,53 @@ class TestNokogiri < Nokogiri::TestCase [:CONDITIONAL_SELECTOR, [:ELEMENT_NAME, ["a"]], [:PSEUDO_CLASS, - [:FUNCTION, ["nth-child("], ["2"]],],], asts.first.to_a + [:FUNCTION, ["nth-child("], ["2"]],],], + asts.first.to_a + ) + end + + it "parses xpath attributes in conditional selectors" do + ast = parser.parse("a[@class~=bar]").first + assert_equal( + [:CONDITIONAL_SELECTOR, + [:ELEMENT_NAME, ["a"]], + [:ATTRIBUTE_CONDITION, + [:ATTRIB_NAME, ["class"]], + [:includes], + ["bar"],],], + ast.to_a ) end it "parses xpath attributes" do ast = parser.parse("a/@href").first assert_equal( - [:CHILD_SELECTOR, [:ELEMENT_NAME, ["a"]], [:ATTRIB_NAME, ["@href"]]], + [:CHILD_SELECTOR, [:ELEMENT_NAME, ["a"]], [:ATTRIB_NAME, ["href"]]], ast.to_a ) end + + it "parses xpath attributes passed to xpath functions" do + ast = parser.parse("a:foo(@href)").first + assert_equal( + 
[:CONDITIONAL_SELECTOR, + [:ELEMENT_NAME, ["a"]], + [:PSEUDO_CLASS, + [:FUNCTION, ["foo("], + [:ATTRIB_NAME, ["href"]],],],], + ast.to_a, + ) + + ast = parser.parse("a:foo(@href,@id)").first + assert_equal( + [:CONDITIONAL_SELECTOR, + [:ELEMENT_NAME, ["a"]], + [:PSEUDO_CLASS, + [:FUNCTION, ["foo("], + [:ATTRIB_NAME, ["href"]], + [:ATTRIB_NAME, ["id"]],],],], + ast.to_a, + ) + end end end diff --git a/test/css/test_xpath_visitor.rb b/test/css/test_xpath_visitor.rb index dd1e5dada19..d4cb4b1923b 100644 --- a/test/css/test_xpath_visitor.rb +++ b/test/css/test_xpath_visitor.rb @@ -300,7 +300,9 @@ def assert_xpath(expecteds, asts) assert_xpath("//a[active(.)]", parser.parse("a:active")) assert_xpath("//a[foo(.,@href)]", parser.parse("a:foo(@href)")) + assert_xpath("//a[foo(.,@href,@id)]", parser.parse("a:foo(@href, @id)")) assert_xpath("//a[foo(.,@a,b)]", parser.parse("a:foo(@a, b)")) + assert_xpath("//a[foo(.,a,@b)]", parser.parse("a:foo(a, @b)")) assert_xpath("//a[foo(.,a,10)]", parser.parse("a:foo(a, 10)")) assert_xpath("//a[foo(.,42)]", parser.parse("a:foo(42)")) assert_xpath("//a[foo(.,'bar')]", parser.parse("a:foo('bar')")) From 1de42ced6323861151f06f70ae99949a00506c8b Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 12 Jan 2022 17:40:30 -0500 Subject: [PATCH 6/8] style: collapse whitespace in css/parser.y --- lib/nokogiri/css/parser.rb | 48 ++++++++------------------------ lib/nokogiri/css/parser.y | 57 ++++++++------------------------------ 2 files changed, 24 insertions(+), 81 deletions(-) diff --git a/lib/nokogiri/css/parser.rb b/lib/nokogiri/css/parser.rb index ffc6e96178b..2299092a709 100644 --- a/lib/nokogiri/css/parser.rb +++ b/lib/nokogiri/css/parser.rb @@ -422,42 +422,34 @@ def _reduce_11(val, _values, result) # reduce 12 omitted def _reduce_13(val, _values, result) - result = Node.new(:CONDITIONAL_SELECTOR, val) - + result = Node.new(:CONDITIONAL_SELECTOR, val) result end def _reduce_14(val, _values, result) - result = 
Node.new(:CONDITIONAL_SELECTOR, val) - + result = Node.new(:CONDITIONAL_SELECTOR, val) result end def _reduce_15(val, _values, result) - result = Node.new(:CONDITIONAL_SELECTOR, - [Node.new(:ELEMENT_NAME, ['*']), val[0]] - ) - + result = Node.new(:CONDITIONAL_SELECTOR, [Node.new(:ELEMENT_NAME, ['*']), val[0]]) result end # reduce 16 omitted def _reduce_17(val, _values, result) - result = Node.new(val[0], [nil, val[1]]) - + result = Node.new(val[0], [nil, val[1]]) result end def _reduce_18(val, _values, result) - result = Node.new(val[1], [val[0], val[2]]) - + result = Node.new(val[1], [val[0], val[2]]) result end def _reduce_19(val, _values, result) - result = Node.new(:DESCENDANT_SELECTOR, [val[0], val[2]]) - + result = Node.new(:DESCENDANT_SELECTOR, [val[0], val[2]]) result end @@ -476,10 +468,7 @@ def _reduce_23(val, _values, result) end def _reduce_24(val, _values, result) - result = Node.new(:ELEMENT_NAME, - [[val[0], val[2]].compact.join(':')] - ) - + result = Node.new(:ELEMENT_NAME, [[val[0], val[2]].compact.join(':')]) result end @@ -498,43 +487,30 @@ def _reduce_26(val, _values, result) # reduce 27 omitted def _reduce_28(val, _values, result) - result = Node.new(:ATTRIBUTE_CONDITION, - [val[1]] + (val[2] || []) - ) + result = Node.new(:ATTRIBUTE_CONDITION, [val[1]] + (val[2] || [])) result end def _reduce_29(val, _values, result) - result = Node.new(:ATTRIBUTE_CONDITION, - [val[1]] + (val[2] || []) - ) + result = Node.new(:ATTRIBUTE_CONDITION, [val[1]] + (val[2] || [])) result end def _reduce_30(val, _values, result) - # non-standard, from hpricot - result = Node.new(:PSEUDO_CLASS, - [Node.new(:FUNCTION, ['nth-child(', val[1]])] - ) + result = Node.new(:PSEUDO_CLASS, [Node.new(:FUNCTION, ['nth-child(', val[1]])]) result end def _reduce_31(val, _values, result) - result = Node.new(:ATTRIB_NAME, - [[val[0], val[2]].compact.join(':')] - ) - + result = Node.new(:ATTRIB_NAME, [[val[0], val[2]].compact.join(':')]) result end def _reduce_32(val, _values, result) 
- # Default namespace is not applied to attributes. - # So we don't add prefix "xmlns:" as in namespaced_ident. - result = Node.new(:ATTRIB_NAME, [val[0]]) - + result = Node.new(:ATTRIB_NAME, [val[0]]) result end diff --git a/lib/nokogiri/css/parser.y b/lib/nokogiri/css/parser.y index af1ba6bad38..0431a479e82 100644 --- a/lib/nokogiri/css/parser.y +++ b/lib/nokogiri/css/parser.y @@ -38,33 +38,19 @@ rule end } | function - | function pseudo { - result = Node.new(:CONDITIONAL_SELECTOR, val) - } - | function attrib { - result = Node.new(:CONDITIONAL_SELECTOR, val) - } - | hcap_1toN { - result = Node.new(:CONDITIONAL_SELECTOR, - [Node.new(:ELEMENT_NAME, ['*']), val[0]] - ) - } + | function pseudo { result = Node.new(:CONDITIONAL_SELECTOR, val) } + | function attrib { result = Node.new(:CONDITIONAL_SELECTOR, val) } + | hcap_1toN { result = Node.new(:CONDITIONAL_SELECTOR, [Node.new(:ELEMENT_NAME, ['*']), val[0]]) } | xpath_attribute ; prefixless_combinator_selector: - combinator simple_selector_1toN { - result = Node.new(val[0], [nil, val[1]]) - } + combinator simple_selector_1toN { result = Node.new(val[0], [nil, val[1]]) } ; simple_selector_1toN: - simple_selector combinator simple_selector_1toN { - result = Node.new(val[1], [val[0], val[2]]) - } - | simple_selector S simple_selector_1toN { - result = Node.new(:DESCENDANT_SELECTOR, [val[0], val[2]]) - } + simple_selector combinator simple_selector_1toN { result = Node.new(val[1], [val[0], val[2]]) } + | simple_selector S simple_selector_1toN { result = Node.new(:DESCENDANT_SELECTOR, [val[0], val[2]]) } | simple_selector ; @@ -78,11 +64,7 @@ rule ; namespaced_ident: - namespace '|' IDENT { - result = Node.new(:ELEMENT_NAME, - [[val[0], val[2]].compact.join(':')] - ) - } + namespace '|' IDENT { result = Node.new(:ELEMENT_NAME, [[val[0], val[2]].compact.join(':')]) } | IDENT { name = @namespaces.key?('xmlns') ? 
"xmlns:#{val[0]}" : val[0] result = Node.new(:ELEMENT_NAME, [name]) @@ -96,34 +78,19 @@ rule attrib: LSQUARE attrib_name attrib_val_0or1 RSQUARE { - result = Node.new(:ATTRIBUTE_CONDITION, - [val[1]] + (val[2] || []) - ) + result = Node.new(:ATTRIBUTE_CONDITION, [val[1]] + (val[2] || [])) } | LSQUARE function attrib_val_0or1 RSQUARE { - result = Node.new(:ATTRIBUTE_CONDITION, - [val[1]] + (val[2] || []) - ) + result = Node.new(:ATTRIBUTE_CONDITION, [val[1]] + (val[2] || [])) } | LSQUARE NUMBER RSQUARE { - # non-standard, from hpricot - result = Node.new(:PSEUDO_CLASS, - [Node.new(:FUNCTION, ['nth-child(', val[1]])] - ) + result = Node.new(:PSEUDO_CLASS, [Node.new(:FUNCTION, ['nth-child(', val[1]])]) } ; attrib_name: - namespace '|' IDENT { - result = Node.new(:ATTRIB_NAME, - [[val[0], val[2]].compact.join(':')] - ) - } - | IDENT { - # Default namespace is not applied to attributes. - # So we don't add prefix "xmlns:" as in namespaced_ident. - result = Node.new(:ATTRIB_NAME, [val[0]]) - } + namespace '|' IDENT { result = Node.new(:ATTRIB_NAME, [[val[0], val[2]].compact.join(':')]) } + | IDENT { result = Node.new(:ATTRIB_NAME, [val[0]]) } | xpath_attribute ; From 1b903880f1b356d5fefc858b9955c6a2416a0c7d Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 12 Jan 2022 17:41:29 -0500 Subject: [PATCH 7/8] doc: improve Searchable#css documentation And officially document the XPath attribute extensions to CSS selector syntax that we support. See #2419 for context --- lib/nokogiri/xml/searchable.rb | 91 +++++++++++++++++++--------------- 1 file changed, 51 insertions(+), 40 deletions(-) diff --git a/lib/nokogiri/xml/searchable.rb b/lib/nokogiri/xml/searchable.rb index b99dc83b802..cc493e56f47 100644 --- a/lib/nokogiri/xml/searchable.rb +++ b/lib/nokogiri/xml/searchable.rb @@ -6,7 +6,7 @@ module XML # # The Searchable module declares the interface used for searching your DOM. 
# - # It implements the public methods `search`, `css`, and `xpath`, + # It implements the public methods #search, #css, and #xpath, # as well as allowing specific implementations to specialize some # of the important behaviors. # @@ -30,25 +30,22 @@ module Searchable # node.search('.//bike:tire', {'bike' => 'http://schwinn.com/'}) # node.search('bike|tire', {'bike' => 'http://schwinn.com/'}) # - # For XPath queries, a hash of variable bindings may also be - # appended to the namespace bindings. For example: + # For XPath queries, a hash of variable bindings may also be appended to the namespace + # bindings. For example: # # node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'}) # - # Custom XPath functions and CSS pseudo-selectors may also be - # defined. To define custom functions create a class and - # implement the function you want to define. The first argument - # to the method will be the current matching NodeSet. Any other - # arguments are ones that you pass in. Note that this class may - # appear anywhere in the argument list. For example: - # - # node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")' - # Class.new { - # def regex node_set, regex - # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ } - # end - # }.new - # ) + # 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom + # functions create a class and implement the function you want to define. The first argument + # to the method will be the current matching NodeSet. Any other arguments are ones that you + # pass in. Note that this class may appear anywhere in the argument list. For example: + # + # handler = Class.new { + # def regex node_set, regex + # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ } + # end + # }.new + # node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler) # # See Searchable#xpath and Searchable#css for further usage help. 
def search(*args) @@ -92,25 +89,40 @@ def at(*args) # # node.css('bike|tire', {'bike' => 'http://schwinn.com/'}) # - # Custom CSS pseudo classes may also be defined. To define - # custom pseudo classes, create a class and implement the custom - # pseudo class you want defined. The first argument to the - # method will be the current matching NodeSet. Any other - # arguments are ones that you pass in. For example: + # 💡 Custom CSS pseudo classes may also be defined which are mapped to a custom XPath + # function. To define custom pseudo classes, create a class and implement the custom pseudo + # class you want defined. The first argument to the method will be the matching context + # NodeSet. Any other arguments are ones that you pass in. For example: # - # node.css('title:regex("\w+")', Class.new { - # def regex node_set, regex + # handler = Class.new { + # def regex(node_set, regex) # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ } # end - # }.new) + # }.new + # node.css('title:regex("\w+")', handler) + # + # 💡 Some XPath syntax is supported in CSS queries. For example, to query for an attribute: # - # Note that the CSS query string is case-sensitive with regards - # to your document type. That is, if you're looking for "H1" in - # an HTML document, you'll never find anything, since HTML tags - # will match only lowercase CSS queries. However, "H1" might be - # found in an XML document, where tags names are case-sensitive - # (e.g., "H1" is distinct from "h1"). + # node.css('img > @href') # returns all +href+ attributes on an +img+ element + # node.css('img / @href') # same # + # # ⚠ this returns +class+ attributes from all +div+ elements AND THEIR CHILDREN! + # node.css('div @class') + # + # node.css + # + # 💡 Array-like syntax is supported in CSS queries as an alternative to using +:nth-child()+. + # + # ⚠ NOTE that indices are 1-based like +:nth-child+ and not 0-based like Ruby Arrays. 
For + # example: + # + # # equivalent to 'li:nth-child(2)' + # node.css('li[2]') # retrieve the second li element in a list + # + # ⚠ NOTE that the CSS query string is case-sensitive with regard to your document type. HTML + # tags will match only lowercase CSS queries, so if you search for "H1" in an HTML document, + # you'll never find anything. However, "H1" might be found in an XML document, where tag + # names are case-sensitive (e.g., "H1" is distinct from "h1"). def css(*args) rules, handler, ns, _ = extract_params(args) @@ -147,18 +159,17 @@ def at_css(*args) # # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'}) # - # Custom XPath functions may also be defined. To define custom - # functions create a class and implement the function you want - # to define. The first argument to the method will be the - # current matching NodeSet. Any other arguments are ones that - # you pass in. Note that this class may appear anywhere in the - # argument list. For example: + # 💡 Custom XPath functions may also be defined. To define custom functions create a class and + # implement the function you want to define. The first argument to the method will be the + # current matching NodeSet. Any other arguments are ones that you pass in. Note that this + # class may appear anywhere in the argument list. 
For example: # - # node.xpath('.//title[regex(., "\w+")]', Class.new { - # def regex node_set, regex + # handler = Class.new { + # def regex(node_set, regex) # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ } # end - # }.new) + # }.new + # node.xpath('.//title[regex(., "\w+")]', handler) # def xpath(*args) paths, handler, ns, binds = extract_params(args) From fd252a2f4c4aadfb39a2f2ceb6b31db81749ee1c Mon Sep 17 00:00:00 2001 From: Mike Dalessio Date: Wed, 12 Jan 2022 18:35:36 -0500 Subject: [PATCH 8/8] doc: update CHANGELOG for #2419 --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 47288b816a3..baa6bce65c5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,8 @@ Nokogiri follows [Semantic Versioning](https://semver.org/), please see the [REA ### Fixed -* Fix `Nokogiri::XSLT.quote_params` regression in v1.13.0 which raised an exception when non-string stylesheet parameters were passed. Non-string parameters (e.g., integers and symbols) are now explicitly supported and both keys and values will be stringified with `#to_s`. [[#2418](https://github.com/sparklemotion/nokogiri/issues/2418)] +* Fix `Nokogiri::XSLT.quote_params` regression in v1.13.0 that raised an exception when non-string stylesheet parameters were passed. Non-string parameters (e.g., integers and symbols) are now explicitly supported and both keys and values will be stringified with `#to_s`. [[#2418](https://github.com/sparklemotion/nokogiri/issues/2418)] +* Fix CSS selector query regression in v1.13.0 that raised a `Nokogiri::XML::XPath::SyntaxError` when parsing XPath attributes mixed into the CSS query. Although this mash-up of XPath and CSS syntax previously worked unintentionally, it is now an officially supported feature and is documented as such. [[#2419](https://github.com/sparklemotion/nokogiri/issues/2419)] ## 1.13.0 / 2022-01-06