Skip to content

Commit

Permalink
Improve handling of escaped and invalid URLs
Browse files Browse the repository at this point in the history
  • Loading branch information
jgarber623 committed Aug 21, 2022
1 parent a6197ab commit b0d6c75
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 13 deletions.
16 changes: 5 additions & 11 deletions lib/nokogiri/html_ext/document.rb
Original file line number Diff line number Diff line change
Expand Up @@ -69,13 +69,15 @@ def base_href=(url)
#
# @return [String]
def resolve_relative_url(url)
unescape(
uri_parser.join(*[document.url.strip, base_href, unescape(url)].compact.map { |u| escape(u) })
url_str = url.to_s

uri_parser.unescape(
uri_parser.join(*[document.url.strip, base_href, url_str].compact.map { |u| uri_parser.escape(u) })
.normalize
.to_s
)
rescue URI::InvalidComponentError, URI::InvalidURIError
unescape(url)
url
end

# Convert the document's relative URLs to absolute URLs.
Expand All @@ -99,10 +101,6 @@ def resolve_relative_urls!

private

# Escape a URL by delegating to the shared URI parser.
#
# @param url [#to_s] value to escape; converted with +to_s+ first, so
#   non-String input (including +nil+) is passed on as a String
# @return [String] presumably the escaped form, as returned by
#   +uri_parser.escape+
def escape(url)
uri_parser.escape(url.to_s)
end

def resolve_relative_urls_for(attributes_map)
attributes_map.each do |attribute, names|
xpaths = names.map { |name| "//#{name}[@#{attribute}]" }
Expand All @@ -113,10 +111,6 @@ def resolve_relative_urls_for(attributes_map)
end
end

# Unescape a URL by delegating to the shared URI parser.
#
# @param url [#to_s] value to unescape; converted with +to_s+ first, so
#   non-String input (including +nil+) is passed on as a String
# @return [String] presumably the unescaped form, as returned by
#   +uri_parser.unescape+
def unescape(url)
uri_parser.unescape(url.to_s)
end

# Parser object used for joining, escaping, and unescaping URLs.
#
# Memoized in +@uri_parser+: the first call stores +URI::DEFAULT_PARSER+ and
# later calls return the stored object.
#
# @return [Object] the +URI::DEFAULT_PARSER+ instance
def uri_parser
@uri_parser ||= URI::DEFAULT_PARSER
end
Expand Down
8 changes: 6 additions & 2 deletions spec/lib/nokogiri/html_ext/document_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@
<body>
<a href="/home">Home</a>
<img srcset="../foo.png 480w, ../bar.png 720w, /biz.jpg">
<img src="/commons/thumb/9/96/H%C3%A5kon-Wium-Lie-2009-03.jpg/215px-H%C3%A5kon-Wium-Lie-2009-03.jpg">
<a href="/foo%2epdf">Relative escaped PDF</a>
<a href="mailto:email%40jgarber%2eexample">Valid escaped electronic mail</a>
<a href="mailto:email_at_jgarber%2eexample">Invalid electronic mail</a>
</body>
Expand All @@ -81,8 +83,10 @@
<body>
<a href="https://jgarber.example/home">Home</a>
<img srcset="https://jgarber.example/foo/foo.png 480w, https://jgarber.example/foo/bar.png 720w, https://jgarber.example/biz.jpg">
<a href="mailto:email@jgarber.example">Valid escaped electronic mail</a>
<a href="mailto:email_at_jgarber.example">Invalid electronic mail</a>
<img src="https://jgarber.example/commons/thumb/9/96/H%C3%A5kon-Wium-Lie-2009-03.jpg/215px-H%C3%A5kon-Wium-Lie-2009-03.jpg">
<a href="https://jgarber.example/foo%2epdf">Relative escaped PDF</a>
<a href="mailto:email%40jgarber%2eexample">Valid escaped electronic mail</a>
<a href="mailto:email_at_jgarber%2eexample">Invalid electronic mail</a>
</body>
</html>
HTML
Expand Down

0 comments on commit b0d6c75

Please sign in to comment.