diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8d0f5ce0..886bbe37 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,7 +13,7 @@ jobs: fail-fast: false matrix: ruby: [2.7] - idna_mode: [native, pure] + idna_mode: [libidn2, libidn1, pure] os: [ubuntu-20.04] env: IDNA_MODE: ${{ matrix.idna_mode }} @@ -40,6 +40,10 @@ jobs: Profile Memory Allocation with ${{ matrix.idna_mode }} IDNA during Addressable::Template#match run: bundle exec rake profile:template_match_memory + - name: >- + Test for ${{ matrix.idna_mode }} IDNA backend memory leaks + run: bundle exec rake profile:idna_memory_leak + coverage: runs-on: ${{ matrix.os }} strategy: diff --git a/README.md b/README.md index 9892f615..cc84e5a6 100644 --- a/README.md +++ b/README.md @@ -94,8 +94,23 @@ template.extract(uri) $ gem install addressable ``` -You may optionally turn on native IDN support by installing libidn and the -idn gem: +# IDNA support (unicode hostnames) + +Three IDNA implementations are available, the first one available is used: +- A `libidn1` wrapper (if `libidn` and the `idn` gem are installed), supporting IDNA2003. +- A pure ruby implementation (slower), [almost](https://github.com/sporkmonger/addressable/issues/491) supporting IDNA2008. +- A `libidn2` wrapper (if `libidn2` is installed), supporting IDNA2008+UTS#46. + +Note: in the future major version, `libidn2` will become the default. + +To install `libidn2`: + +```console +$ sudo apt-get install libidn2-dev # Debian/Ubuntu +$ brew install libidn2 # OS X +``` + +To install `libidn1` and the `idn` gem (also add it to your Gemfile): ```console $ sudo apt-get install libidn11-dev # Debian/Ubuntu @@ -103,6 +118,22 @@ $ brew install libidn # OS X $ gem install idn-ruby ``` +Optionally you can turn on the strict mode which will raise exceptions in case of invalid hostname during IDNA conversion. The default (`false`) silently ignores them and keeps the hostname unchanged. 
The strictness depends on the backend used; libidn2 is stricter than libidn1, for example. +```ruby +Addressable::IDNA.strict_mode = true # default: false +``` + +Finally, if you want to force a different IDNA implementation, you can do so like this (after addressable is required): + +```ruby +require "addressable/idna/pure" +Addressable::IDNA.backend = Addressable::IDNA::Pure +require "addressable/idna/libidn2" +Addressable::IDNA.backend = Addressable::IDNA::Libidn2 +# Check which implementation is active: +puts Addressable::IDNA.backend.name +``` + # Semantic Versioning This project uses [Semantic Versioning](https://semver.org/). You can (and should) specify your diff --git a/Rakefile b/Rakefile index e19785d0..749b907f 100644 --- a/Rakefile +++ b/Rakefile @@ -20,9 +20,9 @@ additionally provides extensive support for IRIs and URI templates. TEXT PKG_FILES = FileList[ - "lib/**/*", "spec/**/*", "vendor/**/*", "data/**/*", - "tasks/**/*", - "[A-Z]*", "Rakefile" + "lib/**/*.rb", "spec/**/*.rb", "data/**/*", + "tasks/**/*.rake", + "[A-Z]*", "*.gemspec" ].exclude(/pkg/).exclude(/database\.yml/). exclude(/Gemfile\.lock/).exclude(/[_\.]git$/). exclude(/coverage/) diff --git a/addressable.gemspec b/addressable.gemspec index f43698c0..90a8874c 100644 --- a/addressable.gemspec +++ b/addressable.gemspec @@ -9,20 +9,21 @@ Gem::Specification.new do |s| s.metadata = { "changelog_uri" => "https://github.com/sporkmonger/addressable/blob/main/CHANGELOG.md" } if s.respond_to? :metadata= s.require_paths = ["lib".freeze] s.authors = ["Bob Aman".freeze] - s.date = "2023-04-09" + s.date = "2023-04-11" s.description = "Addressable is an alternative implementation to the URI implementation that is\npart of Ruby's standard library. 
It is flexible, offers heuristic parsing, and\nadditionally provides extensive support for IRIs and URI templates.\n".freeze s.email = "bob@sporkmonger.com".freeze s.extra_rdoc_files = ["README.md".freeze] - s.files = ["CHANGELOG.md".freeze, "Gemfile".freeze, "LICENSE.txt".freeze, "README.md".freeze, "Rakefile".freeze, "addressable.gemspec".freeze, "data/unicode.data".freeze, "lib/addressable.rb".freeze, "lib/addressable/idna.rb".freeze, "lib/addressable/idna/native.rb".freeze, "lib/addressable/idna/pure.rb".freeze, "lib/addressable/template.rb".freeze, "lib/addressable/uri.rb".freeze, "lib/addressable/version.rb".freeze, "spec/addressable/idna_spec.rb".freeze, "spec/addressable/net_http_compat_spec.rb".freeze, "spec/addressable/security_spec.rb".freeze, "spec/addressable/template_spec.rb".freeze, "spec/addressable/uri_spec.rb".freeze, "spec/spec_helper.rb".freeze, "tasks/clobber.rake".freeze, "tasks/gem.rake".freeze, "tasks/git.rake".freeze, "tasks/metrics.rake".freeze, "tasks/profile.rake".freeze, "tasks/rspec.rake".freeze, "tasks/yard.rake".freeze] + s.files = ["CHANGELOG.md".freeze, "Gemfile".freeze, "LICENSE.txt".freeze, "README.md".freeze, "Rakefile".freeze, "addressable.gemspec".freeze, "data/unicode.data".freeze, "lib/addressable.rb".freeze, "lib/addressable/idna.rb".freeze, "lib/addressable/idna/native.rb".freeze, "lib/addressable/idna/native2.rb".freeze, "lib/addressable/idna/pure.rb".freeze, "lib/addressable/template.rb".freeze, "lib/addressable/uri.rb".freeze, "lib/addressable/version.rb".freeze, "spec/addressable/idna_spec.rb".freeze, "spec/addressable/net_http_compat_spec.rb".freeze, "spec/addressable/security_spec.rb".freeze, "spec/addressable/template_spec.rb".freeze, "spec/addressable/uri_spec.rb".freeze, "spec/spec_helper.rb".freeze, "tasks/clobber.rake".freeze, "tasks/gem.rake".freeze, "tasks/git.rake".freeze, "tasks/metrics.rake".freeze, "tasks/profile.rake".freeze, "tasks/rspec.rake".freeze, "tasks/yard.rake".freeze] s.homepage = 
"https://github.com/sporkmonger/addressable".freeze s.licenses = ["Apache-2.0".freeze] s.rdoc_options = ["--main".freeze, "README.md".freeze] s.required_ruby_version = Gem::Requirement.new(">= 2.2".freeze) - s.rubygems_version = "3.4.10".freeze + s.rubygems_version = "3.4.11".freeze s.summary = "URI Implementation".freeze s.specification_version = 4 s.add_runtime_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 6.0"]) + s.add_runtime_dependency(%q<ffi>.freeze, [">= 0"]) s.add_development_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"]) end diff --git a/benchmark/idna.rb b/benchmark/idna.rb new file mode 100644 index 00000000..97cc7b8c --- /dev/null +++ b/benchmark/idna.rb @@ -0,0 +1,41 @@ +#!/usr/bin/env ruby +# frozen_string_literal: true + +require "benchmark" +require "addressable/idna/libidn2" +require "addressable/idna/libidn1" +require "addressable/idna/pure" + +value = "fiᆵリ宠퐱卄.com" +expected = "xn--fi-w1k207vk59a3qk9w9r.com" +N = 100_000 + +fail "pure ruby does not match" unless expected == Addressable::IDNA::Pure.to_ascii(value) +fail "libidn does not match" unless expected == Addressable::IDNA::Libidn1.to_ascii(value) +fail "libidn2 does not match" unless expected == Addressable::IDNA::Libidn2.to_ascii(value) + +Benchmark.bmbm do |x| + x.report("pure") { N.times { + Addressable::IDNA::Pure.to_unicode(Addressable::IDNA::Pure.to_ascii(value)) + } } + + x.report("libidn") { N.times { + Addressable::IDNA::Libidn1.to_unicode(Addressable::IDNA::Libidn1.to_ascii(value)) + } } + + x.report("libidn2") { N.times { + Addressable::IDNA::Libidn2.to_unicode(Addressable::IDNA::Libidn2.to_ascii(value)) + } } +end + +# > ruby benchmark/idna.rb +# Rehearsal ------------------------------------------- +# pure 5.914630 0.000000 5.914630 ( 5.915326) +# libidn 0.518971 0.003672 0.522643 ( 0.522676) +# libidn2 0.763936 0.000000 0.763936 ( 0.763983) +# ---------------------------------- total: 7.201209sec + +# user system total real +# pure 6.042877 0.000000 6.042877 ( 6.043252) 
+# libidn 
0.521668 0.000000 0.521668 ( 0.521704) +# libidn2 0.764782 0.000000 0.764782 ( 0.764863) diff --git a/lib/addressable/idna.rb b/lib/addressable/idna.rb index 2dbd3934..2299df9c 100644 --- a/lib/addressable/idna.rb +++ b/lib/addressable/idna.rb @@ -16,11 +16,50 @@ # limitations under the License. #++ +module Addressable + module IDNA + # All IDNA conversion related errors + class Error < StandardError; end + # Input is invalid. + class PunycodeBadInput < Error; end + # Output would exceed the space provided. + class PunycodeBigOutput < Error; end + # Input needs wider integers to process. + class PunycodeOverflow < Error; end + + class << self + attr_accessor :backend, :strict_mode + + # public interface implemented by all backends + def to_ascii(value) + backend.to_ascii(value) if value.is_a?(String) + rescue Error + strict_mode ? raise : value + end + + def to_unicode(value) + backend.to_unicode(value) if value.is_a?(String) + rescue Error + strict_mode ? raise : value + end + + # @deprecated Use {String#unicode_normalize(:nfkc)} instead + def unicode_normalize_kc(value) + value.to_s.unicode_normalize(:nfkc) + end + + extend Gem::Deprecate + deprecate :unicode_normalize_kc, "String#unicode_normalize(:nfkc)", 2023, 4 + end + end +end begin - require "addressable/idna/native" + require "addressable/idna/libidn1" + Addressable::IDNA.backend = Addressable::IDNA::Libidn1 rescue LoadError # libidn or the idn gem was not available, fall back on a pure-Ruby # implementation... require "addressable/idna/pure" + Addressable::IDNA.backend = Addressable::IDNA::Pure end diff --git a/lib/addressable/idna/libidn1.rb b/lib/addressable/idna/libidn1.rb new file mode 100644 index 00000000..581cd1f9 --- /dev/null +++ b/lib/addressable/idna/libidn1.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +#-- +# Copyright (C) Bob Aman +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#++ + +# libidn1 implementing IDNA2003 +require "idn" + +module Addressable + module IDNA + module Libidn1 + class << self + # @deprecated Use {String#unicode_normalize(:nfkc)} instead + def unicode_normalize_kc(value) + value.to_s.unicode_normalize(:nfkc) + end + + extend Gem::Deprecate + deprecate :unicode_normalize_kc, "String#unicode_normalize(:nfkc)", 2023, 4 + end + + def self.to_ascii(value) + IDN::Idna.toASCII(value, IDN::Idna::ALLOW_UNASSIGNED) + rescue IDN::Idna::IdnaError => e + Addressable::IDNA.strict_mode ? raise(Error.new(e.message)) : value + end + + def self.to_unicode(value) + IDN::Idna.toUnicode(value, IDN::Idna::ALLOW_UNASSIGNED) + rescue IDN::Idna::IdnaError => e + Addressable::IDNA.strict_mode ? raise(Error.new(e.message)) : value + end + end + end +end diff --git a/lib/addressable/idna/libidn2.rb b/lib/addressable/idna/libidn2.rb new file mode 100644 index 00000000..357f5b00 --- /dev/null +++ b/lib/addressable/idna/libidn2.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +#-- +# Copyright (C) Bob Aman +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +#++ + +# libidn2 implementing IDNA2008+TR46 +require "ffi" + +module Addressable + module IDNA + module Libidn2 + extend FFI::Library + + ffi_lib ["idn2", "libidn2.0", "libidn2.so.0"] + + attach_function :idn2_to_ascii_8z, %i[string pointer int], :int + attach_function :idn2_to_unicode_8z8z, %i[string pointer int], :int + attach_function :idn2_strerror, [:int], :string + attach_function :idn2_free, [:pointer], :void + + IDN2_TRANSITIONAL = 4 + IDN2_NONTRANSITIONAL = 8 + + def self.to_ascii(value) + pointer = FFI::MemoryPointer.new(:pointer) + res = idn2_to_ascii_8z(value, pointer, IDN2_NONTRANSITIONAL) + # Fallback to Transitional mode in case of disallowed character + res = idn2_to_ascii_8z(value, pointer, IDN2_TRANSITIONAL) if res == -304 + raise Error.new("libidn2 failed to convert \"#{value}\" to ascii (#{idn2_strerror(res)})") if res != 0 + result = pointer.read_pointer.read_string + idn2_free(pointer.read_pointer) + result + end + + def self.to_unicode(value) + pointer = FFI::MemoryPointer.new(:pointer) + res = idn2_to_unicode_8z8z(value, pointer, IDN2_NONTRANSITIONAL) + raise Error.new("libidn2 failed to convert \"#{value}\" to unicode (#{idn2_strerror(res)})") if res != 0 + result = pointer.read_pointer.read_string + idn2_free(pointer.read_pointer) + result.force_encoding('UTF-8') + end + end + end +end diff --git a/lib/addressable/idna/native.rb b/lib/addressable/idna/native.rb index a718364f..3197ac37 100644 --- a/lib/addressable/idna/native.rb +++ b/lib/addressable/idna/native.rb @@ -1,66 +1,4 @@ -# frozen_string_literal: true - -#-- -# Copyright (C) Bob Aman -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -#++ - - -require "idn" - -module Addressable - module IDNA - def self.punycode_encode(value) - IDN::Punycode.encode(value.to_s) - end - - def self.punycode_decode(value) - IDN::Punycode.decode(value.to_s) - end - - class << self - # @deprecated Use {String#unicode_normalize(:nfkc)} instead - def unicode_normalize_kc(value) - value.to_s.unicode_normalize(:nfkc) - end - - extend Gem::Deprecate - deprecate :unicode_normalize_kc, "String#unicode_normalize(:nfkc)", 2023, 4 - end - - def self.to_ascii(value) - value.to_s.split('.', -1).map do |segment| - if segment.size > 0 && segment.size < 64 - IDN::Idna.toASCII(segment, IDN::Idna::ALLOW_UNASSIGNED) - elsif segment.size >= 64 - segment - else - '' - end - end.join('.') - end - - def self.to_unicode(value) - value.to_s.split('.', -1).map do |segment| - if segment.size > 0 && segment.size < 64 - IDN::Idna.toUnicode(segment, IDN::Idna::ALLOW_UNASSIGNED) - elsif segment.size >= 64 - segment - else - '' - end - end.join('.') - end - end -end +# Deprecated, for backward compatibility only +require "addressable/idna/libidn1" +Addressable::IDNA.backend = Addressable::IDNA::Libidn1 +warn "NOTE: loading 'addressable/idna/native' is deprecated; use 'addressable/idna/libidn1' instead and set `Addressable::IDNA.backend = Addressable::IDNA::Libidn1` to force libidn1." 
diff --git a/lib/addressable/idna/pure.rb b/lib/addressable/idna/pure.rb index 3d6ffbad..07ff15e5 100644 --- a/lib/addressable/idna/pure.rb +++ b/lib/addressable/idna/pure.rb @@ -17,8 +17,8 @@ #++ -module Addressable - module IDNA +module Addressable::IDNA + module Pure # This module is loosely based on idn_actionmailer by Mick Staugaard, # the unicode library by Yoshida Masato, and the punycode implementation # by Kazuhiro Nishiyama. Most of the code was copied verbatim, but @@ -203,13 +203,6 @@ def self.lookup_unicode_lowercase(codepoint) "`abcdefghijklmno" + "pqrstuvwxyz{|}~\n" - # Input is invalid. - class PunycodeBadInput < StandardError; end - # Output would exceed the space provided. - class PunycodeBigOutput < StandardError; end - # Input needs wider integers to process. - class PunycodeOverflow < StandardError; end - def self.punycode_encode(unicode) unicode = unicode.to_s unless unicode.is_a?(String) input = unicode.unpack("U*") diff --git a/lib/addressable/uri.rb b/lib/addressable/uri.rb index 50ccdaf5..581eded6 100644 --- a/lib/addressable/uri.rb +++ b/lib/addressable/uri.rb @@ -1140,6 +1140,8 @@ def normalized_host # All normalized values should be UTF-8 force_utf8_encoding_if_needed(@normalized_host) @normalized_host + rescue IDNA::Error => e + raise InvalidURIError.new(e.message) end ## @@ -2195,6 +2197,8 @@ def display_uri display_uri = self.normalize display_uri.host = ::Addressable::IDNA.to_unicode(display_uri.host) return display_uri + rescue IDNA::Error => e + raise InvalidURIError.new(e.message) end ## diff --git a/spec/addressable/idna_spec.rb b/spec/addressable/idna_spec.rb index 428c9ec8..29a978f3 100644 --- a/spec/addressable/idna_spec.rb +++ b/spec/addressable/idna_spec.rb @@ -152,10 +152,18 @@ "example..host" )).to eq("example..host") end + + it "handles nil input" do + expect(Addressable::IDNA.to_ascii(nil)).to eq(nil) + expect(Addressable::IDNA.to_ascii(45)).to eq(nil) + expect(Addressable::IDNA.to_ascii([])).to eq(nil) + 
expect(Addressable::IDNA.to_ascii({})).to eq(nil) + end end shared_examples_for "converting from ASCII to unicode" do long = 'AcinusFallumTrompetumNullunCreditumVisumEstAtCuadLongumEtCefallum.com' + it "should convert '#{long}' correctly" do expect(Addressable::IDNA.to_unicode(long)).to eq(long) end @@ -255,24 +263,36 @@ "example..host" )).to eq("example..host") end + + it "handles unexpected input as nil" do + expect(Addressable::IDNA.to_unicode(nil)).to eq(nil) + expect(Addressable::IDNA.to_unicode(45)).to eq(nil) + expect(Addressable::IDNA.to_unicode([])).to eq(nil) + expect(Addressable::IDNA.to_unicode({})).to eq(nil) + end end describe Addressable::IDNA, "when using the pure-Ruby implementation" do - before do - Addressable.send(:remove_const, :IDNA) - load "addressable/idna/pure.rb" + before :all do + require "addressable/idna/pure" + Addressable::IDNA.backend = Addressable::IDNA::Pure end it_should_behave_like "converting from unicode to ASCII" it_should_behave_like "converting from ASCII to unicode" + it "should implement IDNA2008 non transitional" do + expect(Addressable::IDNA.to_ascii("faß.de")).to eq("xn--fa-hia.de") + end + begin require "fiber" it "should not blow up inside fibers" do f = Fiber.new do - Addressable.send(:remove_const, :IDNA) + Addressable::IDNA.send(:remove_const, :Pure) load "addressable/idna/pure.rb" + Addressable::IDNA.backend = Addressable::IDNA::Pure end f.resume end @@ -283,20 +303,84 @@ end begin - require "idn" + require "addressable/idna/libidn1" + + describe Addressable::IDNA, "when using the libidn1 native implementation (idn gem)" do + before :all do + Addressable::IDNA.backend = Addressable::IDNA::Libidn1 + end + + it_should_behave_like "converting from unicode to ASCII" + it_should_behave_like "converting from ASCII to unicode" + + it "should implement IDNA2003" do + expect(Addressable::IDNA.to_ascii("faß.de")).to eq("fass.de") + end + + context "with strict_mode = true" do + before { Addressable::IDNA.strict_mode = true 
} + after { Addressable::IDNA.strict_mode = false } + + long = 'AcinusFallumTrompetumNullunCreditumVisumEstAtCuadLongumEtCefallum.com' + it "should raise on label too long (>63)" do + expect { + Addressable::IDNA.to_ascii(long) + }.to raise_error(Addressable::IDNA::Error, /too large/) + end + end + end +rescue LoadError => error + raise error if ENV["CI"] && TestHelper.native_supported? + + # Cannot test the native implementation without libidn installed. + warn('Could not load native libidn1 implementation.') +end + +begin + require "addressable/idna/libidn2" - describe Addressable::IDNA, "when using the native-code implementation" do - before do - Addressable.send(:remove_const, :IDNA) - load "addressable/idna/native.rb" + describe Addressable::IDNA, "when using the libidn2 native implementation (ffi)" do + before :all do + Addressable::IDNA.backend = Addressable::IDNA::Libidn2 end it_should_behave_like "converting from unicode to ASCII" it_should_behave_like "converting from ASCII to unicode" + + it "should implement IDNA2008 non transitional" do + expect(Addressable::IDNA.to_ascii("faß.de")).to eq("xn--fa-hia.de") + end + + context "with strict_mode = true" do + before { Addressable::IDNA.strict_mode = true } + after { Addressable::IDNA.strict_mode = false } + + long = 'AcinusFallumTrompetumNullunCreditumVisumEstAtCuadLongumEtCefallum.com' + it "should raise on label too long (>63)" do + expect { + Addressable::IDNA.to_unicode(long) + }.to raise_error(Addressable::IDNA::Error, /longer than 63 char/) + expect { + Addressable::IDNA.to_ascii(long) + }.to raise_error(Addressable::IDNA::Error, /longer than 63 char/) + end + + it "should raise when punycode decode fails" do + expect { + Addressable::IDNA.to_unicode("xn--zckp1cyg1.sblo.jp") + }.to raise_error(Addressable::IDNA::Error, /invalid punycode/) + end + + it "should raise when the ACE prefix has no suffix" do + expect { + Addressable::IDNA.to_unicode("xn--...-") + }.to raise_error(Addressable::IDNA::Error, 
/invalid punycode/) + end + end end rescue LoadError => error raise error if ENV["CI"] && TestHelper.native_supported? - # Cannot test the native implementation without libidn support. - warn('Could not load native IDN implementation.') + # Cannot test the native implementation without libidn2 installed. + warn('Could not load native libidn2 implementation.') end diff --git a/spec/addressable/uri_spec.rb b/spec/addressable/uri_spec.rb index c54fc3fb..63274b07 100644 --- a/spec/addressable/uri_spec.rb +++ b/spec/addressable/uri_spec.rb @@ -5234,6 +5234,118 @@ def to_s end end +describe Addressable::URI, "when parsed from invalid IDNA hostname " + + "'http://xn---3a.com/'" do + before do + @uri = Addressable::URI.parse("http://xn---3a.com/") + end + + begin + require "addressable/idna/libidn2" + context "with libidn2" do + before { Addressable::IDNA.backend = Addressable::IDNA::Libidn2 } + + context "when strict_mode is true" do + before { Addressable::IDNA.strict_mode = true } + after { Addressable::IDNA.strict_mode = false } + + it "display_uri should raise a wrapped InvalidURL error" do + expect { @uri.display_uri.to_s + }.to raise_error(Addressable::URI::InvalidURIError, /invalid punycode/) { |e| + expect(e.cause).to be_a(Addressable::IDNA::Error) + } + end + + it "normalized_host should raise a wrapped InvalidURL error" do + expect { @uri.normalized_host + }.to raise_error(Addressable::URI::InvalidURIError, /invalid punycode/) { |e| + expect(e.cause).to be_a(Addressable::IDNA::Error) + } + end + end + + it "display_uri should be kept as http://xn---3a.com/" do + expect(@uri.display_uri.to_s).to eq("http://xn---3a.com/") + end + + it "normalized_host should be kept as http://xn---3a.com/" do + expect(@uri.normalized_host).to eq("xn---3a.com") + end + end + rescue LoadError => error + raise error if ENV["CI"] && TestHelper.native_supported? 
+ warn('Could not load native libidn2 implementation.') + end + + begin + require "addressable/idna/libidn1" + context "with libidn1" do + before { Addressable::IDNA.backend = Addressable::IDNA::Libidn1 } + + context "when strict_mode is true" do + before { Addressable::IDNA.strict_mode = true } + after { Addressable::IDNA.strict_mode = false } + + # libidn1 silently falls back in this case + it "display_uri should be kept as http://xn---3a.com/" do + expect(@uri.display_uri.to_s).to eq("http://xn---3a.com/") + end + + it "normalized_host should be kept as http://xn---3a.com/" do + expect(@uri.normalized_host).to eq("xn---3a.com") + end + end + + it "display_uri should be kept as http://xn---3a.com/" do + expect(@uri.display_uri.to_s).to eq("http://xn---3a.com/") + end + + it "normalized_host should be kept as http://xn---3a.com/" do + expect(@uri.normalized_host).to eq("xn---3a.com") + end + end + rescue LoadError => error + raise error if ENV["CI"] && TestHelper.native_supported? + warn('Could not load native libidn1 implementation.') + end + + require "addressable/idna/pure" + context "with pure-ruby IDNA implementation" do + before { Addressable::IDNA.backend = Addressable::IDNA::Pure } + + context "when strict_mode is true" do + before { Addressable::IDNA.strict_mode = true } + after { Addressable::IDNA.strict_mode = false } + + # the pure-ruby backend is expected to raise here; pending until it does + it "display_uri should be kept as http://xn---3a.com/" do + pending "incorrect result" + expect { @uri.display_uri.to_s + }.to raise_error(Addressable::URI::InvalidURIError, /invalid punycode/) { |e| + expect(e.cause).to be_a(Addressable::IDNA::Error) + } + end + + it "normalized_host should be kept as http://xn---3a.com/" do + pending "incorrect result" + expect { @uri.normalized_host + }.to raise_error(Addressable::URI::InvalidURIError, /invalid punycode/) { |e| + expect(e.cause).to be_a(Addressable::IDNA::Error) + } + end + end + + it "display_uri should be kept as http://xn---3a.com/" do 
+ pending "incorrect result" + expect(@uri.display_uri.to_s).to eq("http://xn---3a.com/") + end + + it "normalized_host should be kept as http://xn---3a.com/" do + expect(@uri.normalized_host).to eq("xn---3a.com") + end + end +end + describe Addressable::URI, "when parsed from " + "'http://www.詹姆斯.com/atomtests/iri/詹.html'" do before do diff --git a/tasks/gem.rake b/tasks/gem.rake index 24d9714b..ac67a138 100644 --- a/tasks/gem.rake +++ b/tasks/gem.rake @@ -22,6 +22,7 @@ namespace :gem do s.required_ruby_version = ">= 2.2" s.add_runtime_dependency "public_suffix", ">= 2.0.2", "< 6.0" + s.add_runtime_dependency "ffi" s.add_development_dependency "bundler", ">= 1.0", "< 3.0" s.require_path = "lib" diff --git a/tasks/profile.rake b/tasks/profile.rake index b697d489..2e5d9d81 100644 --- a/tasks/profile.rake +++ b/tasks/profile.rake @@ -1,8 +1,19 @@ # frozen_string_literal: true namespace :profile do + task :idna_selection do + require "addressable/idna" + if ENV["IDNA_MODE"] == "pure" + require "addressable/idna/pure" + Addressable::IDNA.backend = Addressable::IDNA::Pure + elsif ENV["IDNA_MODE"] == "libidn2" + require "addressable/idna/libidn2" + Addressable::IDNA.backend = Addressable::IDNA::Libidn2 + end + end + desc "Profile Template match memory allocations" - task :template_match_memory do + task :template_match_memory => :idna_selection do require "memory_profiler" require "addressable/template" @@ -35,25 +46,20 @@ namespace :profile do end desc "Profile URI parse memory allocations" - task :memory do + task :memory => :idna_selection do require "memory_profiler" require "addressable/uri" - if ENV["IDNA_MODE"] == "pure" - Addressable.send(:remove_const, :IDNA) - load "addressable/idna/pure.rb" - end start_at = Time.now.to_f report = MemoryProfiler.report do 30_000.times do Addressable::URI.parse( - "http://google.com/stuff/../?with_lots=of&params=asdff#!stuff" + "http://fiᆵリ宠퐱卄.com/stuff/../?with_lots=of&params=asdff#!stuff" ).normalize end end end_at = Time.now.to_f 
print_options = { scale_bytes: true, normalize_paths: true } - puts "\n\n" if ENV["CI"] report.pretty_print(**print_options) @@ -63,10 +69,41 @@ namespace :profile do puts "Total allocated: #{t_allocated} (#{report.total_allocated} objects)" puts "Total retained: #{t_retained} (#{report.total_retained} objects)" - puts "Took #{end_at - start_at} seconds" + puts "Took #{(end_at - start_at).round(1)} seconds" + puts "IDNA backend: #{Addressable::IDNA.backend.name}" FileUtils.mkdir_p("tmp") report.pretty_print(to_file: "tmp/memprof.txt", **print_options) end end + + desc "Test for IDNA backend memory leaks" + task :idna_memory_leak => :idna_selection do + value = "fiᆵリ宠퐱卄.com" + puts "\nMemory leak test for IDNA backend: #{Addressable::IDNA.backend.name}" + start_at = Time.now.to_f + GC.disable # Only run GC when manually called + samples = [] + 10.times do + 50_000.times { + Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) + } + GC.start # Run a major GC + _, size = `ps ax -o pid,rss | grep -E "^[[:space:]]*#{$$}"`.strip.split.map(&:to_i) + samples << size/1024 + puts " Memory: #{size/1024}MB" # show process memory + end + end_at = Time.now.to_f + samples.shift # remove first sample which is often unstable in pure ruby + percent = (samples.last - samples.first) * 100 / samples.first + + puts "Took #{(end_at - start_at).round(1)} seconds" + puts "Memory rose from #{samples.first}MB to #{samples.last}MB" + if percent > 10 + puts "Potential MEMORY LEAK detected (#{percent}% increase)" + exit 1 + else + puts "Looks fine (#{percent}% increase)" + end + end end