From f77bcf499d5c0244e1ff62c11a2323be10b43a82 Mon Sep 17 00:00:00 2001 From: Adrien Rey-Jarthon Date: Wed, 22 Mar 2023 19:11:22 +0100 Subject: [PATCH 01/12] libidn2 ffi implementation --- .github/workflows/test.yml | 2 +- addressable.gemspec | 4 +- benchmark/idna.rb | 65 +++++++++++++++++++++++++++++++++ lib/addressable/idna.rb | 16 +++++--- lib/addressable/idna/native.rb | 10 +---- lib/addressable/idna/native2.rb | 57 +++++++++++++++++++++++++++++ spec/addressable/idna_spec.rb | 41 ++++++++++++++++++--- tasks/profile.rake | 5 ++- 8 files changed, 177 insertions(+), 23 deletions(-) create mode 100644 benchmark/idna.rb create mode 100644 lib/addressable/idna/native2.rb diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8d0f5ce0..2d4f5beb 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,7 +13,7 @@ jobs: fail-fast: false matrix: ruby: [2.7] - idna_mode: [native, pure] + idna_mode: [native2, native, pure] os: [ubuntu-20.04] env: IDNA_MODE: ${{ matrix.idna_mode }} diff --git a/addressable.gemspec b/addressable.gemspec index d51f4655..09be7f26 100644 --- a/addressable.gemspec +++ b/addressable.gemspec @@ -13,7 +13,7 @@ Gem::Specification.new do |s| s.description = "Addressable is an alternative implementation to the URI implementation that is\npart of Ruby's standard library. 
It is flexible, offers heuristic parsing, and\nadditionally provides extensive support for IRIs and URI templates.\n".freeze s.email = "bob@sporkmonger.com".freeze s.extra_rdoc_files = ["README.md".freeze] - s.files = ["CHANGELOG.md".freeze, "Gemfile".freeze, "LICENSE.txt".freeze, "README.md".freeze, "Rakefile".freeze, "data/unicode.data".freeze, "lib/addressable".freeze, "lib/addressable.rb".freeze, "lib/addressable/idna".freeze, "lib/addressable/idna.rb".freeze, "lib/addressable/idna/native.rb".freeze, "lib/addressable/idna/pure.rb".freeze, "lib/addressable/template.rb".freeze, "lib/addressable/uri.rb".freeze, "lib/addressable/version.rb".freeze, "spec/addressable".freeze, "spec/addressable/idna_spec.rb".freeze, "spec/addressable/net_http_compat_spec.rb".freeze, "spec/addressable/security_spec.rb".freeze, "spec/addressable/template_spec.rb".freeze, "spec/addressable/uri_spec.rb".freeze, "spec/spec_helper.rb".freeze, "tasks/clobber.rake".freeze, "tasks/gem.rake".freeze, "tasks/git.rake".freeze, "tasks/metrics.rake".freeze, "tasks/profile.rake".freeze, "tasks/rspec.rake".freeze, "tasks/yard.rake".freeze] + s.files = ["CHANGELOG.md".freeze, "Gemfile".freeze, "LICENSE.txt".freeze, "README.md".freeze, "Rakefile".freeze, "data/unicode.data".freeze, "lib/addressable".freeze, "lib/addressable.rb".freeze, "lib/addressable/idna".freeze, "lib/addressable/idna.rb".freeze, "lib/addressable/idna/native.rb".freeze, "lib/addressable/idna/native2.rb".freeze, "lib/addressable/idna/pure.rb".freeze, "lib/addressable/template.rb".freeze, "lib/addressable/uri.rb".freeze, "lib/addressable/version.rb".freeze, "spec/addressable".freeze, "spec/addressable/idna_spec.rb".freeze, "spec/addressable/net_http_compat_spec.rb".freeze, "spec/addressable/security_spec.rb".freeze, "spec/addressable/template_spec.rb".freeze, "spec/addressable/uri_spec.rb".freeze, "spec/spec_helper.rb".freeze, "tasks/clobber.rake".freeze, "tasks/gem.rake".freeze, "tasks/git.rake".freeze, "tasks/metrics.rake".freeze, 
"tasks/profile.rake".freeze, "tasks/rspec.rake".freeze, "tasks/yard.rake".freeze] s.homepage = "https://github.com/sporkmonger/addressable".freeze s.licenses = ["Apache-2.0".freeze] s.rdoc_options = ["--main".freeze, "README.md".freeze] @@ -27,9 +27,11 @@ Gem::Specification.new do |s| if s.respond_to? :add_runtime_dependency then s.add_runtime_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 6.0"]) + s.add_runtime_dependency(%q<ffi>.freeze) s.add_development_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"]) else s.add_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 6.0"]) + s.add_dependency(%q<ffi>.freeze) s.add_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"]) end end diff --git a/benchmark/idna.rb b/benchmark/idna.rb new file mode 100644 index 00000000..69978299 --- /dev/null +++ b/benchmark/idna.rb @@ -0,0 +1,65 @@ +# /usr/bin/env ruby +# frozen_string_literal: true. +
+require "benchmark" + +value = "fiᆵリ宠퐱卄.com" +expected = "xn--fi-w1k207vk59a3qk9w9r.com" +N = 100_000 + +Benchmark.bmbm do |x| + x.report("pure") { + load "lib/addressable/idna/pure.rb" + fail "pure ruby does not match" unless expected == Addressable::IDNA.to_ascii(value) + N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) } + Addressable.send(:remove_const, :IDNA) + } + + x.report("libidn") { + load "lib/addressable/idna/native.rb" + fail "libidn does not match" unless expected == Addressable::IDNA.to_ascii(value) + N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) } + Addressable.send(:remove_const, :IDNA) + } + + x.report("libidn2") { + load "lib/addressable/idna/native2.rb" + fail "addressable does not match" unless expected == Addressable::IDNA.to_ascii(value) + N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) } + Addressable.send(:remove_const, :IDNA) + } +end + +# > ruby benchmark/idna.rb +# Rehearsal ------------------------------------------- +# pure 5.914630 0.000000 5.914630 ( 5.915326) +# libidn 0.518971 0.003672 0.522643 ( 0.522676) +# libidn2 0.763936 
0.000000 0.763936 ( 0.763983) +# ---------------------------------- total: 7.201209sec + +# user system total real +# pure 6.042877 0.000000 6.042877 ( 6.043252) +# libidn 0.521668 0.000000 0.521668 ( 0.521704) +# libidn2 0.764782 0.000000 0.764782 ( 0.764863) + +puts "\nMemory leak test for libidn2 (memory should stabilize quickly):" +load "lib/addressable/idna/native2.rb" +GC.disable # Only run GC when manually called +10.times do + N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) } + GC.start # Run a major GC + pid, size = `ps ax -o pid,rss | grep -E "^[[:space:]]*#{$$}"`.strip.split.map(&:to_i) + puts " Memory: #{size/1024}MB" # show process memory +end + +# Memory leak test for libidn2 (memory should stabilize quickly): +# Memory: 117MB +# Memory: 121MB +# Memory: 121MB +# Memory: 121MB +# Memory: 121MB +# Memory: 121MB +# Memory: 121MB +# Memory: 121MB +# Memory: 121MB +# Memory: 121MB \ No newline at end of file diff --git a/lib/addressable/idna.rb b/lib/addressable/idna.rb index 2dbd3934..d66b5db2 100644 --- a/lib/addressable/idna.rb +++ b/lib/addressable/idna.rb @@ -16,11 +16,15 @@ # limitations under the License. #++ - begin - require "addressable/idna/native" + require "addressable/idna/native2" rescue LoadError - # libidn or the idn gem was not available, fall back on a pure-Ruby - # implementation... - require "addressable/idna/pure" -end + # libidn2 or the ffi gem was not available, fall back on libidn1 + begin + require "addressable/idna/native" + rescue LoadError + # libidn or the idn gem was not available, fall back on a pure-Ruby + # implementation... + require "addressable/idna/pure" + end +end \ No newline at end of file diff --git a/lib/addressable/idna/native.rb b/lib/addressable/idna/native.rb index b225e1c3..089075d5 100644 --- a/lib/addressable/idna/native.rb +++ b/lib/addressable/idna/native.rb @@ -16,19 +16,11 @@ # limitations under the License. 
#++ - +# libidn1 implementing IDNA2003 require "idn" module Addressable module IDNA - def self.punycode_encode(value) - IDN::Punycode.encode(value.to_s) - end - - def self.punycode_decode(value) - IDN::Punycode.decode(value.to_s) - end - def self.to_ascii(value) value.to_s.split('.', -1).map do |segment| if segment.size > 0 && segment.size < 64 diff --git a/lib/addressable/idna/native2.rb b/lib/addressable/idna/native2.rb new file mode 100644 index 00000000..ef8e3e75 --- /dev/null +++ b/lib/addressable/idna/native2.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +#-- +# Copyright (C) Bob Aman +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#++ + +# libidn2 implementing IDNA2008+TR46 +require "ffi" + +module Addressable + module IDNA + extend FFI::Library + + ffi_lib ["idn2", "libidn2.0", "libidn2.so.0"] + + attach_function :idn2_to_ascii_8z, %i[string pointer int], :int + attach_function :idn2_to_unicode_8z8z, %i[string pointer int], :int + attach_function :idn2_strerror, [:int], :string + attach_function :idn2_free, [:pointer], :void + + IDN2_TRANSITIONAL = 4 + IDN2_NONTRANSITIONAL = 8 + + def self.to_ascii(value) + return value if value.ascii_only? 
+ pointer = FFI::MemoryPointer.new(:pointer) + res = idn2_to_ascii_8z(value, pointer, IDN2_NONTRANSITIONAL) + # Fallback to Transitional mode in case of disallowed character + res = idn2_to_ascii_8z(value, pointer, IDN2_TRANSITIONAL) if res != 0 + raise "libidn2 failed to convert \"#{value}\" to ascii (#{idn2_strerror(res)})" if res != 0 + result = pointer.read_pointer.read_string + idn2_free(pointer.read_pointer) + result + end + + def self.to_unicode(value) + pointer = FFI::MemoryPointer.new(:pointer) + res = idn2_to_unicode_8z8z(value, pointer, IDN2_NONTRANSITIONAL) + return value if res != 0 + result = pointer.read_pointer.read_string + idn2_free(pointer.read_pointer) + result.force_encoding('UTF-8') + end + end +end diff --git a/spec/addressable/idna_spec.rb b/spec/addressable/idna_spec.rb index 428c9ec8..9e2fb7d6 100644 --- a/spec/addressable/idna_spec.rb +++ b/spec/addressable/idna_spec.rb @@ -258,7 +258,7 @@ end describe Addressable::IDNA, "when using the pure-Ruby implementation" do - before do + before :all do Addressable.send(:remove_const, :IDNA) load "addressable/idna/pure.rb" end @@ -266,6 +266,10 @@ it_should_behave_like "converting from unicode to ASCII" it_should_behave_like "converting from ASCII to unicode" + it "should implement IDNA2008 non transitional" do + expect(Addressable::IDNA.to_ascii("faß.de")).to eq("xn--fa-hia.de") + end + begin require "fiber" @@ -285,18 +289,45 @@ begin require "idn" - describe Addressable::IDNA, "when using the native-code implementation" do - before do + describe Addressable::IDNA, "when using the libidn1 native implementation (idn gem)" do + before :all do Addressable.send(:remove_const, :IDNA) load "addressable/idna/native.rb" end it_should_behave_like "converting from unicode to ASCII" it_should_behave_like "converting from ASCII to unicode" + + it "should implement IDNA2003" do + expect(Addressable::IDNA.to_ascii("faß.de")).to eq("fass.de") + end end rescue LoadError => error raise error if ENV["CI"] && 
TestHelper.native_supported? - # Cannot test the native implementation without libidn support. - warn('Could not load native IDN implementation.') + # Cannot test the native implementation without libidn installed. + warn('Could not load native libidn1 implementation.') end + +begin + require "addressable/idna/native2.rb" + + describe Addressable::IDNA, "when using the libidn2 native implementation (ffi)" do + before :all do + Addressable.send(:remove_const, :IDNA) + load "addressable/idna/native2.rb" + end + + it_should_behave_like "converting from unicode to ASCII" + it_should_behave_like "converting from ASCII to unicode" + + it "should implement IDNA2008 non transitional" do + expect(Addressable::IDNA.to_ascii("faß.de")).to eq("xn--fa-hia.de") + end + end +rescue LoadError => error + raise error if ENV["CI"] && TestHelper.native_supported? + + # Cannot test the native implementation without libidn2 installed. + warn('Could not load native libidn2 implementation.') +end \ No newline at end of file diff --git a/tasks/profile.rake b/tasks/profile.rake index b697d489..29bc5459 100644 --- a/tasks/profile.rake +++ b/tasks/profile.rake @@ -41,13 +41,16 @@ namespace :profile do if ENV["IDNA_MODE"] == "pure" Addressable.send(:remove_const, :IDNA) load "addressable/idna/pure.rb" + elsif ENV["IDNA_MODE"] == "native" + Addressable.send(:remove_const, :IDNA) + load "addressable/idna/native.rb" end start_at = Time.now.to_f report = MemoryProfiler.report do 30_000.times do Addressable::URI.parse( - "http://google.com/stuff/../?with_lots=of&params=asdff#!stuff" + "http://fiᆵリ宠퐱卄.com/stuff/../?with_lots=of&params=asdff#!stuff" ).normalize end end From 0a6f09194a66f07e0b095ce87c844848547d497a Mon Sep 17 00:00:00 2001 From: Adrien Rey-Jarthon Date: Tue, 11 Apr 2023 16:51:19 +0200 Subject: [PATCH 02/12] Fix gemspec generation --- Rakefile | 6 +++--- addressable.gemspec | 6 +++--- tasks/gem.rake | 1 + 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Rakefile b/Rakefile index 
e19785d0..749b907f 100644 --- a/Rakefile +++ b/Rakefile @@ -20,9 +20,9 @@ additionally provides extensive support for IRIs and URI templates. TEXT PKG_FILES = FileList[ - "lib/**/*", "spec/**/*", "vendor/**/*", "data/**/*", - "tasks/**/*", - "[A-Z]*", "Rakefile" + "lib/**/*.rb", "spec/**/*.rb", "data/**/*", + "tasks/**/*.rake", + "[A-Z]*", "*.gemspec" ].exclude(/pkg/).exclude(/database\.yml/). exclude(/Gemfile\.lock/).exclude(/[_\.]git$/). exclude(/coverage/) diff --git a/addressable.gemspec b/addressable.gemspec index d9293f46..90a8874c 100644 --- a/addressable.gemspec +++ b/addressable.gemspec @@ -9,7 +9,7 @@ Gem::Specification.new do |s| s.metadata = { "changelog_uri" => "https://github.com/sporkmonger/addressable/blob/main/CHANGELOG.md" } if s.respond_to? :metadata= s.require_paths = ["lib".freeze] s.authors = ["Bob Aman".freeze] - s.date = "2023-04-09" + s.date = "2023-04-11" s.description = "Addressable is an alternative implementation to the URI implementation that is\npart of Ruby's standard library. 
It is flexible, offers heuristic parsing, and\nadditionally provides extensive support for IRIs and URI templates.\n".freeze s.email = "bob@sporkmonger.com".freeze s.extra_rdoc_files = ["README.md".freeze] @@ -18,12 +18,12 @@ Gem::Specification.new do |s| s.licenses = ["Apache-2.0".freeze] s.rdoc_options = ["--main".freeze, "README.md".freeze] s.required_ruby_version = Gem::Requirement.new(">= 2.2".freeze) - s.rubygems_version = "3.4.10".freeze + s.rubygems_version = "3.4.11".freeze s.summary = "URI Implementation".freeze s.specification_version = 4 s.add_runtime_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 6.0"]) - s.add_runtime_dependency(%q<ffi>.freeze) + s.add_runtime_dependency(%q<ffi>.freeze, [">= 0"]) s.add_development_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"]) end diff --git a/tasks/gem.rake b/tasks/gem.rake index 24d9714b..ac67a138 100644 --- a/tasks/gem.rake +++ b/tasks/gem.rake @@ -22,6 +22,7 @@ namespace :gem do s.required_ruby_version = ">= 2.2" s.add_runtime_dependency "public_suffix", ">= 2.0.2", "< 6.0" + s.add_runtime_dependency "ffi" s.add_development_dependency "bundler", ">= 1.0", "< 3.0" s.require_path = "lib" From a1fb7dec9fac2fed6e93653243cecbd01e78e9e1 Mon Sep 17 00:00:00 2001 From: Adrien Rey-Jarthon Date: Tue, 11 Apr 2023 19:23:16 +0200 Subject: [PATCH 03/12] Rename IDNA backend implementations and refactor loading mechanism --- .github/workflows/test.yml | 2 +- README.md | 32 +++++++++++++++- benchmark/idna.rb | 37 +++++++++--------- lib/addressable/idna.rb | 32 +++++++++++++++- lib/addressable/idna/libidn1.rb | 60 +++++++++++++++++++++++++++++ lib/addressable/idna/libidn2.rb | 59 +++++++++++++++++++++++++++++ lib/addressable/idna/native.rb | 62 ++---------------------------- lib/addressable/idna/native2.rb | 67 --------------------------------- lib/addressable/idna/pure.rb | 6 +-- spec/addressable/idna_spec.rb | 17 ++++----- tasks/profile.rake | 12 +++--- 11 files changed, 218 insertions(+), 168 deletions(-) create mode 100644 
lib/addressable/idna/libidn1.rb create mode 100644 lib/addressable/idna/libidn2.rb delete mode 100644 lib/addressable/idna/native2.rb diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2d4f5beb..08d804a5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -13,7 +13,7 @@ jobs: fail-fast: false matrix: ruby: [2.7] - idna_mode: [native2, native, pure] + idna_mode: [libidn2, libidn1, pure] os: [ubuntu-20.04] env: IDNA_MODE: ${{ matrix.idna_mode }} diff --git a/README.md b/README.md index 9892f615..bd981761 100644 --- a/README.md +++ b/README.md @@ -94,8 +94,21 @@ template.extract(uri) $ gem install addressable ``` -You may optionally turn on native IDN support by installing libidn and the -idn gem: +# IDNA support (unicode hostnames) + +Three IDNA implementations are available, the first one available is used: +- A `libidn2` wrapper (if `libidn2` is installed), supporting IDNA2008+UTS#46. +- A `libidn1` wrapper (if `libidn` and the `idn` gem are installed), supporting IDNA2003. +- A pure ruby implementation (slower), [almost](https://github.com/sporkmonger/addressable/issues/491) supporting IDNA2008. 
+ +To install `libidn2`: + +```console +$ sudo apt-get install libidn2-dev # Debian/Ubuntu +$ brew install libidn2 # OS X +``` + +To install the legacy `libidn1` and the `idn` gem (also add it to your Gemfile): ```console $ sudo apt-get install libidn11-dev # Debian/Ubuntu @@ -103,6 +116,21 @@ $ brew install libidn # OS X $ gem install idn-ruby ``` +You can check which implementation is active with: + +```ruby +puts Addressable::IDNA.backend.name +``` + +Finally if you want to force a different IDNA implementation, you can do so like this (after addressable is required): + +```ruby +require "addressable/idna/pure.rb" +Addressable::IDNA.backend = Addressable::IDNA::Pure +require "addressable/idna/libidn1" +Addressable::IDNA.backend = Addressable::IDNA::Libidn1 +``` + # Semantic Versioning This project uses [Semantic Versioning](https://semver.org/). You can (and should) specify your diff --git a/benchmark/idna.rb b/benchmark/idna.rb index 69978299..0cadc11a 100644 --- a/benchmark/idna.rb +++ b/benchmark/idna.rb @@ -2,32 +2,30 @@ # frozen_string_literal: true. 
require "benchmark" +require "addressable/idna/libidn2" +require "addressable/idna/libidn1" +require "addressable/idna/pure" value = "fiᆵリ宠퐱卄.com" expected = "xn--fi-w1k207vk59a3qk9w9r.com" N = 100_000 +fail "pure ruby does not match" unless expected == Addressable::IDNA::Pure.to_ascii(value) +fail "libidn does not match" unless expected == Addressable::IDNA::Libidn1.to_ascii(value) +fail "addressable does not match" unless expected == Addressable::IDNA::Libidn2.to_ascii(value) + Benchmark.bmbm do |x| - x.report("pure") { - load "lib/addressable/idna/pure.rb" - fail "pure ruby does not match" unless expected == Addressable::IDNA.to_ascii(value) - N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) } - Addressable.send(:remove_const, :IDNA) - } + x.report("pure") { N.times { + Addressable::IDNA::Pure.to_unicode(Addressable::IDNA::Pure.to_ascii(value)) + } } - x.report("libidn") { - load "lib/addressable/idna/native.rb" - fail "libidn does not match" unless expected == Addressable::IDNA.to_ascii(value) - N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) } - Addressable.send(:remove_const, :IDNA) - } + x.report("libidn") { N.times { + Addressable::IDNA::Libidn1.to_unicode(Addressable::IDNA::Libidn1.to_ascii(value)) + } } - x.report("libidn2") { - load "lib/addressable/idna/native2.rb" - fail "addressable does not match" unless expected == Addressable::IDNA.to_ascii(value) - N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) } - Addressable.send(:remove_const, :IDNA) - } + x.report("libidn2") { N.times { + Addressable::IDNA::Libidn2.to_unicode(Addressable::IDNA::Libidn2.to_ascii(value)) + } } end # > ruby benchmark/idna.rb @@ -43,10 +41,9 @@ # libidn2 0.764782 0.000000 0.764782 ( 0.764863) puts "\nMemory leak test for libidn2 (memory should stabilize quickly):" -load "lib/addressable/idna/native2.rb" GC.disable # Only run GC when manually called 10.times do - N.times { 
Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) } + N.times { Addressable::IDNA::Libidn2.to_unicode(Addressable::IDNA::Libidn2.to_ascii(value)) } GC.start # Run a major GC pid, size = `ps ax -o pid,rss | grep -E "^[[:space:]]*#{$$}"`.strip.split.map(&:to_i) puts " Memory: #{size/1024}MB" # show process memory diff --git a/lib/addressable/idna.rb b/lib/addressable/idna.rb index d66b5db2..db7f8c2e 100644 --- a/lib/addressable/idna.rb +++ b/lib/addressable/idna.rb @@ -16,15 +16,43 @@ # limitations under the License. #++ +module Addressable + module IDNA + class << self + attr_accessor :backend + + # public interface implemented by all backends + def to_ascii(value) + backend.to_ascii(value) + end + + def to_unicode(value) + backend.to_unicode(value) + end + + # @deprecated Use {String#unicode_normalize(:nfkc)} instead + def unicode_normalize_kc(value) + value.to_s.unicode_normalize(:nfkc) + end + + extend Gem::Deprecate + deprecate :unicode_normalize_kc, "String#unicode_normalize(:nfkc)", 2023, 4 + end + end +end + begin - require "addressable/idna/native2" + require "addressable/idna/libidn2" + Addressable::IDNA.backend = Addressable::IDNA::Libidn2 rescue LoadError # libidn2 or the ffi gem was not available, fall back on libidn1 begin - require "addressable/idna/native" + require "addressable/idna/libidn1" + Addressable::IDNA.backend = Addressable::IDNA::Libidn1 rescue LoadError # libidn or the idn gem was not available, fall back on a pure-Ruby # implementation... 
require "addressable/idna/pure" + Addressable::IDNA.backend = Addressable::IDNA::Pure end end \ No newline at end of file diff --git a/lib/addressable/idna/libidn1.rb b/lib/addressable/idna/libidn1.rb new file mode 100644 index 00000000..0ca5e66e --- /dev/null +++ b/lib/addressable/idna/libidn1.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +#-- +# Copyright (C) Bob Aman +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#++ + +# libidn1 implementing IDNA2003 +require "idn" + +module Addressable + module IDNA + module Libidn1 + class << self + # @deprecated Use {String#unicode_normalize(:nfkc)} instead + def unicode_normalize_kc(value) + value.to_s.unicode_normalize(:nfkc) + end + + extend Gem::Deprecate + deprecate :unicode_normalize_kc, "String#unicode_normalize(:nfkc)", 2023, 4 + end + + def self.to_ascii(value) + value.to_s.split('.', -1).map do |segment| + if segment.size > 0 && segment.size < 64 + IDN::Idna.toASCII(segment, IDN::Idna::ALLOW_UNASSIGNED) + elsif segment.size >= 64 + segment + else + '' + end + end.join('.') + end + + def self.to_unicode(value) + value.to_s.split('.', -1).map do |segment| + if segment.size > 0 && segment.size < 64 + IDN::Idna.toUnicode(segment, IDN::Idna::ALLOW_UNASSIGNED) + elsif segment.size >= 64 + segment + else + '' + end + end.join('.') + end + end + end +end diff --git a/lib/addressable/idna/libidn2.rb b/lib/addressable/idna/libidn2.rb new file mode 100644 index 00000000..a63df507 --- /dev/null +++ 
b/lib/addressable/idna/libidn2.rb @@ -0,0 +1,59 @@ +# frozen_string_literal: true + +#-- +# Copyright (C) Bob Aman +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#++ + +# libidn2 implementing IDNA2008+TR46 +require "ffi" + +module Addressable + module IDNA + module Libidn2 + extend FFI::Library + + ffi_lib ["idn2", "libidn2.0", "libidn2.so.0"] + + attach_function :idn2_to_ascii_8z, %i[string pointer int], :int + attach_function :idn2_to_unicode_8z8z, %i[string pointer int], :int + attach_function :idn2_strerror, [:int], :string + attach_function :idn2_free, [:pointer], :void + + IDN2_TRANSITIONAL = 4 + IDN2_NONTRANSITIONAL = 8 + + def self.to_ascii(value) + return value if value.ascii_only? 
+ pointer = FFI::MemoryPointer.new(:pointer) + res = idn2_to_ascii_8z(value, pointer, IDN2_NONTRANSITIONAL) + # Fallback to Transitional mode in case of disallowed character + res = idn2_to_ascii_8z(value, pointer, IDN2_TRANSITIONAL) if res != 0 + raise "libidn2 failed to convert \"#{value}\" to ascii (#{idn2_strerror(res)})" if res != 0 + result = pointer.read_pointer.read_string + idn2_free(pointer.read_pointer) + result + end + + def self.to_unicode(value) + pointer = FFI::MemoryPointer.new(:pointer) + res = idn2_to_unicode_8z8z(value, pointer, IDN2_NONTRANSITIONAL) + return value if res != 0 + result = pointer.read_pointer.read_string + idn2_free(pointer.read_pointer) + result.force_encoding('UTF-8') + end + end + end +end diff --git a/lib/addressable/idna/native.rb b/lib/addressable/idna/native.rb index 1ebdf720..88ef5fde 100644 --- a/lib/addressable/idna/native.rb +++ b/lib/addressable/idna/native.rb @@ -1,58 +1,4 @@ -# frozen_string_literal: true - -#-- -# Copyright (C) Bob Aman -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-#++ - -# libidn1 implementing IDNA2003 -require "idn" - -module Addressable - module IDNA - class << self - # @deprecated Use {String#unicode_normalize(:nfkc)} instead - def unicode_normalize_kc(value) - value.to_s.unicode_normalize(:nfkc) - end - - extend Gem::Deprecate - deprecate :unicode_normalize_kc, "String#unicode_normalize(:nfkc)", 2023, 4 - end - - def self.to_ascii(value) - value.to_s.split('.', -1).map do |segment| - if segment.size > 0 && segment.size < 64 - IDN::Idna.toASCII(segment, IDN::Idna::ALLOW_UNASSIGNED) - elsif segment.size >= 64 - segment - else - '' - end - end.join('.') - end - - def self.to_unicode(value) - value.to_s.split('.', -1).map do |segment| - if segment.size > 0 && segment.size < 64 - IDN::Idna.toUnicode(segment, IDN::Idna::ALLOW_UNASSIGNED) - elsif segment.size >= 64 - segment - else - '' - end - end.join('.') - end - end -end +# Deprecated, for backward compatibility only +require "addressable/idna/libidn1" +Addressable::IDNA.backend = Addressable::IDNA::Libidn1 +warn "NOTE: loading 'addressable/idna/native' is deprecated; use 'addressable/idna/libidn1' instead and set `Addressable::IDNA.backend = Addressable::IDNA::Libidn1` to force libidn1." \ No newline at end of file diff --git a/lib/addressable/idna/native2.rb b/lib/addressable/idna/native2.rb deleted file mode 100644 index 9bb59e46..00000000 --- a/lib/addressable/idna/native2.rb +++ /dev/null @@ -1,67 +0,0 @@ -# frozen_string_literal: true - -#-- -# Copyright (C) Bob Aman -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. -#++ - -# libidn2 implementing IDNA2008+TR46 -require "ffi" - -module Addressable - module IDNA - extend FFI::Library - - ffi_lib ["idn2", "libidn2.0", "libidn2.so.0"] - - attach_function :idn2_to_ascii_8z, %i[string pointer int], :int - attach_function :idn2_to_unicode_8z8z, %i[string pointer int], :int - attach_function :idn2_strerror, [:int], :string - attach_function :idn2_free, [:pointer], :void - - IDN2_TRANSITIONAL = 4 - IDN2_NONTRANSITIONAL = 8 - - class << self - # @deprecated Use {String#unicode_normalize(:nfkc)} instead - def unicode_normalize_kc(value) - value.to_s.unicode_normalize(:nfkc) - end - - extend Gem::Deprecate - deprecate :unicode_normalize_kc, "String#unicode_normalize(:nfkc)", 2023, 4 - end - - def self.to_ascii(value) - return value if value.ascii_only? - pointer = FFI::MemoryPointer.new(:pointer) - res = idn2_to_ascii_8z(value, pointer, IDN2_NONTRANSITIONAL) - # Fallback to Transitional mode in case of disallowed character - res = idn2_to_ascii_8z(value, pointer, IDN2_TRANSITIONAL) if res != 0 - raise "libidn2 failed to convert \"#{value}\" to ascii (#{idn2_strerror(res)})" if res != 0 - result = pointer.read_pointer.read_string - idn2_free(pointer.read_pointer) - result - end - - def self.to_unicode(value) - pointer = FFI::MemoryPointer.new(:pointer) - res = idn2_to_unicode_8z8z(value, pointer, IDN2_NONTRANSITIONAL) - return value if res != 0 - result = pointer.read_pointer.read_string - idn2_free(pointer.read_pointer) - result.force_encoding('UTF-8') - end - end -end diff --git a/lib/addressable/idna/pure.rb b/lib/addressable/idna/pure.rb index 3d6ffbad..78696022 100644 --- a/lib/addressable/idna/pure.rb +++ b/lib/addressable/idna/pure.rb @@ -17,8 +17,8 @@ #++ -module Addressable - module IDNA +module Addressable::IDNA + module Pure # This module is loosely based on idn_actionmailer by Mick Staugaard, # the unicode library by Yoshida 
Masato, and the punycode implementation # by Kazuhiro Nishiyama. Most of the code was copied verbatim, but @@ -97,7 +97,7 @@ def self.to_unicode(input) if part =~ /^#{ACE_PREFIX}(.+)/ begin punycode_decode(part[/^#{ACE_PREFIX}(.+)/, 1]) - rescue Addressable::IDNA::PunycodeBadInput + rescue Addressable::IDNA::Pure::PunycodeBadInput # toUnicode is explicitly defined as never-fails by the spec part end diff --git a/spec/addressable/idna_spec.rb b/spec/addressable/idna_spec.rb index 9e2fb7d6..f8a8cec3 100644 --- a/spec/addressable/idna_spec.rb +++ b/spec/addressable/idna_spec.rb @@ -259,8 +259,8 @@ describe Addressable::IDNA, "when using the pure-Ruby implementation" do before :all do - Addressable.send(:remove_const, :IDNA) - load "addressable/idna/pure.rb" + require "addressable/idna/pure" + Addressable::IDNA.backend = Addressable::IDNA::Pure end it_should_behave_like "converting from unicode to ASCII" @@ -275,8 +275,9 @@ it "should not blow up inside fibers" do f = Fiber.new do - Addressable.send(:remove_const, :IDNA) + Addressable::IDNA.send(:remove_const, :Pure) load "addressable/idna/pure.rb" + Addressable::IDNA.backend = Addressable::IDNA::Pure end f.resume end @@ -287,12 +288,11 @@ end begin - require "idn" + require "addressable/idna/libidn1" describe Addressable::IDNA, "when using the libidn1 native implementation (idn gem)" do before :all do - Addressable.send(:remove_const, :IDNA) - load "addressable/idna/native.rb" + Addressable::IDNA.backend = Addressable::IDNA::Libidn1 end it_should_behave_like "converting from unicode to ASCII" @@ -310,12 +310,11 @@ end begin - require "addressable/idna/native2.rb" + require "addressable/idna/libidn2" describe Addressable::IDNA, "when using the libidn2 native implementation (ffi)" do before :all do - Addressable.send(:remove_const, :IDNA) - load "addressable/idna/native2.rb" + Addressable::IDNA.backend = Addressable::IDNA::Libidn2 end it_should_behave_like "converting from unicode to ASCII" diff --git 
a/tasks/profile.rake b/tasks/profile.rake index 29bc5459..1ec75e0a 100644 --- a/tasks/profile.rake +++ b/tasks/profile.rake @@ -39,11 +39,11 @@ namespace :profile do require "memory_profiler" require "addressable/uri" if ENV["IDNA_MODE"] == "pure" - Addressable.send(:remove_const, :IDNA) - load "addressable/idna/pure.rb" - elsif ENV["IDNA_MODE"] == "native" - Addressable.send(:remove_const, :IDNA) - load "addressable/idna/native.rb" + require "addressable/idna/pure" + Addressable::IDNA.backend = Addressable::IDNA::Pure + elsif ENV["IDNA_MODE"] == "libidn1" + require "addressable/idna/libidn1" + Addressable::IDNA.backend = Addressable::IDNA::Libidn1 end start_at = Time.now.to_f @@ -56,7 +56,6 @@ namespace :profile do end end_at = Time.now.to_f print_options = { scale_bytes: true, normalize_paths: true } - puts "\n\n" if ENV["CI"] report.pretty_print(**print_options) @@ -67,6 +66,7 @@ namespace :profile do puts "Total allocated: #{t_allocated} (#{report.total_allocated} objects)" puts "Total retained: #{t_retained} (#{report.total_retained} objects)" puts "Took #{end_at - start_at} seconds" + puts "IDNA backend: #{Addressable::IDNA.backend.name}" FileUtils.mkdir_p("tmp") report.pretty_print(to_file: "tmp/memprof.txt", **print_options) From 421cdcaa019b7ecc30f944d8c0131770b71d8829 Mon Sep 17 00:00:00 2001 From: Adrien Rey-Jarthon Date: Tue, 11 Apr 2023 22:59:26 +0200 Subject: [PATCH 04/12] Add Automatic IDNA memory leak detection to CI + keep libidn2 opt-in for the moment --- .github/workflows/test.yml | 4 +++ README.md | 10 ++++--- benchmark/idna.rb | 21 --------------- lib/addressable/idna.rb | 20 +++++--------- tasks/profile.rake | 54 +++++++++++++++++++++++++++++++------- 5 files changed, 61 insertions(+), 48 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 08d804a5..886bbe37 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -40,6 +40,10 @@ jobs: Profile Memory Allocation with ${{ matrix.idna_mode }} 
IDNA during Addressable::Template#match run: bundle exec rake profile:template_match_memory + - name: >- + Test for ${{ matrix.idna_mode }} IDNA backend memory leaks + run: bundle exec rake profile:idna_memory_leak + coverage: runs-on: ${{ matrix.os }} strategy: diff --git a/README.md b/README.md index bd981761..8bc8cad4 100644 --- a/README.md +++ b/README.md @@ -97,9 +97,11 @@ $ gem install addressable # IDNA support (unicode hostnames) Three IDNA implementations are available, the first one available is used: -- A `libidn2` wrapper (if `libidn2` is installed), supporting IDNA2008+UTS#46. - A `libidn1` wrapper (if `libidn` and the `idn` gem are installed), supporting IDNA2003. - A pure ruby implementation (slower), [almost](https://github.com/sporkmonger/addressable/issues/491) supporting IDNA2008. +- A `libidn2` wrapper (if `libidn2` is installed), supporting IDNA2008+UTS#46. + +Note: in the future major version, `libidn2` will become the default. To install `libidn2`: @@ -108,7 +110,7 @@ $ sudo apt-get install libidn2-dev # Debian/Ubuntu $ brew install libidn # OS X ``` -To install the legacy `libidn1` and the `idn` gem (also add it to your Gemfile): +To install `libidn1` and the `idn` gem (also add it to your Gemfile): ```console $ sudo apt-get install libidn11-dev # Debian/Ubuntu @@ -127,8 +129,8 @@ Finally if you want to force a different IDNA implementation, you can do so like ```ruby require "addressable/idna/pure.rb" Addressable::IDNA.backend = Addressable::IDNA::Pure -require "addressable/idna/libidn1" -Addressable::IDNA.backend = Addressable::IDNA::Libidn1 +require "addressable/idna/libidn2" +Addressable::IDNA.backend = Addressable::IDNA::Libidn2 ``` # Semantic Versioning diff --git a/benchmark/idna.rb b/benchmark/idna.rb index 0cadc11a..97cc7b8c 100644 --- a/benchmark/idna.rb +++ b/benchmark/idna.rb @@ -39,24 +39,3 @@ # pure 6.042877 0.000000 6.042877 ( 6.043252) # libidn 0.521668 0.000000 0.521668 ( 0.521704) # libidn2 0.764782 0.000000 0.764782 ( 
0.764863) - -puts "\nMemory leak test for libidn2 (memory should stabilize quickly):" -GC.disable # Only run GC when manually called -10.times do - N.times { Addressable::IDNA::Libidn2.to_unicode(Addressable::IDNA::Libidn2.to_ascii(value)) } - GC.start # Run a major GC - pid, size = `ps ax -o pid,rss | grep -E "^[[:space:]]*#{$$}"`.strip.split.map(&:to_i) - puts " Memory: #{size/1024}MB" # show process memory -end - -# Memory leak test for libidn2 (memory should stabilize quickly): -# Memory: 117MB -# Memory: 121MB -# Memory: 121MB -# Memory: 121MB -# Memory: 121MB -# Memory: 121MB -# Memory: 121MB -# Memory: 121MB -# Memory: 121MB -# Memory: 121MB \ No newline at end of file diff --git a/lib/addressable/idna.rb b/lib/addressable/idna.rb index db7f8c2e..fcb7b2d2 100644 --- a/lib/addressable/idna.rb +++ b/lib/addressable/idna.rb @@ -42,17 +42,11 @@ def unicode_normalize_kc(value) end begin - require "addressable/idna/libidn2" - Addressable::IDNA.backend = Addressable::IDNA::Libidn2 + require "addressable/idna/libidn1" + Addressable::IDNA.backend = Addressable::IDNA::Libidn1 rescue LoadError - # libidn2 or the ffi gem was not available, fall back on libidn1 - begin - require "addressable/idna/libidn1" - Addressable::IDNA.backend = Addressable::IDNA::Libidn1 - rescue LoadError - # libidn or the idn gem was not available, fall back on a pure-Ruby - # implementation... - require "addressable/idna/pure" - Addressable::IDNA.backend = Addressable::IDNA::Pure - end -end \ No newline at end of file + # libidn or the idn gem was not available, fall back on a pure-Ruby + # implementation... 
+ require "addressable/idna/pure" + Addressable::IDNA.backend = Addressable::IDNA::Pure +end diff --git a/tasks/profile.rake b/tasks/profile.rake index 1ec75e0a..2e5d9d81 100644 --- a/tasks/profile.rake +++ b/tasks/profile.rake @@ -1,8 +1,19 @@ # frozen_string_literal: true namespace :profile do + task :idna_selection do + require "addressable/idna" + if ENV["IDNA_MODE"] == "pure" + require "addressable/idna/pure" + Addressable::IDNA.backend = Addressable::IDNA::Pure + elsif ENV["IDNA_MODE"] == "libidn2" + require "addressable/idna/libidn2" + Addressable::IDNA.backend = Addressable::IDNA::Libidn2 + end + end + desc "Profile Template match memory allocations" - task :template_match_memory do + task :template_match_memory => :idna_selection do require "memory_profiler" require "addressable/template" @@ -35,16 +46,9 @@ namespace :profile do end desc "Profile URI parse memory allocations" - task :memory do + task :memory => :idna_selection do require "memory_profiler" require "addressable/uri" - if ENV["IDNA_MODE"] == "pure" - require "addressable/idna/pure" - Addressable::IDNA.backend = Addressable::IDNA::Pure - elsif ENV["IDNA_MODE"] == "libidn1" - require "addressable/idna/libidn1" - Addressable::IDNA.backend = Addressable::IDNA::Libidn1 - end start_at = Time.now.to_f report = MemoryProfiler.report do @@ -65,11 +69,41 @@ namespace :profile do puts "Total allocated: #{t_allocated} (#{report.total_allocated} objects)" puts "Total retained: #{t_retained} (#{report.total_retained} objects)" - puts "Took #{end_at - start_at} seconds" + puts "Took #{(end_at - start_at).round(1)} seconds" puts "IDNA backend: #{Addressable::IDNA.backend.name}" FileUtils.mkdir_p("tmp") report.pretty_print(to_file: "tmp/memprof.txt", **print_options) end end + + desc "Test for IDNA backend memory leaks" + task :idna_memory_leak => :idna_selection do + value = "fiᆵリ宠퐱卄.com" + puts "\nMemory leak test for IDNA backend: #{Addressable::IDNA.backend.name}" + start_at = Time.now.to_f + GC.disable # 
Only run GC when manually called + samples = [] + 10.times do + 50_000.times { + Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) + } + GC.start # Run a major GC + _, size = `ps ax -o pid,rss | grep -E "^[[:space:]]*#{$$}"`.strip.split.map(&:to_i) + samples << size/1024 + puts " Memory: #{size/1024}MB" # show process memory + end + end_at = Time.now.to_f + samples.shift # remove first sample which is often unstable in pure ruby + percent = (samples.last - samples.first) * 100 / samples.first + + puts "Took #{(end_at - start_at).round(1)} seconds" + puts "Memory rose from #{samples.first}MB to #{samples.last}MB" + if percent > 10 + puts "Potential MEMORY LEAK detected (#{percent}% increase)" + exit 1 + else + puts "Looks fine (#{percent}% increase)" + end + end end From 84fdb89ef413f60b219f89c9910604247d3b84f0 Mon Sep 17 00:00:00 2001 From: Adrien Rey-Jarthon Date: Wed, 12 Apr 2023 01:02:37 +0200 Subject: [PATCH 05/12] Restore exceptions path + various fixes --- README.md | 4 ++-- lib/addressable/idna.rb | 9 +++++++++ lib/addressable/idna/libidn2.rb | 2 +- lib/addressable/idna/native.rb | 2 +- lib/addressable/idna/pure.rb | 9 +-------- spec/addressable/idna_spec.rb | 2 +- 6 files changed, 15 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 8bc8cad4..c946c90c 100644 --- a/README.md +++ b/README.md @@ -107,7 +107,7 @@ To install `libidn2`: ```console $ sudo apt-get install libidn2-dev # Debian/Ubuntu -$ brew install libidn # OS X +$ brew install libidn2 # OS X ``` To install `libidn1` and the `idn` gem (also add it to your Gemfile): @@ -127,7 +127,7 @@ puts Addressable::IDNA.backend.name Finally if you want to force a different IDNA implementation, you can do so like this (after addressable is required): ```ruby -require "addressable/idna/pure.rb" +require "addressable/idna/pure" Addressable::IDNA.backend = Addressable::IDNA::Pure require "addressable/idna/libidn2" Addressable::IDNA.backend = Addressable::IDNA::Libidn2 diff --git 
a/lib/addressable/idna.rb b/lib/addressable/idna.rb index fcb7b2d2..50bf2098 100644 --- a/lib/addressable/idna.rb +++ b/lib/addressable/idna.rb @@ -18,6 +18,15 @@ module Addressable module IDNA + # All IDNA conversion related errors + class Error < StandardError; end + # Input is invalid. + class PunycodeBadInput < Error; end + # Output would exceed the space provided. + class PunycodeBigOutput < Error; end + # Input needs wider integers to process. + class PunycodeOverflow < Error; end + class << self attr_accessor :backend diff --git a/lib/addressable/idna/libidn2.rb b/lib/addressable/idna/libidn2.rb index a63df507..cb88f6df 100644 --- a/lib/addressable/idna/libidn2.rb +++ b/lib/addressable/idna/libidn2.rb @@ -40,7 +40,7 @@ def self.to_ascii(value) res = idn2_to_ascii_8z(value, pointer, IDN2_NONTRANSITIONAL) # Fallback to Transitional mode in case of disallowed character res = idn2_to_ascii_8z(value, pointer, IDN2_TRANSITIONAL) if res != 0 - raise "libidn2 failed to convert \"#{value}\" to ascii (#{idn2_strerror(res)})" if res != 0 + raise Error.new("libidn2 failed to convert \"#{value}\" to ascii (#{idn2_strerror(res)})") if res != 0 result = pointer.read_pointer.read_string idn2_free(pointer.read_pointer) result diff --git a/lib/addressable/idna/native.rb b/lib/addressable/idna/native.rb index 88ef5fde..3197ac37 100644 --- a/lib/addressable/idna/native.rb +++ b/lib/addressable/idna/native.rb @@ -1,4 +1,4 @@ # Deprecated, for backward compatibility only require "addressable/idna/libidn1" Addressable::IDNA.backend = Addressable::IDNA::Libidn1 -warn "NOTE: loading 'addressable/idna/native' is deprecated; use 'addressable/idna/libidn1' instead and set `Addressable::IDNA.backend = Addressable::IDNA::Libidn1` to force libidn1." \ No newline at end of file +warn "NOTE: loading 'addressable/idna/native' is deprecated; use 'addressable/idna/libidn1' instead and set `Addressable::IDNA.backend = Addressable::IDNA::Libidn1` to force libidn1." 
diff --git a/lib/addressable/idna/pure.rb b/lib/addressable/idna/pure.rb index 78696022..07ff15e5 100644 --- a/lib/addressable/idna/pure.rb +++ b/lib/addressable/idna/pure.rb @@ -97,7 +97,7 @@ def self.to_unicode(input) if part =~ /^#{ACE_PREFIX}(.+)/ begin punycode_decode(part[/^#{ACE_PREFIX}(.+)/, 1]) - rescue Addressable::IDNA::Pure::PunycodeBadInput + rescue Addressable::IDNA::PunycodeBadInput # toUnicode is explicitly defined as never-fails by the spec part end @@ -203,13 +203,6 @@ def self.lookup_unicode_lowercase(codepoint) "`abcdefghijklmno" + "pqrstuvwxyz{|}~\n" - # Input is invalid. - class PunycodeBadInput < StandardError; end - # Output would exceed the space provided. - class PunycodeBigOutput < StandardError; end - # Input needs wider integers to process. - class PunycodeOverflow < StandardError; end - def self.punycode_encode(unicode) unicode = unicode.to_s unless unicode.is_a?(String) input = unicode.unpack("U*") diff --git a/spec/addressable/idna_spec.rb b/spec/addressable/idna_spec.rb index f8a8cec3..94c13295 100644 --- a/spec/addressable/idna_spec.rb +++ b/spec/addressable/idna_spec.rb @@ -329,4 +329,4 @@ # Cannot test the native implementation without libidn2 installed. 
warn('Could not load native libidn2 implementation.') -end \ No newline at end of file +end From a5e87c6824ff927adc05db4d21103fccb4ce3277 Mon Sep 17 00:00:00 2001 From: Adrien Rey-Jarthon Date: Mon, 17 Apr 2023 21:35:29 +0200 Subject: [PATCH 06/12] Add strict mode for IDNA conversion --- README.md | 7 ++++--- lib/addressable/idna.rb | 6 +++++- lib/addressable/idna/libidn1.rb | 20 ++------------------ lib/addressable/idna/libidn2.rb | 5 ++--- spec/addressable/idna_spec.rb | 28 ++++++++++++++++++++++++++++ 5 files changed, 41 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index c946c90c..b55b43b4 100644 --- a/README.md +++ b/README.md @@ -118,10 +118,9 @@ $ brew install libidn # OS X $ gem install idn-ruby ``` -You can check which implementation is active with: - +Optionally you can turn on the strict mode which will raise exceptions in case of invalid hostname during IDNA conversion. The default (`false`) silently ignores them and keeps the hostname unchanged. The strictness will depend on the backend used, libidn2 is stricted than libidn1 for example. 
```ruby -puts Addressable::IDNA.backend.name +Addressable::IDNA.backend.strict_mode = true # default: false ``` Finally if you want to force a different IDNA implementation, you can do so like this (after addressable is required): @@ -131,6 +130,8 @@ require "addressable/idna/pure" Addressable::IDNA.backend = Addressable::IDNA::Pure require "addressable/idna/libidn2" Addressable::IDNA.backend = Addressable::IDNA::Libidn2 +# Check which implementation is active: +puts Addressable::IDNA.backend.name ``` # Semantic Versioning diff --git a/lib/addressable/idna.rb b/lib/addressable/idna.rb index 50bf2098..5cf6e6cc 100644 --- a/lib/addressable/idna.rb +++ b/lib/addressable/idna.rb @@ -28,15 +28,19 @@ class PunycodeBigOutput < Error; end class PunycodeOverflow < Error; end class << self - attr_accessor :backend + attr_accessor :backend, :strict_mode # public interface implemented by all backends def to_ascii(value) backend.to_ascii(value) + rescue Error, IDN::Idna::IdnaError + strict_mode ? raise : value end def to_unicode(value) backend.to_unicode(value) + rescue Error, IDN::Idna::IdnaError + strict_mode ? 
raise : value end # @deprecated Use {String#unicode_normalize(:nfkc)} instead diff --git a/lib/addressable/idna/libidn1.rb b/lib/addressable/idna/libidn1.rb index 0ca5e66e..5eecbab1 100644 --- a/lib/addressable/idna/libidn1.rb +++ b/lib/addressable/idna/libidn1.rb @@ -33,27 +33,11 @@ def unicode_normalize_kc(value) end def self.to_ascii(value) - value.to_s.split('.', -1).map do |segment| - if segment.size > 0 && segment.size < 64 - IDN::Idna.toASCII(segment, IDN::Idna::ALLOW_UNASSIGNED) - elsif segment.size >= 64 - segment - else - '' - end - end.join('.') + IDN::Idna.toASCII(value, IDN::Idna::ALLOW_UNASSIGNED) end def self.to_unicode(value) - value.to_s.split('.', -1).map do |segment| - if segment.size > 0 && segment.size < 64 - IDN::Idna.toUnicode(segment, IDN::Idna::ALLOW_UNASSIGNED) - elsif segment.size >= 64 - segment - else - '' - end - end.join('.') + IDN::Idna.toUnicode(value, IDN::Idna::ALLOW_UNASSIGNED) end end end diff --git a/lib/addressable/idna/libidn2.rb b/lib/addressable/idna/libidn2.rb index cb88f6df..357f5b00 100644 --- a/lib/addressable/idna/libidn2.rb +++ b/lib/addressable/idna/libidn2.rb @@ -35,11 +35,10 @@ module Libidn2 IDN2_NONTRANSITIONAL = 8 def self.to_ascii(value) - return value if value.ascii_only? 
pointer = FFI::MemoryPointer.new(:pointer) res = idn2_to_ascii_8z(value, pointer, IDN2_NONTRANSITIONAL) # Fallback to Transitional mode in case of disallowed character - res = idn2_to_ascii_8z(value, pointer, IDN2_TRANSITIONAL) if res != 0 + res = idn2_to_ascii_8z(value, pointer, IDN2_TRANSITIONAL) if res == -304 raise Error.new("libidn2 failed to convert \"#{value}\" to ascii (#{idn2_strerror(res)})") if res != 0 result = pointer.read_pointer.read_string idn2_free(pointer.read_pointer) @@ -49,7 +48,7 @@ def self.to_ascii(value) def self.to_unicode(value) pointer = FFI::MemoryPointer.new(:pointer) res = idn2_to_unicode_8z8z(value, pointer, IDN2_NONTRANSITIONAL) - return value if res != 0 + raise Error.new("libidn2 failed to convert \"#{value}\" to unicode (#{idn2_strerror(res)})") if res != 0 result = pointer.read_pointer.read_string idn2_free(pointer.read_pointer) result.force_encoding('UTF-8') diff --git a/spec/addressable/idna_spec.rb b/spec/addressable/idna_spec.rb index 94c13295..f38d8307 100644 --- a/spec/addressable/idna_spec.rb +++ b/spec/addressable/idna_spec.rb @@ -156,6 +156,7 @@ shared_examples_for "converting from ASCII to unicode" do long = 'AcinusFallumTrompetumNullunCreditumVisumEstAtCuadLongumEtCefallum.com' + it "should convert '#{long}' correctly" do expect(Addressable::IDNA.to_unicode(long)).to eq(long) end @@ -323,6 +324,33 @@ it "should implement IDNA2008 non transitional" do expect(Addressable::IDNA.to_ascii("faß.de")).to eq("xn--fa-hia.de") end + + context "with strict_mode = true" do + before { Addressable::IDNA.strict_mode = true } + after { Addressable::IDNA.strict_mode = false } + + long = 'AcinusFallumTrompetumNullunCreditumVisumEstAtCuadLongumEtCefallum.com' + it "should raise on label too long (>63)" do + expect { + Addressable::IDNA.to_unicode(long) + }.to raise_error(/longer than 63 char|too large/) + expect { + Addressable::IDNA.to_ascii(long) + }.to raise_error(/longer than 63 char|too large/) + end + + it "should raise when 
punycode decode fails" do + expect { + Addressable::IDNA.to_unicode("xn--zckp1cyg1.sblo.jp") + }.to raise_error(Addressable::IDNA::Error, /invalid punycode/) + end + + it "should raise when the ACE prefix has no suffix" do + expect { + Addressable::IDNA.to_unicode("xn--...-") + }.to raise_error(Addressable::IDNA::Error, /invalid punycode/) + end + end end rescue LoadError => error raise error if ENV["CI"] && TestHelper.native_supported? From 3482be073ffe822ab5bed9737304fae1affa3854 Mon Sep 17 00:00:00 2001 From: Adrien Rey-Jarthon Date: Tue, 18 Apr 2023 10:44:56 +0200 Subject: [PATCH 07/12] Update README.md Co-authored-by: Patrik Ragnarsson --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b55b43b4..cc84e5a6 100644 --- a/README.md +++ b/README.md @@ -118,7 +118,7 @@ $ brew install libidn # OS X $ gem install idn-ruby ``` -Optionally you can turn on the strict mode which will raise exceptions in case of invalid hostname during IDNA conversion. The default (`false`) silently ignores them and keeps the hostname unchanged. The strictness will depend on the backend used, libidn2 is stricted than libidn1 for example. +Optionally you can turn on the strict mode which will raise exceptions in case of invalid hostname during IDNA conversion. The default (`false`) silently ignores them and keeps the hostname unchanged. The strictness will depend on the backend used, libidn2 is stricter than libidn1 for example. 
```ruby Addressable::IDNA.backend.strict_mode = true # default: false ``` From c63f9baa5d1e33aeb374389b491430355edbd5c1 Mon Sep 17 00:00:00 2001 From: Adrien Rey-Jarthon Date: Tue, 18 Apr 2023 12:32:11 +0200 Subject: [PATCH 08/12] Move libidn1 specific exception handling inside the libidn1.rb file as it requires the gem to be loaded --- lib/addressable/idna.rb | 4 ++-- lib/addressable/idna/libidn1.rb | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/lib/addressable/idna.rb b/lib/addressable/idna.rb index 5cf6e6cc..177360ad 100644 --- a/lib/addressable/idna.rb +++ b/lib/addressable/idna.rb @@ -33,13 +33,13 @@ class << self # public interface implemented by all backends def to_ascii(value) backend.to_ascii(value) - rescue Error, IDN::Idna::IdnaError + rescue Error strict_mode ? raise : value end def to_unicode(value) backend.to_unicode(value) - rescue Error, IDN::Idna::IdnaError + rescue Error strict_mode ? raise : value end diff --git a/lib/addressable/idna/libidn1.rb b/lib/addressable/idna/libidn1.rb index 5eecbab1..53b9842b 100644 --- a/lib/addressable/idna/libidn1.rb +++ b/lib/addressable/idna/libidn1.rb @@ -34,10 +34,14 @@ def unicode_normalize_kc(value) def self.to_ascii(value) IDN::Idna.toASCII(value, IDN::Idna::ALLOW_UNASSIGNED) + rescue IDN::Idna::IdnaError + Addressable::IDNA.strict_mode ? raise : value end def self.to_unicode(value) IDN::Idna.toUnicode(value, IDN::Idna::ALLOW_UNASSIGNED) + rescue IDN::Idna::IdnaError + Addressable::IDNA.strict_mode ? 
raise : value end end end From ccbcb9e419de67ed17630908f229bf03e41acecf Mon Sep 17 00:00:00 2001 From: Adrien Rey-Jarthon Date: Tue, 18 Apr 2023 16:12:09 +0200 Subject: [PATCH 09/12] Change IDNA::Error exception to inherit URI::InvalidURIError --- lib/addressable/idna.rb | 7 ++++++- lib/addressable/uri.rb | 5 ----- spec/addressable/idna_spec.rb | 12 ++++++++++-- 3 files changed, 16 insertions(+), 8 deletions(-) diff --git a/lib/addressable/idna.rb b/lib/addressable/idna.rb index 177360ad..5da84c86 100644 --- a/lib/addressable/idna.rb +++ b/lib/addressable/idna.rb @@ -17,9 +17,14 @@ #++ module Addressable + # moved from uri.rb here so we can inherit it + class URI + class InvalidURIError < StandardError; end + end + module IDNA # All IDNA conversion related errors - class Error < StandardError; end + class Error < Addressable::URI::InvalidURIError; end # Input is invalid. class PunycodeBadInput < Error; end # Output would exceed the space provided. diff --git a/lib/addressable/uri.rb b/lib/addressable/uri.rb index 50ccdaf5..4a06daf5 100644 --- a/lib/addressable/uri.rb +++ b/lib/addressable/uri.rb @@ -29,11 +29,6 @@ module Addressable # RFC 3986, # RFC 3987. class URI - ## - # Raised if something other than a uri is supplied. - class InvalidURIError < StandardError - end - ## # Container for the character classes specified in # RFC 3986. 
diff --git a/spec/addressable/idna_spec.rb b/spec/addressable/idna_spec.rb index f38d8307..c0f0e286 100644 --- a/spec/addressable/idna_spec.rb +++ b/spec/addressable/idna_spec.rb @@ -271,6 +271,14 @@ expect(Addressable::IDNA.to_ascii("faß.de")).to eq("xn--fa-hia.de") end + it "throws exceptions which inherits Addressable::URI::InvalidURIError" do + # this way IDNA exceptions are also caught by existing rescue on InvalidURIError + expect(Addressable::IDNA::Error).to be < Addressable::URI::InvalidURIError + expect(Addressable::IDNA::PunycodeBadInput).to be < Addressable::IDNA::Error + expect(Addressable::IDNA::PunycodeBigOutput).to be < Addressable::IDNA::Error + expect(Addressable::IDNA::PunycodeOverflow).to be < Addressable::IDNA::Error + end + begin require "fiber" @@ -333,10 +341,10 @@ it "should raise on label too long (>63)" do expect { Addressable::IDNA.to_unicode(long) - }.to raise_error(/longer than 63 char|too large/) + }.to raise_error(Addressable::IDNA::Error, /longer than 63 char/) expect { Addressable::IDNA.to_ascii(long) - }.to raise_error(/longer than 63 char|too large/) + }.to raise_error(Addressable::URI::InvalidURIError, /longer than 63 char/) end it "should raise when punycode decode fails" do From 9eb3910a6376fa2c36ad0a9ab04285f49905a761 Mon Sep 17 00:00:00 2001 From: Adrien Rey-Jarthon Date: Tue, 18 Apr 2023 18:40:59 +0200 Subject: [PATCH 10/12] try with option 2: wrap errors in URI --- lib/addressable/idna.rb | 7 +- lib/addressable/idna/libidn1.rb | 8 +-- lib/addressable/uri.rb | 9 +++ spec/addressable/idna_spec.rb | 22 ++++--- spec/addressable/uri_spec.rb | 112 ++++++++++++++++++++++++++++++++ 5 files changed, 139 insertions(+), 19 deletions(-) diff --git a/lib/addressable/idna.rb b/lib/addressable/idna.rb index 5da84c86..177360ad 100644 --- a/lib/addressable/idna.rb +++ b/lib/addressable/idna.rb @@ -17,14 +17,9 @@ #++ module Addressable - # moved from uri.rb here so we can inherit it - class URI - class InvalidURIError < StandardError; end - 
end - module IDNA # All IDNA conversion related errors - class Error < Addressable::URI::InvalidURIError; end + class Error < StandardError; end # Input is invalid. class PunycodeBadInput < Error; end # Output would exceed the space provided. diff --git a/lib/addressable/idna/libidn1.rb b/lib/addressable/idna/libidn1.rb index 53b9842b..f6d1b0fe 100644 --- a/lib/addressable/idna/libidn1.rb +++ b/lib/addressable/idna/libidn1.rb @@ -34,14 +34,14 @@ def unicode_normalize_kc(value) def self.to_ascii(value) IDN::Idna.toASCII(value, IDN::Idna::ALLOW_UNASSIGNED) - rescue IDN::Idna::IdnaError - Addressable::IDNA.strict_mode ? raise : value + rescue IDN::Idna::IdnaError => e + Addressable::IDNA.strict_mode ? raise(Error.new(e)) : value end def self.to_unicode(value) IDN::Idna.toUnicode(value, IDN::Idna::ALLOW_UNASSIGNED) - rescue IDN::Idna::IdnaError - Addressable::IDNA.strict_mode ? raise : value + rescue IDN::Idna::IdnaError => e + Addressable::IDNA.strict_mode ? raise(Error.new(e)) : value end end end diff --git a/lib/addressable/uri.rb b/lib/addressable/uri.rb index 4a06daf5..5b1b91a9 100644 --- a/lib/addressable/uri.rb +++ b/lib/addressable/uri.rb @@ -29,6 +29,11 @@ module Addressable # RFC 3986, # RFC 3987. class URI + ## + # Raised if something other than a uri is supplied. + class InvalidURIError < StandardError + end + ## # Container for the character classes specified in # RFC 3986. 
@@ -1135,6 +1140,8 @@ def normalized_host # All normalized values should be UTF-8 force_utf8_encoding_if_needed(@normalized_host) @normalized_host + rescue IDNA::Error => e + raise InvalidURIError.new(e) end ## @@ -2190,6 +2197,8 @@ def display_uri display_uri = self.normalize display_uri.host = ::Addressable::IDNA.to_unicode(display_uri.host) return display_uri + rescue IDNA::Error => e + raise InvalidURIError.new(e) end ## diff --git a/spec/addressable/idna_spec.rb b/spec/addressable/idna_spec.rb index c0f0e286..1ccfe268 100644 --- a/spec/addressable/idna_spec.rb +++ b/spec/addressable/idna_spec.rb @@ -271,14 +271,6 @@ expect(Addressable::IDNA.to_ascii("faß.de")).to eq("xn--fa-hia.de") end - it "throws exceptions which inherits Addressable::URI::InvalidURIError" do - # this way IDNA exceptions are also caught by existing rescue on InvalidURIError - expect(Addressable::IDNA::Error).to be < Addressable::URI::InvalidURIError - expect(Addressable::IDNA::PunycodeBadInput).to be < Addressable::IDNA::Error - expect(Addressable::IDNA::PunycodeBigOutput).to be < Addressable::IDNA::Error - expect(Addressable::IDNA::PunycodeOverflow).to be < Addressable::IDNA::Error - end - begin require "fiber" @@ -310,6 +302,18 @@ it "should implement IDNA2003" do expect(Addressable::IDNA.to_ascii("faß.de")).to eq("fass.de") end + + context "with strict_mode = true" do + before { Addressable::IDNA.strict_mode = true } + after { Addressable::IDNA.strict_mode = false } + + long = 'AcinusFallumTrompetumNullunCreditumVisumEstAtCuadLongumEtCefallum.com' + it "should raise on label too long (>63)" do + expect { + Addressable::IDNA.to_ascii(long) + }.to raise_error(Addressable::IDNA::Error, /too large/) + end + end end rescue LoadError => error raise error if ENV["CI"] && TestHelper.native_supported? 
@@ -344,7 +348,7 @@ }.to raise_error(Addressable::IDNA::Error, /longer than 63 char/) expect { Addressable::IDNA.to_ascii(long) - }.to raise_error(Addressable::URI::InvalidURIError, /longer than 63 char/) + }.to raise_error(Addressable::IDNA::Error, /longer than 63 char/) end it "should raise when punycode decode fails" do diff --git a/spec/addressable/uri_spec.rb b/spec/addressable/uri_spec.rb index c54fc3fb..63274b07 100644 --- a/spec/addressable/uri_spec.rb +++ b/spec/addressable/uri_spec.rb @@ -5234,6 +5234,118 @@ def to_s end end +describe Addressable::URI, "when parsed from invalid IDNA hostname " + + "'http://xn---3a.com/'" do + before do + @uri = Addressable::URI.parse("http://xn---3a.com/") + end + + begin + require "addressable/idna/libidn2" + context "with libidn2" do + before { Addressable::IDNA.backend = Addressable::IDNA::Libidn2 } + + context "when strict_mode is true" do + before { Addressable::IDNA.strict_mode = true } + after { Addressable::IDNA.strict_mode = false } + + it "display_uri should raise a wrapped InvalidURL error" do + expect { @uri.display_uri.to_s + }.to raise_error(Addressable::URI::InvalidURIError, /invalid punycode/) { |e| + expect(e.cause).to be_a(Addressable::IDNA::Error) + } + end + + it "normalized_host should raise a wrapped InvalidURL error" do + expect { @uri.normalized_host + }.to raise_error(Addressable::URI::InvalidURIError, /invalid punycode/) { |e| + expect(e.cause).to be_a(Addressable::IDNA::Error) + } + end + end + + it "display_uri should be kept as http://xn---3a.com/" do + expect(@uri.display_uri.to_s).to eq("http://xn---3a.com/") + end + + it "normalized_host should be kept as http://xn---3a.com/" do + expect(@uri.normalized_host).to eq("xn---3a.com") + end + end + rescue LoadError => error + raise error if ENV["CI"] && TestHelper.native_supported? 
+ warn('Could not load native libidn2 implementation.') + end + + begin + require "addressable/idna/libidn1" + context "with libidn1" do + before { Addressable::IDNA.backend = Addressable::IDNA::Libidn1 } + + context "when strict_mode is true" do + before { Addressable::IDNA.strict_mode = true } + after { Addressable::IDNA.strict_mode = false } + + # libidn1 silently falls back in this case + it "display_uri should be kept as http://xn---3a.com/" do + expect(@uri.display_uri.to_s).to eq("http://xn---3a.com/") + end + + it "normalized_host should be kept as http://xn---3a.com/" do + expect(@uri.normalized_host).to eq("xn---3a.com") + end + end + + it "display_uri should be kept as http://xn---3a.com/" do + expect(@uri.display_uri.to_s).to eq("http://xn---3a.com/") + end + + it "normalized_host should be kept as http://xn---3a.com/" do + expect(@uri.normalized_host).to eq("xn---3a.com") + end + end + rescue LoadError => error + raise error if ENV["CI"] && TestHelper.native_supported? + warn('Could not load native libidn2 implementation.') + end + + require "addressable/idna/pure" + context "with pure-ruby IDNA implementation" do + before { Addressable::IDNA.backend = Addressable::IDNA::Pure } + + context "when strict_mode is true" do + before { Addressable::IDNA.strict_mode = true } + after { Addressable::IDNA.strict_mode = false } + + # libidn1 silently falls back in this case + it "display_uri should be kept as http://xn---3a.com/" do + pending "incorrect result" + expect { @uri.display_uri.to_s + }.to raise_error(Addressable::URI::InvalidURIError, /invalid punycode/) { |e| + expect(e.cause).to be_a(Addressable::IDNA::Error) + } + end + + it "normalized_host should be kept as http://xn---3a.com/" do + pending "incorrect result" + expect { @uri.normalized_host + }.to raise_error(Addressable::URI::InvalidURIError, /invalid punycode/) { |e| + expect(e.cause).to be_a(Addressable::IDNA::Error) + } + end + end + + it "display_uri should be kept as http://xn---3a.com/" do 
+ pending "incorrect result" + expect(@uri.display_uri.to_s).to eq("http://xn---3a.com/") + end + + it "normalized_host should be kept as http://xn---3a.com/" do + expect(@uri.normalized_host).to eq("xn---3a.com") + end + end +end + describe Addressable::URI, "when parsed from " + "'http://www.詹姆斯.com/atomtests/iri/詹.html'" do before do From 7d75d6c0c27be22411870709cab3db5ae438eadc Mon Sep 17 00:00:00 2001 From: Adrien Rey-Jarthon Date: Tue, 18 Apr 2023 21:30:39 +0200 Subject: [PATCH 11/12] explicit exception message passing --- lib/addressable/idna/libidn1.rb | 4 ++-- lib/addressable/uri.rb | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/addressable/idna/libidn1.rb b/lib/addressable/idna/libidn1.rb index f6d1b0fe..581cd1f9 100644 --- a/lib/addressable/idna/libidn1.rb +++ b/lib/addressable/idna/libidn1.rb @@ -35,13 +35,13 @@ def unicode_normalize_kc(value) def self.to_ascii(value) IDN::Idna.toASCII(value, IDN::Idna::ALLOW_UNASSIGNED) rescue IDN::Idna::IdnaError => e - Addressable::IDNA.strict_mode ? raise(Error.new(e)) : value + Addressable::IDNA.strict_mode ? raise(Error.new(e.message)) : value end def self.to_unicode(value) IDN::Idna.toUnicode(value, IDN::Idna::ALLOW_UNASSIGNED) rescue IDN::Idna::IdnaError => e - Addressable::IDNA.strict_mode ? raise(Error.new(e)) : value + Addressable::IDNA.strict_mode ? 
raise(Error.new(e.message)) : value end end end diff --git a/lib/addressable/uri.rb b/lib/addressable/uri.rb index 5b1b91a9..581eded6 100644 --- a/lib/addressable/uri.rb +++ b/lib/addressable/uri.rb @@ -1141,7 +1141,7 @@ def normalized_host force_utf8_encoding_if_needed(@normalized_host) @normalized_host rescue IDNA::Error => e - raise InvalidURIError.new(e) + raise InvalidURIError.new(e.message) end ## @@ -2198,7 +2198,7 @@ def display_uri display_uri.host = ::Addressable::IDNA.to_unicode(display_uri.host) return display_uri rescue IDNA::Error => e - raise InvalidURIError.new(e) + raise InvalidURIError.new(e.message) end ## From f0b98df88ecedbe59643888254fd9965f3c6fd0c Mon Sep 17 00:00:00 2001 From: Adrien Rey-Jarthon Date: Wed, 19 Apr 2023 00:23:15 +0200 Subject: [PATCH 12/12] more consistent IDNA handling of wrong input --- lib/addressable/idna.rb | 4 ++-- spec/addressable/idna_spec.rb | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/lib/addressable/idna.rb b/lib/addressable/idna.rb index 177360ad..2299df9c 100644 --- a/lib/addressable/idna.rb +++ b/lib/addressable/idna.rb @@ -32,13 +32,13 @@ class << self # public interface implemented by all backends def to_ascii(value) - backend.to_ascii(value) + backend.to_ascii(value) if value.is_a?(String) rescue Error strict_mode ? raise : value end def to_unicode(value) - backend.to_unicode(value) + backend.to_unicode(value) if value.is_a?(String) rescue Error strict_mode ? 
raise : value end diff --git a/spec/addressable/idna_spec.rb b/spec/addressable/idna_spec.rb index 1ccfe268..29a978f3 100644 --- a/spec/addressable/idna_spec.rb +++ b/spec/addressable/idna_spec.rb @@ -152,6 +152,13 @@ "example..host" )).to eq("example..host") end + + it "handles nil input" do + expect(Addressable::IDNA.to_ascii(nil)).to eq(nil) + expect(Addressable::IDNA.to_ascii(45)).to eq(nil) + expect(Addressable::IDNA.to_ascii([])).to eq(nil) + expect(Addressable::IDNA.to_ascii({})).to eq(nil) + end end shared_examples_for "converting from ASCII to unicode" do @@ -256,6 +263,13 @@ "example..host" )).to eq("example..host") end + + it "handles unexpected input as nil" do + expect(Addressable::IDNA.to_unicode(nil)).to eq(nil) + expect(Addressable::IDNA.to_unicode(45)).to eq(nil) + expect(Addressable::IDNA.to_unicode([])).to eq(nil) + expect(Addressable::IDNA.to_unicode({})).to eq(nil) + end end describe Addressable::IDNA, "when using the pure-Ruby implementation" do