diff --git a/addressable.gemspec b/addressable.gemspec
index d51f4655..09be7f26 100644
--- a/addressable.gemspec
+++ b/addressable.gemspec
@@ -13,7 +13,7 @@ Gem::Specification.new do |s|
   s.description = "Addressable is an alternative implementation to the URI implementation that is\npart of Ruby's standard library. It is flexible, offers heuristic parsing, and\nadditionally provides extensive support for IRIs and URI templates.\n".freeze
   s.email = "bob@sporkmonger.com".freeze
   s.extra_rdoc_files = ["README.md".freeze]
-  s.files = ["CHANGELOG.md".freeze, "Gemfile".freeze, "LICENSE.txt".freeze, "README.md".freeze, "Rakefile".freeze, "data/unicode.data".freeze, "lib/addressable".freeze, "lib/addressable.rb".freeze, "lib/addressable/idna".freeze, "lib/addressable/idna.rb".freeze, "lib/addressable/idna/native.rb".freeze, "lib/addressable/idna/pure.rb".freeze, "lib/addressable/template.rb".freeze, "lib/addressable/uri.rb".freeze, "lib/addressable/version.rb".freeze, "spec/addressable".freeze, "spec/addressable/idna_spec.rb".freeze, "spec/addressable/net_http_compat_spec.rb".freeze, "spec/addressable/security_spec.rb".freeze, "spec/addressable/template_spec.rb".freeze, "spec/addressable/uri_spec.rb".freeze, "spec/spec_helper.rb".freeze, "tasks/clobber.rake".freeze, "tasks/gem.rake".freeze, "tasks/git.rake".freeze, "tasks/metrics.rake".freeze, "tasks/profile.rake".freeze, "tasks/rspec.rake".freeze, "tasks/yard.rake".freeze]
+  s.files = ["CHANGELOG.md".freeze, "Gemfile".freeze, "LICENSE.txt".freeze, "README.md".freeze, "Rakefile".freeze, "data/unicode.data".freeze, "lib/addressable".freeze, "lib/addressable.rb".freeze, "lib/addressable/idna".freeze, "lib/addressable/idna.rb".freeze, "lib/addressable/idna/native.rb".freeze, "lib/addressable/idna/native2.rb".freeze, "lib/addressable/idna/pure.rb".freeze, "lib/addressable/template.rb".freeze, "lib/addressable/uri.rb".freeze, "lib/addressable/version.rb".freeze, "spec/addressable".freeze, "spec/addressable/idna_spec.rb".freeze, "spec/addressable/net_http_compat_spec.rb".freeze, "spec/addressable/security_spec.rb".freeze, "spec/addressable/template_spec.rb".freeze, "spec/addressable/uri_spec.rb".freeze, "spec/spec_helper.rb".freeze, "tasks/clobber.rake".freeze, "tasks/gem.rake".freeze, "tasks/git.rake".freeze, "tasks/metrics.rake".freeze, "tasks/profile.rake".freeze, "tasks/rspec.rake".freeze, "tasks/yard.rake".freeze]
   s.homepage = "https://github.com/sporkmonger/addressable".freeze
   s.licenses = ["Apache-2.0".freeze]
   s.rdoc_options = ["--main".freeze, "README.md".freeze]
@@ -27,9 +27,11 @@ Gem::Specification.new do |s|
 
   if s.respond_to? :add_runtime_dependency then
     s.add_runtime_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 6.0"])
+    s.add_runtime_dependency(%q<ffi>.freeze)
     s.add_development_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"])
   else
     s.add_dependency(%q<public_suffix>.freeze, [">= 2.0.2", "< 6.0"])
+    s.add_dependency(%q<ffi>.freeze)
     s.add_dependency(%q<bundler>.freeze, [">= 1.0", "< 3.0"])
   end
 end
diff --git a/benchmark/idna.rb b/benchmark/idna.rb
new file mode 100644
index 00000000..69978299
--- /dev/null
+++ b/benchmark/idna.rb
@@ -0,0 +1,65 @@
+#!/usr/bin/env ruby
+# frozen_string_literal: true
+
+require "benchmark"
+
+value = "fiᆵリ宠퐱卄.com"
+expected = "xn--fi-w1k207vk59a3qk9w9r.com"
+N = 100_000
+
+Benchmark.bmbm do |x|
+  x.report("pure") {
+    load "lib/addressable/idna/pure.rb"
+    fail "pure ruby does not match" unless expected == Addressable::IDNA.to_ascii(value)
+    N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) }
+    Addressable.send(:remove_const, :IDNA)
+  }
+
+  x.report("libidn") {
+    load "lib/addressable/idna/native.rb"
+    fail "libidn does not match" unless expected == Addressable::IDNA.to_ascii(value)
+    N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) }
+    Addressable.send(:remove_const, :IDNA)
+  }
+
+  x.report("libidn2") {
+    load "lib/addressable/idna/native2.rb"
+    fail "libidn2 does not match" unless expected == Addressable::IDNA.to_ascii(value)
+    N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) }
+    Addressable.send(:remove_const, :IDNA)
+  }
+end
+
+# > ruby benchmark/idna.rb
+# Rehearsal -------------------------------------------
+# pure      5.914630   0.000000   5.914630 (  5.915326)
+# libidn    0.518971   0.003672   0.522643 (  0.522676)
+# libidn2   0.763936   0.000000   0.763936 (  0.763983)
+# ---------------------------------- total: 7.201209sec
+
+#               user     system      total        real
+# pure      6.042877   0.000000   6.042877 (  6.043252)
+# libidn    0.521668   0.000000   0.521668 (  0.521704)
+# libidn2   0.764782   0.000000   0.764782 (  0.764863)
+
+puts "\nMemory leak test for libidn2 (memory should stabilize quickly):"
+load "lib/addressable/idna/native2.rb"
+GC.disable # Only run GC when manually called
+10.times do
+  N.times { Addressable::IDNA.to_unicode(Addressable::IDNA.to_ascii(value)) }
+  GC.start # Run a major GC
+  size = `ps -o rss= -p #{Process.pid}`.to_i # RSS in KB of exactly this process (no prefix-matching grep)
+  puts "  Memory: #{size/1024}MB" # show process memory
+end
+
+# Memory leak test for libidn2 (memory should stabilize quickly):
+#   Memory: 117MB
+#   Memory: 121MB
+#   Memory: 121MB
+#   Memory: 121MB
+#   Memory: 121MB
+#   Memory: 121MB
+#   Memory: 121MB
+#   Memory: 121MB
+#   Memory: 121MB
+#   Memory: 121MB
diff --git a/lib/addressable/idna.rb b/lib/addressable/idna.rb
index 2dbd3934..d66b5db2 100644
--- a/lib/addressable/idna.rb
+++ b/lib/addressable/idna.rb
@@ -16,11 +16,15 @@
 #    limitations under the License.
 #++
 
-
 begin
-  require "addressable/idna/native"
+  require "addressable/idna/native2"
 rescue LoadError
-  # libidn or the idn gem was not available, fall back on a pure-Ruby
-  # implementation...
-  require "addressable/idna/pure"
+  # libidn2 or the ffi gem was not available, fall back on libidn1
+  begin
+    require "addressable/idna/native"
+  rescue LoadError
+    # libidn or the idn gem was not available, fall back on a pure-Ruby
+    # implementation...
+    require "addressable/idna/pure"
+  end
 end
diff --git a/lib/addressable/idna/native.rb b/lib/addressable/idna/native.rb
index b225e1c3..089075d5 100644
--- a/lib/addressable/idna/native.rb
+++ b/lib/addressable/idna/native.rb
@@ -16,19 +16,11 @@
 #    limitations under the License.
 #++
 
-
+# libidn1 implementing IDNA2003
 require "idn"
 
 module Addressable
   module IDNA
-    def self.punycode_encode(value)
-      IDN::Punycode.encode(value.to_s)
-    end
-
-    def self.punycode_decode(value)
-      IDN::Punycode.decode(value.to_s)
-    end
-
     def self.to_ascii(value)
       value.to_s.split('.', -1).map do |segment|
         if segment.size > 0 && segment.size < 64
diff --git a/lib/addressable/idna/native2.rb b/lib/addressable/idna/native2.rb
new file mode 100644
index 00000000..f4fd6296
--- /dev/null
+++ b/lib/addressable/idna/native2.rb
@@ -0,0 +1,80 @@
+# frozen_string_literal: true
+
+#--
+# Copyright (C) Bob Aman
+#
+#    Licensed under the Apache License, Version 2.0 (the "License");
+#    you may not use this file except in compliance with the License.
+#    You may obtain a copy of the License at
+#
+#        http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS,
+#    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#    See the License for the specific language governing permissions and
+#    limitations under the License.
+#++
+
+# libidn2 implementing IDNA2008+TR46
+require "ffi"
+
+module Addressable
+  module IDNA
+    extend FFI::Library
+
+    ffi_lib ["idn2", "libidn2.0", "libidn2.so.0"]
+
+    attach_function :idn2_to_ascii_8z, %i[string pointer int], :int
+    attach_function :idn2_to_unicode_8z8z, %i[string pointer int], :int
+    attach_function :idn2_strerror, [:int], :string
+    attach_function :idn2_free, [:pointer], :void
+
+    # Processing flags passed as the last argument of the idn2_* calls.
+    IDN2_TRANSITIONAL = 4
+    IDN2_NONTRANSITIONAL = 8
+
+    # Converts a (possibly internationalized) host name to its ASCII form.
+    #
+    # @param value [#to_s] host name to encode
+    # @return [String] the ASCII form; ASCII-only input is returned unchanged
+    #   without calling into libidn2
+    # @raise [RuntimeError] if libidn2 rejects the input in both modes
+    def self.to_ascii(value)
+      value = value.to_s
+      return value if value.ascii_only?
+
+      pointer = FFI::MemoryPointer.new(:pointer)
+      res = idn2_to_ascii_8z(value, pointer, IDN2_NONTRANSITIONAL)
+      # Fallback to Transitional mode in case of disallowed character
+      res = idn2_to_ascii_8z(value, pointer, IDN2_TRANSITIONAL) if res != 0
+      raise "libidn2 failed to convert \"#{value}\" to ascii (#{idn2_strerror(res)})" if res != 0
+
+      take_result(pointer)
+    end
+
+    # Converts an ASCII (Punycode) host name back to its Unicode form.
+    #
+    # @param value [#to_s] host name to decode
+    # @return [String] the UTF-8 form, or the input itself when libidn2
+    #   reports an error
+    def self.to_unicode(value)
+      value = value.to_s
+      pointer = FFI::MemoryPointer.new(:pointer)
+      res = idn2_to_unicode_8z8z(value, pointer, IDN2_NONTRANSITIONAL)
+      return value if res != 0
+
+      take_result(pointer)
+    end
+
+    # Copies the C string that libidn2 stored behind +pointer+ into a Ruby
+    # String tagged as UTF-8, and releases the native buffer in an +ensure+
+    # so it is freed even if the copy raises.
+    def self.take_result(pointer)
+      native = pointer.read_pointer
+      native.read_string.force_encoding("UTF-8")
+    ensure
+      idn2_free(native) if native
+    end
+    private_class_method :take_result
+  end
+end
diff --git a/spec/addressable/idna_spec.rb b/spec/addressable/idna_spec.rb
index 428c9ec8..9e2fb7d6 100644
--- a/spec/addressable/idna_spec.rb
+++ b/spec/addressable/idna_spec.rb
@@ -258,7 +258,7 @@
 end
 
 describe Addressable::IDNA, "when using the pure-Ruby implementation" do
-  before do
+  before :all do
     Addressable.send(:remove_const, :IDNA)
     load "addressable/idna/pure.rb"
   end
@@ -266,6 +266,10 @@
   it_should_behave_like "converting from unicode to ASCII"
   it_should_behave_like "converting from ASCII to unicode"
 
+  it "should implement IDNA2008 non transitional" do
+    expect(Addressable::IDNA.to_ascii("faß.de")).to eq("xn--fa-hia.de")
+  end
+
   begin
     require "fiber"
 
@@ -285,18 +289,45 @@
 begin
   require "idn"
 
-  describe Addressable::IDNA, "when using the native-code implementation" do
-    before do
+  describe Addressable::IDNA, "when using the libidn1 native implementation (idn gem)" do
+    before :all do
       Addressable.send(:remove_const, :IDNA)
       load "addressable/idna/native.rb"
     end
 
     it_should_behave_like "converting from unicode to ASCII"
     it_should_behave_like "converting from ASCII to unicode"
+
+    it "should implement IDNA2003" do
+      expect(Addressable::IDNA.to_ascii("faß.de")).to eq("fass.de")
+    end
   end
 rescue LoadError => error
   raise error if ENV["CI"] && TestHelper.native_supported?
 
-  # Cannot test the native implementation without libidn support.
-  warn('Could not load native IDN implementation.')
+  # Cannot test the native implementation without libidn installed.
+  warn('Could not load native libidn1 implementation.')
 end
+
+begin
+  require "addressable/idna/native2.rb"
+
+  describe Addressable::IDNA, "when using the libidn2 native implementation (ffi)" do
+    before :all do
+      Addressable.send(:remove_const, :IDNA)
+      load "addressable/idna/native2.rb"
+    end
+
+    it_should_behave_like "converting from unicode to ASCII"
+    it_should_behave_like "converting from ASCII to unicode"
+
+    it "should implement IDNA2008 non transitional" do
+      expect(Addressable::IDNA.to_ascii("faß.de")).to eq("xn--fa-hia.de")
+    end
+  end
+rescue LoadError => error
+  raise error if ENV["CI"] && TestHelper.native_supported?
+
+  # Cannot test the native implementation without libidn2 installed.
+  warn('Could not load native libidn2 implementation.')
+end