Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add URI::Generic#decoded_#{user,password} #17

Merged
merged 1 commit into from
May 12, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 33 additions & 8 deletions lib/uri/common.rb
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,7 @@ def self.regexp(schemes = nil)
# Write one entry per byte value 0..255: the single-byte string maps to its
# "%XX" escape (two uppercase hex digits). Key and value both go through
# String#-@.
256.times do |i|
TBLENCWWWCOMP_[-i.chr] = -('%%%02X' % i)
end
# Copy the table before SP is remapped below, so this copy keeps ' ' => '%20'.
TBLENCURICOMP_ = TBLENCWWWCOMP_.dup.freeze
# The form-data table writes SP as '+'.
TBLENCWWWCOMP_[' '] = '+'
TBLENCWWWCOMP_.freeze
TBLDECWWWCOMP_ = {} # :nodoc:
Expand All @@ -303,6 +304,33 @@ def self.regexp(schemes = nil)
#
# See URI.decode_www_form_component, URI.encode_www_form.
def self.encode_www_form_component(str, enc=nil)
  # Anything outside *, -, ., 0-9, A-Z, _ and a-z is replaced via TBLENCWWWCOMP_.
  escape_needed = /[^*\-.0-9A-Z_a-z]/
  _encode_uri_component(escape_needed, TBLENCWWWCOMP_, str, enc)
end

# Decodes +str+, a piece of URL-encoded form data.
#
# In this variant + is decoded to SP.
#
# +enc+ is handed to the shared decoder and defaults to UTF-8.
#
# See URI.encode_www_form_component, URI.decode_www_form.
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
  # Match a literal "+" as well as "%XX" sequences.
  plus_or_escape = /\+|%\h\h/
  _decode_uri_component(plus_or_escape, str, enc)
end

# URL-encodes +str+.
#
# Unlike URI.encode_www_form_component, SP comes out as %20 rather than +.
def self.encode_uri_component(str, enc=nil)
  # Same character class as the form encoder; only the lookup table differs.
  escape_needed = /[^*\-.0-9A-Z_a-z]/
  _encode_uri_component(escape_needed, TBLENCURICOMP_, str, enc)
end

# Decodes +str+, a piece of URL-encoded data.
#
# A + in the input is left as-is; it is not turned into SP.
#
# +enc+ is handed to the shared decoder and defaults to UTF-8.
def self.decode_uri_component(str, enc=Encoding::UTF_8)
  # Only "%XX" sequences are matched.
  escape_only = /%\h\h/
  _decode_uri_component(escape_only, str, enc)
end

def self._encode_uri_component(regexp, table, str, enc)
str = str.to_s.dup
if str.encoding != Encoding::ASCII_8BIT
if enc && enc != Encoding::ASCII_8BIT
Expand All @@ -311,19 +339,16 @@ def self.encode_www_form_component(str, enc=nil)
end
str.force_encoding(Encoding::ASCII_8BIT)
end
str.gsub!(/[^*\-.0-9A-Z_a-z]/, TBLENCWWWCOMP_)
str.gsub!(regexp, table)
str.force_encoding(Encoding::US_ASCII)
end
private_class_method :_encode_uri_component

# Decodes given +str+ of URL-encoded form data.
#
# This decodes + to SP.
#
# See URI.encode_www_form_component, URI.decode_www_form.
def self.decode_www_form_component(str, enc=Encoding::UTF_8)
def self._decode_uri_component(regexp, str, enc)
raise ArgumentError, "invalid %-encoding (#{str})" if /%(?!\h\h)/.match?(str)
str.b.gsub(/\+|%\h\h/, TBLDECWWWCOMP_).force_encoding(enc)
str.b.gsub(regexp, TBLDECWWWCOMP_).force_encoding(enc)
end
private_class_method :_decode_uri_component

# Generates URL-encoded form data from given +enum+.
#
Expand Down
14 changes: 12 additions & 2 deletions lib/uri/generic.rb
Original file line number Diff line number Diff line change
Expand Up @@ -564,16 +564,26 @@ def userinfo
end
end

# Returns the user component.
# Returns the user component (without URI decoding).
def user
# Plain read of the stored component; no decoding is applied here.
@user
end

# Returns the password component.
# Returns the password component (without URI decoding).
def password
# Plain read of the stored component; no decoding is applied here.
@password
end

# Returns the user component with URI decoding applied,
# or +nil+ when no user component is set.
def decoded_user
  return unless @user

  URI.decode_uri_component(@user)
end

# Returns the password component with URI decoding applied,
# or +nil+ when no password component is set.
def decoded_password
  return unless @password

  URI.decode_uri_component(@password)
end

#
# Checks the host +v+ component for RFC2396 compliance
# and against the URI::Parser Regexp for :HOST.
Expand Down
52 changes: 52 additions & 0 deletions test/uri/test_common.rb
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,58 @@ def test_decode_www_form_component
assert_nothing_raised(ArgumentError){URI.decode_www_form_component("x"*(1024*1024))}
end

def test_encode_uri_component
# ASCII input: SP is expected as %20 and "+" as %2B; only *, -, ., 0, 9, A, Z, _, a, z stay literal.
assert_equal("%00%20%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E",
URI.encode_uri_component("\x00 !\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~"))
# No target encoding given: the expected values are the strings' own bytes, escaped.
assert_equal("%95A", URI.encode_uri_component(
"\x95\x41".force_encoding(Encoding::Shift_JIS)))
assert_equal("0B", URI.encode_uri_component(
"\x30\x42".force_encoding(Encoding::UTF_16BE)))
assert_equal("%1B%24B%24%22%1B%28B", URI.encode_uri_component(
"\e$B$\"\e(B".force_encoding(Encoding::ISO_2022_JP)))

# UTF-8 source ("\u3042") with an explicit target encoding.
assert_equal("%E3%81%82", URI.encode_uri_component(
"\u3042", Encoding::ASCII_8BIT))
assert_equal("%82%A0", URI.encode_uri_component(
"\u3042", Encoding::Windows_31J))
assert_equal("%E3%81%82", URI.encode_uri_component(
"\u3042", Encoding::UTF_8))

# Shift_JIS source with an explicit target encoding.
assert_equal("%82%A0", URI.encode_uri_component(
"\u3042".encode("sjis"), Encoding::ASCII_8BIT))
assert_equal("%A4%A2", URI.encode_uri_component(
"\u3042".encode("sjis"), Encoding::EUC_JP))
assert_equal("%E3%81%82", URI.encode_uri_component(
"\u3042".encode("sjis"), Encoding::UTF_8))
assert_equal("B0", URI.encode_uri_component(
"\u3042".encode("sjis"), Encoding::UTF_16LE))
# "\u02DA" targeted at Windows-1252: the expected value is the escaped text "&#730;".
assert_equal("%26%23730%3B", URI.encode_uri_component(
"\u02DA", Encoding::WINDOWS_1252))

# invalid byte sequences: the expected output contains %EF%BF%BD (the UTF-8 bytes of U+FFFD)
assert_equal("%EF%BF%BD%EF%BF%BD", URI.encode_uri_component(
"\xE3\x81\xFF", "utf-8"))
assert_equal("%E6%9F%8A%EF%BF%BD%EF%BF%BD", URI.encode_uri_component(
"\x95\x41\xff\xff".force_encoding(Encoding::Shift_JIS), "utf-8"))
end

def test_decode_uri_component
# %20 decodes to SP, while a literal "+" in the input is expected to stay "+".
assert_equal(" +!\"\#$%&'()*+,-./09:;<=>?@AZ[\\]^_`az{|}~",
URI.decode_uri_component(
"%20+%21%22%23%24%25%26%27%28%29*%2B%2C-.%2F09%3A%3B%3C%3D%3E%3F%40" \
"AZ%5B%5C%5D%5E_%60az%7B%7C%7D%7E"))
# The second argument names the encoding the expected result carries.
assert_equal("\xA1\xA2".force_encoding(Encoding::EUC_JP),
URI.decode_uri_component("%A1%A2", "EUC-JP"))
# Input mixing literal non-ASCII bytes with %XX escapes.
assert_equal("\xE3\x81\x82\xE3\x81\x82".force_encoding("UTF-8"),
URI.decode_uri_component("\xE3\x81\x82%E3%81%82".force_encoding("UTF-8")))

# A "%" that is not followed by two hex digits must raise ArgumentError.
assert_raise(ArgumentError){URI.decode_uri_component("%")}
assert_raise(ArgumentError){URI.decode_uri_component("%a")}
assert_raise(ArgumentError){URI.decode_uri_component("x%a_")}
# A 1 MiB input containing no "%" must not raise.
assert_nothing_raised(ArgumentError){URI.decode_uri_component("x"*(1024*1024))}
end

def test_encode_www_form
assert_equal("a=1", URI.encode_www_form("a" => "1"))
assert_equal("a=1", URI.encode_www_form(a: 1))
Expand Down
9 changes: 9 additions & 0 deletions test/uri/test_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,15 @@ def test_parse_query_pct_encoded
assert_raise(URI::InvalidURIError) { URI.parse('https://www.example.com/search?q=%XX') }
end

# Userinfo with escaped "@" and "%": the raw readers keep the escapes,
# the decoded_* readers resolve them.
def test_parse_auth
  parsed = URI.parse("http://al%40ice:p%40s%25sword@example.com/dir%2Fname/subdir?foo=bar%40example.com")
  expectations = {
    user: "al%40ice",
    password: "p%40s%25sword",
    decoded_user: "al@ice",
    decoded_password: "p@s%sword",
  }
  expectations.each do |reader, expected|
    assert_equal expected, parsed.public_send(reader)
  end
end

def test_raise_bad_uri_for_integer
assert_raise(URI::InvalidURIError) do
URI.parse(1)
Expand Down