Skip to content

do not lie about the strings encoding #921

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 12 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 54 additions & 22 deletions lib/mail/encodings.rb
Original file line number Diff line number Diff line change
Expand Up @@ -115,27 +115,25 @@ def Encodings.decode_encode(str, output_type)
def Encodings.value_decode(str)
# Optimization: If there are no encoded-words in the string, just return it
return str unless str =~ ENCODED_VALUE

lines = collapse_adjacent_encodings(str)

# Split on white-space boundaries with capture, so we capture the white-space as well
lines.each do |line|
line.gsub!(ENCODED_VALUE) do |string|
case $1
lines = collapse_adjacent_encodings(str).collect do |line|
match = line.match(ENCODED_VALUE)
if match
string, type = match[0], match[1]
case type
when *B_VALUES then b_value_decode(string)
when *Q_VALUES then q_value_decode(string)
end
else
line
end
end.join("")
end

# Takes an encoded string of the format =?<encoding>?[QB]?<string>?=
def Encodings.unquote_and_convert_to(str, to_encoding)
output = value_decode( str ).to_s # output is already converted to UTF-8
output = value_decode( str )

if 'utf8' == to_encoding.to_s.downcase.gsub("-", "")
output
elsif to_encoding
if to_encoding
begin
if RUBY_VERSION >= '1.9'
output.encode(to_encoding)
Expand Down Expand Up @@ -268,22 +266,56 @@ def Encodings.split_value_encoding_from_string(str)
#
# String has to be of the format =?<encoding>?[QB]?<string>?=
def Encodings.collapse_adjacent_encodings(str)
lines = str.split(/(\?=)\s*(=\?)/).each_slice(2).map(&:join)
results = []
previous_encoding = nil
return [] if str.empty?
offset = 0 # initialize offset to 0

lines.each do |line|
encoding = split_value_encoding_from_string(line)
beginMarker = str.index("=?")
# If there is no begin marker, this is not a valid encoded string.
if beginMarker == nil
return Array(str)
end

if encoding == previous_encoding
line = results.pop + line
end
# Increment offset by the begin marker's offset and length.
offset = beginMarker + 2

encoding_seperator = str.index("?", offset)
# If there is no encoding separator, this is not a valid encoded string.
if encoding_seperator == nil
return Array(str)
end

# Increment offset by the encoding separator's offset and length.
offset = encoding_seperator + 1

type_character = str.index("?", offset)
# If there is no type character, this is not a valid encoded string.
if type_character == nil
return Array(str)
end

# Increment offset by the type character separator's offset and length.
offset = type_character + 1 # increment offset by the seperator length

endMarker = str.index("?=", offset)
# If there is no end marker, this is not a valid encoded string.
if endMarker == nil
return Array(str)
end

# Increment offset by the endMarker's offset and length.
offset = endMarker + 2

previous_encoding = encoding
results << line
result = []
# emit the substring before the beginMarker (if any)
if beginMarker != 0
substr = str[0..beginMarker-1]
# only emit if not a blank string.
result << substr if !substr.blank?
end

results
result << str[beginMarker..offset-1]
# return the encoded characters section
return result.concat(collapse_adjacent_encodings(str[offset..-1]))
end
end
end
6 changes: 2 additions & 4 deletions lib/mail/version_specific/ruby_1_9.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,7 @@ def Ruby19.b_value_decode(str)
decoded = str.encode(Encoding::UTF_8, :invalid => :replace, :replace => "")
decoded.valid_encoding? ? decoded : decoded.encode(Encoding::UTF_16LE, :invalid => :replace, :replace => "").encode(Encoding::UTF_8)
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
warn "Encoding conversion failed #{$!}"
str.dup.force_encoding(Encoding::UTF_8)
str.dup
end

def Ruby19.q_value_encode(str, encoding = nil)
Expand All @@ -113,8 +112,7 @@ def Ruby19.q_value_decode(str)
decoded = str.encode(Encoding::UTF_8, :invalid => :replace, :replace => "")
decoded.valid_encoding? ? decoded : decoded.encode(Encoding::UTF_16LE, :invalid => :replace, :replace => "").encode(Encoding::UTF_8)
rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError
warn "Encoding conversion failed #{$!}"
str.dup.force_encoding(Encoding::UTF_8)
str.dup
end

def Ruby19.param_decode(str, encoding)
Expand Down
5 changes: 2 additions & 3 deletions spec/mail/encodings_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@
# the encoded-word would contain an exact multiple of 3 encoded
# octets), except that the second 'encoded-word' uses a different
# 'charset' than the first one.
#

it "should just return the string if us-ascii and asked to B encoded string" do
string = "This is a string"
result = "This is a string"
Expand Down Expand Up @@ -133,7 +133,7 @@
it "should decode a long encoded string" do
string = '=?UTF-8?B?VGhpcyBpcyDjgYIgcmVhbGx5IGxvbmcgc3RyaW5nIFRoaXMgaXMg44GCIHJl?= =?UTF-8?B?YWxseSBsb25nIHN0cmluZyBUaGlzIGlzIOOBgiByZWFsbHkgbG9uZyBzdHJp?= =?UTF-8?B?bmcgVGhpcyBpcyDjgYIgcmVhbGx5IGxvbmcgc3RyaW5nIFRoaXMgaXMg44GC?= =?UTF-8?B?IHJlYWxseSBsb25nIHN0cmluZw==?='
result = "This is あ really long string This is あ really long string This is あ really long string This is あ really long string This is あ really long string"
result.force_encoding('UTF-8') if RUBY_VERSION >= '1.9'
result.encode!('UTF-8') if RUBY_VERSION >= '1.9'
expect(Mail::Encodings.value_decode(string)).to eq result
end

Expand Down Expand Up @@ -706,7 +706,6 @@
encoded = "=?ISO-8859-1?Q?\nRe=3A_ol=E1?="
expect(Mail::Encodings.value_decode(encoded)).to eq expected
end

end

describe "pre encoding non usascii text" do
Expand Down