diff --git a/lib/mail/utilities.rb b/lib/mail/utilities.rb index b17117c67..cf6376f80 100644 --- a/lib/mail/utilities.rb +++ b/lib/mail/utilities.rb @@ -306,10 +306,10 @@ def encode(string, charset) end class BestEffortCharsetEncoder - def encode(string, charset) + def encode(string, charset, raise_utf7_exceptions = false) case charset when /utf-?7/i - Mail::Utilities.decode_utf7(string) + Mail::Utilities.decode_utf7(string, raise_utf7_exceptions) else string.force_encoding(pick_encoding(charset)) end @@ -396,10 +396,20 @@ def Utilities.encode_utf7(string) end.force_encoding(Encoding::ASCII_8BIT) end - def Utilities.decode_utf7(utf7) + # Note: UTF-7 is an obscure encoding, and it's not clear that it's + # even valid for email bodies (the case in which we need + # raise_utf7_exceptions=false). If we remove support for UTF-7 in + # email bodies, we can also remove the argument here (it defaults to true: raise). + def Utilities.decode_utf7(utf7, raise_utf7_exceptions = true) utf7.gsub(/&([^-]+)?-/n) do if $1 - ($1.tr(",", "/") + "===").unpack1("m").encode(Encoding::UTF_8, Encoding::UTF_16BE) + utf_16be = ($1.tr(",", "/") + "===").unpack("m")[0].force_encoding(Encoding::UTF_16BE) + if raise_utf7_exceptions + # special case - in B encoding, raise exceptions on conversion errors + utf_16be.encode(Encoding::UTF_8) + else + transcode_to_scrubbed_utf8(utf_16be) + end else "&" end @@ -416,7 +426,7 @@ def Utilities.b_value_decode(str) if match charset = match[1] str = Utilities.decode_base64(match[2]) - str = charset_encoder.encode(str, charset) + str = charset_encoder.encode(str, charset, true) end transcode_to_scrubbed_utf8(str) rescue Encoding::UndefinedConversionError, ArgumentError, Encoding::ConverterNotFoundError, Encoding::InvalidByteSequenceError diff --git a/spec/fixtures/emails/multi_charset/quoted-printable.eml b/spec/fixtures/emails/multi_charset/quoted-printable.eml new file mode 100644 index 000000000..117763b0b --- /dev/null +++ b/spec/fixtures/emails/multi_charset/quoted-printable.eml @@ -0,0
+1,4 @@ +Content-Type: text/plain; charset="utf-8" +Content-Transfer-Encoding: quoted-printable + +Valid: =E6=98=AF Invalid: =E6 diff --git a/spec/fixtures/emails/multi_charset/utf7.eml b/spec/fixtures/emails/multi_charset/utf7.eml new file mode 100644 index 000000000..f3d962b27 --- /dev/null +++ b/spec/fixtures/emails/multi_charset/utf7.eml @@ -0,0 +1,3 @@ +Content-Type: text/plain; charset="utf-7" + +Valid: &Zi8- Invalid: &5g- diff --git a/spec/mail/message_spec.rb b/spec/mail/message_spec.rb index e2405cf58..531b17eff 100644 --- a/spec/mail/message_spec.rb +++ b/spec/mail/message_spec.rb @@ -401,6 +401,16 @@ def create_mail_with_splat_args expect(raw_message.encoding).to eq original_encoding if raw_message.respond_to?(:encoding) end + it "should parse utf-7 email without raising exceptions" do + mail = read_fixture('emails', 'multi_charset', 'utf7.eml') + expect(mail.decoded.chomp).to eq "Valid: 是 Invalid: �" + end + + it "should parse quoted-printable email without raising exceptions" do + mail = read_fixture('emails', 'multi_charset', 'quoted-printable.eml') + expect(mail.decoded.chomp).to eq "Valid: 是 Invalid: �" + end + if '1.9+'.respond_to?(:encoding) it "should be able to normalize CRLFs on non-UTF8 encodings" do File.open(fixture_path('emails', 'multi_charset', 'japanese_shift_jis.eml'), 'rb') do |io|