Re-work overlong lines

sebbASF · sebbASF · commit 382590a6da2e · 2023-02-22T22:31:52.000Z
diff --git a/lib/mail/fields/unstructured_field.rb b/lib/mail/fields/unstructured_field.rb
@@ -105,7 +105,8 @@ def wrap_lines(name, folded_lines)
     # 
     # The list of individual words is then used to fill up the output lines without overflowing.
     # This is not always guaranteed to work, because there is a wide variation in the number of
-    # characters that are needed to encode a given character.
+    # characters that are needed to encode a given character. If the resulting line would be too
+    # long, divide the original word into two chunks and add the pieces separately.
     def fold(prepend = 0) # :nodoc:
       #  prepend is the length to allow for the header prefix on the first line (e.g. 'Subject: ')
       encoding       = normalized_encoding
@@ -115,6 +116,7 @@ def fold(prepend = 0) # :nodoc:
       decoded_string = decoded.to_s
       words = decoded_string.split(/[ \t]/)
       should_encode  = !decoded_string.ascii_only? || words.any? {|word| word.length > max_safe_word}
+      encoding_overhead = 0 unless should_encode
       if should_encode
         max_safe_re = Regexp.new(".{#{max_safe_word}}|.+$")
         first = true
@@ -134,13 +136,14 @@ def fold(prepend = 0) # :nodoc:
 
       folded_lines   = []
       while !words.empty?
-        limit = 78 - prepend
-        limit = limit - encoding_overhead if should_encode
+        limit = 78 - prepend - encoding_overhead
         line = String.new
         first_word = true
         while !words.empty?
           break unless word = words.first.dup
 
+          original_word = word # in case we need to try again
+
           # Convert on 1.9+ only since we aren't sure of the current
           # charset encoding on 1.8. We'd need to track internal/external
           # charset on each field.
@@ -152,17 +155,29 @@ def fold(prepend = 0) # :nodoc:
           word = encode_crlf(word)
           # Skip to next line if we're going to go past the limit
           # Unless this is the first word, in which case we're going to add it anyway
-          break if !line.empty? && (line.length + word.length + 1 > limit)
+          break if !line.empty? && (line.length + word.length >= limit)
           # Remove the word from the queue ...
           words.shift
           # Add word separator
           if first_word
             first_word = false
           else
-            line << " " if !should_encode
+            line << " " unless should_encode
           end
 
           # ... add it in encoded form to the current line
+          # ... but first check whether adding it would overflow the limit;
+          # this should only happen for the first word on a line.
+          if should_encode && (line.length + word.length > limit)
+            word, remain = original_word.scan(/.{3}|.+$/) # roughly half the original split
+            words.unshift remain # put the unused bit back
+            # re-encode shorter word
+            if charset && word.respond_to?(:encoding)
+              word = Encodings.transcode_charset(word, word.encoding, charset)
+            end
+            word = encode(word)
+            word = encode_crlf(word)  
+          end
           line << word
         end
         # Mark the line as encoded if necessary
diff --git a/spec/mail/fields/unstructured_field_spec.rb b/spec/mail/fields/unstructured_field_spec.rb
@@ -178,13 +178,15 @@
       value = "12345678901234567890123456789012345678901234567890123456789012345678901234567890"
       expect(value.length).to be > 78 - "Subject: ".length
       @field = Mail::UnstructuredField.new("Subject", value)
+      expect(@field.decoded).to eq value
       lines = @field.encoded.split("\r\n")
       lines.each { |line| expect(line.length).to be <= 78 }
     end
 
     it "should fold an ASCII-only subject with more than 998 characters and no white space" do
       value = "ThisIsASubjectHeaderMessageThatIsGoingToBeMoreThan998CharactersLong." * 20
       @field = Mail::UnstructuredField.new("Subject", value)
+      expect(@field.decoded).to eq value
       lines = @field.encoded.split("\r\n")
       lines.each { |line| expect(line.length).to be <= 78 }
     end
@@ -193,16 +195,18 @@
     it "should fold a Japanese subject with more than 998 characters long and no white space" do
       value = "これは非常に長い日本語のSubjectです。空白はありません。" * 1
       @field = Mail::UnstructuredField.new("Subject", value)
+      expect(@field.decoded).to eq value
       lines = @field.encoded.split("\r\n")
-      lines.each { |line| expect(line.length).to be <= 90 }
+      lines.each { |line| expect(line.length).to be <= 78 }
     end
 
     # TODO: tweak unstructured_field to always generate lines of at most 78 chars
     it "should fold full of emoji subject that is going to be more than 998 bytes unfolded" do
       value = "😄" * 90
       @field = Mail::UnstructuredField.new("Subject", value)
+      expect(@field.decoded).to eq value
       lines = @field.encoded.split("\r\n")
-      lines.each { |line| expect(line.length).to be < 110 }
+      lines.each { |line| expect(line.length).to be <= 78 }
     end
 
   end
@@ -216,9 +220,13 @@
   end
 
   describe "iso-2022-jp Subject" do
-    it "should encoded with ISO-2022-JP encoding" do
-      @field = Mail::UnstructuredField.new("Subject", "あいうえお")
+    it "should be encoded with ISO-2022-JP encoding" do
+      value = "あいうえお"
+      @field = Mail::UnstructuredField.new("Subject", value)
       @field.charset = 'iso-2022-jp'
+      expect(@field.decoded).to eq value
+      lines = @field.encoded.split("\r\n")
+      lines.each { |line| expect(line.length).to be <= 78 }
       expect(@field.encoded).to eq "Subject: =?ISO-2022-JP?Q?=1B$B$=22$$$&$=28$*=1B=28B?=\r\n"
     end
   end