Skip to content

Commit 382590a

Browse files
committed
Re-work overlong lines
1 parent e000cb2 commit 382590a

File tree

2 files changed

+32
-9
lines changed

2 files changed

+32
-9
lines changed

lib/mail/fields/unstructured_field.rb

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -105,7 +105,8 @@ def wrap_lines(name, folded_lines)
105105
#
106106
# The list of individual words is then used to fill up the output lines without overflowing.
107107
# This is not always guaranteed to work, because there is a wide variation in the number of
108-
# characters that are needed to encode a given character.
108+
# characters that are needed to encode a given character. If the resulting line would be too
109+
# long, divide the original word into two chunks and add the pieces separately.
109110
def fold(prepend = 0) # :nodoc:
110111
# prepend is the length to allow for the header prefix on the first line (e.g. 'Subject: ')
111112
encoding = normalized_encoding
@@ -115,6 +116,7 @@ def fold(prepend = 0) # :nodoc:
115116
decoded_string = decoded.to_s
116117
words = decoded_string.split(/[ \t]/)
117118
should_encode = !decoded_string.ascii_only? || words.any? {|word| word.length > max_safe_word}
119+
encoding_overhead = 0 unless should_encode
118120
if should_encode
119121
max_safe_re = Regexp.new(".{#{max_safe_word}}|.+$")
120122
first = true
@@ -134,13 +136,14 @@ def fold(prepend = 0) # :nodoc:
134136

135137
folded_lines = []
136138
while !words.empty?
137-
limit = 78 - prepend
138-
limit = limit - encoding_overhead if should_encode
139+
limit = 78 - prepend - encoding_overhead
139140
line = String.new
140141
first_word = true
141142
while !words.empty?
142143
break unless word = words.first.dup
143144

145+
original_word = word # in case we need to try again
146+
144147
# Convert on 1.9+ only since we aren't sure of the current
145148
# charset encoding on 1.8. We'd need to track internal/external
146149
# charset on each field.
@@ -152,17 +155,29 @@ def fold(prepend = 0) # :nodoc:
152155
word = encode_crlf(word)
153156
# Skip to next line if we're going to go past the limit
154157
# Unless this is the first word, in which case we're going to add it anyway
155-
break if !line.empty? && (line.length + word.length + 1 > limit)
158+
break if !line.empty? && (line.length + word.length >= limit)
156159
# Remove the word from the queue ...
157160
words.shift
158161
# Add word separator
159162
if first_word
160163
first_word = false
161164
else
162-
line << " " if !should_encode
165+
line << " " unless should_encode
163166
end
164167

165168
# ... add it in encoded form to the current line
169+
# but first check whether the encoded word would overflow the line limit;
170+
# this should only happen for the first word on a line, because later words are deferred by the break above
171+
if should_encode && (line.length + word.length > limit)
172+
word, remain = original_word.scan(/.{3}|.+$/) # roughly half the original split
173+
words.unshift remain # put the unused bit back
174+
# re-encode shorter word
175+
if charset && word.respond_to?(:encoding)
176+
word = Encodings.transcode_charset(word, word.encoding, charset)
177+
end
178+
word = encode(word)
179+
word = encode_crlf(word)
180+
end
166181
line << word
167182
end
168183
# Mark the line as encoded if necessary

spec/mail/fields/unstructured_field_spec.rb

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -178,13 +178,15 @@
178178
value = "12345678901234567890123456789012345678901234567890123456789012345678901234567890"
179179
expect(value.length).to be > 78 - "Subject: ".length
180180
@field = Mail::UnstructuredField.new("Subject", value)
181+
expect(@field.decoded).to eq value
181182
lines = @field.encoded.split("\r\n")
182183
lines.each { |line| expect(line.length).to be <= 78 }
183184
end
184185

185186
it "should fold an ASCII-only subject with more than 998 characters and no white space" do
186187
value = "ThisIsASubjectHeaderMessageThatIsGoingToBeMoreThan998CharactersLong." * 20
187188
@field = Mail::UnstructuredField.new("Subject", value)
189+
expect(@field.decoded).to eq value
188190
lines = @field.encoded.split("\r\n")
189191
lines.each { |line| expect(line.length).to be <= 78 }
190192
end
@@ -193,16 +195,18 @@
193195
it "should fold a Japanese subject with more than 998 characters long and no white space" do
194196
value = "これは非常に長い日本語のSubjectです。空白はありません。" * 1
195197
@field = Mail::UnstructuredField.new("Subject", value)
198+
expect(@field.decoded).to eq value
196199
lines = @field.encoded.split("\r\n")
197-
lines.each { |line| expect(line.length).to be <= 90 }
200+
lines.each { |line| expect(line.length).to be <= 78 }
198201
end
199202

200203
# TODO: tweak unstructured_field to always generate lines of at most 78 chars
201204
it "should fold full of emoji subject that is going to be more than 998 bytes unfolded" do
202205
value = "😄" * 90
203206
@field = Mail::UnstructuredField.new("Subject", value)
207+
expect(@field.decoded).to eq value
204208
lines = @field.encoded.split("\r\n")
205-
lines.each { |line| expect(line.length).to be < 110 }
209+
lines.each { |line| expect(line.length).to be <= 78 }
206210
end
207211

208212
end
@@ -216,9 +220,13 @@
216220
end
217221

218222
describe "iso-2022-jp Subject" do
219-
it "should encoded with ISO-2022-JP encoding" do
220-
@field = Mail::UnstructuredField.new("Subject", "あいうえお")
223+
it "should be encoded with ISO-2022-JP encoding" do
224+
value = "あいうえお"
225+
@field = Mail::UnstructuredField.new("Subject", value)
221226
@field.charset = 'iso-2022-jp'
227+
expect(@field.decoded).to eq value
228+
lines = @field.encoded.split("\r\n")
229+
lines.each { |line| expect(line.length).to be <= 78 }
222230
expect(@field.encoded).to eq "Subject: =?ISO-2022-JP?Q?=1B$B$=22$$$&$=28$*=1B=28B?=\r\n"
223231
end
224232
end

0 commit comments

Comments
 (0)