-
Notifications
You must be signed in to change notification settings - Fork 4
/
ParsedMail.php
355 lines (312 loc) · 10.1 KB
/
ParsedMail.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
<?php
namespace Lasso\MailParserBundle;
use ArrayIterator;
use Exception;
use UnexpectedValueException;
use Zend\Mail\Storage\Part;
class ParsedMail extends ParseHelper {
private $rawMail;
/** @var Part */
private $mail;
/**
* Linear array of all parts in the email. The parts of enveloped emails will also be in here
*
* @var Part[]
*/
private $parts = [];
/**
* Keeps a list of parts that produced charset problems while decoding the body
*
* @var array
*/
private $problematicParts = [];
/** @var Part */
private $envelopedEmail = null;
/** @var array */
private $knownCharsets;
/** @var array */
private $allEmailAddressesByField;
/** @var array */
private $loggingEmails;
/**
* Identifies exception for unknown charsets
*
* @var int
*/
const INVALID_CHARSET_ERROR_CODE = 525;
/**
 * Stores the pieces of an already parsed email and builds the charset lookup list.
 *
 * @param mixed  $rawMail                  Raw mail; returned unchanged by getRawMail()
 * @param Part   $mail                     Top-level part of the email
 * @param Part[] $parts                    Linear list of all parts in the email
 * @param array  $loggingEmails            Returned unchanged by getLoggingEmails()
 * @param array  $allEmailAddressesByField Email addresses keyed by header field name, e.g. 'to' or 'from'
 * @param Part   $envelopedEmail           Enveloped email; an empty value means there is none
 */
public function __construct(
    $rawMail,
    Part $mail,
    $parts,
    $loggingEmails,
    $allEmailAddressesByField,
    $envelopedEmail
) {
    $this->rawMail                  = $rawMail;
    $this->mail                     = $mail;
    $this->parts                    = $parts;
    $this->loggingEmails            = $loggingEmails;
    $this->allEmailAddressesByField = $allEmailAddressesByField;
    $this->envelopedEmail           = $envelopedEmail;

    /*
     * Normalise the names of all encodings mbstring knows once, so charsets announced
     * by a part can be looked up in their normalised form later on.
     */
    $this->knownCharsets = array_map([$this, 'prepareEncodingName'], mb_list_encodings());
}
/**
* Returns the raw mail exactly as it was passed to the constructor.
*
* @return mixed
*/
public function getRawMail()
{
return $this->rawMail;
}
/**
* Returns the top-level part of the parsed email, exactly as it was passed to the constructor.
*
* @return Part
*/
public function getMail()
{
return $this->mail;
}
/**
 * Concatenates the textual parts of an email. If there are html parts, only
 * those are concatenated, otherwise the plain text parts are used. No other
 * parts (such as file attachments) are returned.
 *
 * The callable $glue, if given, is called between every two parts:
 *
 * $partOne . $glue($contentType) . $partTwo
 *
 * $glue needs to return a string. Using a function allows returning
 * different values for different content types, e.g. <hr /> for html content.
 * The content type of the parts is passed in as "text/html" or "text/plain".
 *
 * If the body of a part cannot be converted because of a charset problem, the
 * part is recorded as problematic and an empty string is returned.
 *
 * @param callable $glue
 *
 * @return null|string null if the email has neither html nor plain text parts
 */
public function getPrimaryContent(callable $glue = null)
{
    // Fall back to a no-op glue so the combining code below needs no null checks.
    if ($glue === null) {
        $glue = function () {
            return '';
        };
    }

    $mail  = $this->getMail();
    $parts = $mail->isMultipart() ? $this->parts : [$mail];

    // Decoded bodies, grouped by the only two content types that count as primary content.
    $contentByType = [
        'text/html'  => [],
        'text/plain' => [],
    ];

    foreach ($parts as $part) {
        // Parts without a Content-Type header are treated as plain text.
        $contentType = 'text/plain';
        if ($this->hasHeader($part, 'Content-Type')) {
            // MIME type names are case-insensitive (RFC 2045, section 5.1).
            $contentType = strtolower((string) $this->getContentType($part)->getType());
        }

        if (!isset($contentByType[$contentType])) {
            continue;
        }

        try {
            $contentByType[$contentType][] = $this->decodeBody($part);
        } catch (UnexpectedValueException $exception) {
            if ($exception->getCode() === self::INVALID_CHARSET_ERROR_CODE) {
                $this->problematicParts[] = $part;

                /*
                 * Couldn't convert content, in all likelihood can't work with it, so
                 * return an empty string
                 */
                return '';
            }
            // Exceptions carrying any other code are ignored and the part is skipped.
        }
    }

    /**
     * Joins the given bodies, calling $glue once between every two of them.
     * Seeding array_reduce() with the first body means $glue is never called
     * before the first or after the last body.
     *
     * @param string[] $bodies
     * @param string   $contentType
     *
     * @return string
     */
    $combineParts = function ($bodies, $contentType) use ($glue) {
        $first = array_shift($bodies);

        return array_reduce(
            $bodies,
            function ($soFar, $body) use ($glue, $contentType) {
                return $soFar . $glue($contentType) . $body;
            },
            $first
        );
    };

    // Html wins over plain text when both are present.
    foreach (['text/html', 'text/plain'] as $preferredType) {
        if (!empty($contentByType[$preferredType])) {
            return $combineParts($contentByType[$preferredType], $preferredType);
        }
    }

    return null;
}
/**
 * Returns the distinct email addresses stored for the given header fields.
 * By default the fields to, from, cc and bcc are used. Fields without
 * stored addresses are skipped.
 *
 * @param array $fields Names of the header fields to collect addresses from
 *
 * @return array Sequentially indexed list of unique addresses
 */
public function getAllEmailAddresses($fields = ['to', 'from', 'cc', 'bcc'])
{
    $addresses = [];

    foreach ($fields as $field) {
        if (!isset($this->allEmailAddressesByField[$field])) {
            continue;
        }

        foreach ($this->allEmailAddressesByField[$field] as $emailAddress) {
            $addresses[] = $emailAddress;
        }
    }

    // array_unique() preserves keys, which would leave gaps where duplicates were removed.
    return array_values(array_unique($addresses));
}
/**
* Returns a list of all emails in the parser, including
* the message id. This is mostly useful for logging.
*
* The value is returned exactly as it was passed to the constructor.
*
* @return array
*/
public function getLoggingEmails()
{
return $this->loggingEmails;
}
/**
* If the email contained an enveloped email, this method will provide the enveloped email. It can
* then be used to extract information about the original exchange.
*
* The value is returned exactly as it was passed to the constructor and may be empty
* (the property defaults to null); hasEnvelopedEmail() can be used to check beforehand.
*
* @return Part|null
*/
public function getEnvelopedEmail()
{
return $this->envelopedEmail;
}
/**
 * Tells whether this email carries an enveloped email, i.e. whether the
 * stored enveloped email is a non-empty value.
 *
 * @return bool
 */
public function hasEnvelopedEmail()
{
    $enveloped = $this->envelopedEmail;

    return (bool) $enveloped;
}
/**
* Returns all parts that had charset problems while decoding the content.
*
* Parts are only recorded while getPrimaryContent() runs, so the list stays empty
* until that method has been called.
*
* @return Part[]
*/
public function getProblematicParts()
{
return $this->problematicParts;
}
/**
 * Tells whether at least one part has been recorded as having charset
 * problems while its body was decoded.
 *
 * @return bool
 */
public function hasProblematicParts()
{
    return count($this->problematicParts) > 0;
}
/**
 * Decodes the body of a part according to its Content-Transfer-Encoding header
 * and converts the result to UTF-8.
 *
 * The source charset is taken from the charset parameter of the Content-Type
 * header if its normalised name matches one of the encodings mbstring knows,
 * otherwise 'auto' is used. The converted content is trimmed before it is returned.
 *
 * @param Part $part
 *
 * @return string
 * @throws \UnexpectedValueException with code INVALID_CHARSET_ERROR_CODE if the charset conversion failed
 */
private function decodeBody(Part $part)
{
    $contentTransferEncoding = '7-bit';
    $contentCharset          = 'auto';

    $headers = $part->getHeaders();
    if (!empty($headers)) {
        if ($headers->has('Content-Transfer-Encoding')) {
            $transferEncodingHeader = $headers->get('Content-Transfer-Encoding');
            if ($transferEncodingHeader instanceof ArrayIterator) {
                /*
                 * Multiple transfer encoding headers don't really make sense and are
                 * indicative of a malformed message. Just choose the first one and hope
                 * it works.
                 */
                $transferEncodingHeader = $transferEncodingHeader[0];
            }

            // Transfer encoding names are case-insensitive (RFC 2045, section 6.1).
            $contentTransferEncoding = strtolower(trim($transferEncodingHeader->getFieldValue()));
        }

        if ($this->hasHeader($part, 'Content-Type')) {
            $announcedCharset = $this
                ->getContentType($part)
                ->getParameter('charset');

            if (!empty($announcedCharset)
                && in_array($this->prepareEncodingName($announcedCharset), $this->knownCharsets, true)
            ) {
                $contentCharset = $announcedCharset;
            }
        }
    }

    try {
        $rawContent = $part->getContent();
    } catch (Exception $e) {
        /*
         * The part has no content. There is no function to check whether a part
         * is empty beforehand, so treat it as an empty body regardless of the
         * transfer encoding.
         */
        $rawContent = '';
    }

    switch ($contentTransferEncoding) {
        case 'base64':
            $content = base64_decode($rawContent);
            break;
        case 'quoted-printable':
            $content = quoted_printable_decode($rawContent);
            break;
        default:
            $content = $rawContent;
            break;
    }

    /*
     * mb_convert_encoding might produce warnings/errors if $contentCharset is wrong.
     * mb_check_encoding doesn't fail those cases, so there's no way to check
     * beforehand whether the encoding is correct.
     *
     * A custom error handler marks the conversion as failed when a warning is raised,
     * while preventing a php-internal warning from cluttering the log files.
     *
     * On PHP 8 an unusable encoding name raises a ValueError instead of a warning.
     * It is caught here so the error handler is always restored and the failure is
     * reported the same way. On older versions the catch clause simply never matches.
     */
    $hasError         = false;
    $convertedContent = '';

    set_error_handler(function () use (&$hasError) {
        $hasError = true;

        return true;
    }, E_ALL);

    try {
        $convertedContent = mb_convert_encoding($content, 'UTF-8', $contentCharset);
    } catch (\ValueError $e) {
        $hasError = true;
    }

    restore_error_handler();

    if ($hasError) {
        throw new UnexpectedValueException('Content: ' . $content, self::INVALID_CHARSET_ERROR_CODE);
    }

    return trim($convertedContent);
}
/**
 * Normalises an encoding name so that different spellings of the same
 * encoding compare equal: every character that is not an ASCII letter or
 * digit is removed and the remainder is lowercased.
 *
 * @param string $name
 *
 * @return string
 */
protected function prepareEncodingName($name)
{
    $alphanumericOnly = preg_replace('/[^a-z0-9]/i', '', $name);

    return strtolower($alphanumericOnly);
}
}