Skip to content

Commit

Permalink
Added more unit tests for *MimeParser and added inline comments
Browse files Browse the repository at this point in the history
Refactored MimeReader StepHeaders/Async a bit just to make it easier to
understand what is happening in some "invalid header" cases.
  • Loading branch information
jstedfast committed Dec 21, 2024
1 parent c0678bb commit dc64351
Show file tree
Hide file tree
Showing 4 changed files with 266 additions and 22 deletions.
28 changes: 18 additions & 10 deletions MimeKit/AsyncMimeReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,6 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)

headerBlockBegin = GetOffset (inputIndex);
boundary = BoundaryType.None;
//preHeaderLength = 0;
headerCount = 0;

currentContentLength = null;
Expand All @@ -159,6 +158,7 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)

if (left == 0) {
if (toplevel && headerCount == 0 && headerBlockBegin == GetOffset (inputIndex)) {
// EOF has been reached before any headers have been parsed for Parse[Headers,Entity,Message]Async.
state = MimeParserState.Eos;
return;
}
Expand Down Expand Up @@ -210,6 +210,12 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
if (await ReadAheadAsync (atleast, 0, cancellationToken).ConfigureAwait (false) < atleast)
break;
} while (true);

// Note: If a boundary was discovered, then the state will be updated to MimeParserState.Boundary.
if (state == MimeParserState.Boundary)
break;

// Fall through and act as if we're consuming a header.
} else if (input[inputIndex] == (byte) 'F' || input[inputIndex] == (byte) '>') {
// Check for an mbox-style From-line. Again, if the message is properly formatted and not truncated, this will NEVER happen.
do {
Expand All @@ -225,10 +231,18 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
if (await ReadAheadAsync (atleast, 0, cancellationToken).ConfigureAwait (false) < atleast)
break;
} while (true);
}

if (state != MimeParserState.MessageHeaders && state != MimeParserState.Headers)
break;
// state will be one of the following values:
// 1. Complete: This means that we've found an actual mbox marker
// 2. Error: Invalid *first* header and it was not a valid mbox marker
// 3. MessageHeaders or Headers: let it fall through and treat it as an invalid header
if (state != MimeParserState.MessageHeaders && state != MimeParserState.Headers)
break;

// Fall through and act as if we're consuming a header.
} else {
// Fall through and act as if we're consuming a header.
}

if (toplevel && eos && inputIndex + headerFieldLength >= inputEnd) {
state = MimeParserState.Error;
Expand Down Expand Up @@ -431,12 +445,6 @@ async Task<int> ConstructMessagePartAsync (int depth, CancellationToken cancella

await OnMimeMessageBeginAsync (currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false);

//if (preHeaderLength > 0) {
// FIXME: how to solve this?
//message.MboxMarker = new byte[preHeaderLength];
//Buffer.BlockCopy (preHeaderBuffer, 0, message.MboxMarker, 0, preHeaderLength);
//}

var type = GetContentType (null);
MimeEntityType entityType;
int lines;
Expand Down
28 changes: 18 additions & 10 deletions MimeKit/MimeReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1674,7 +1674,6 @@ unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken)

headerBlockBegin = GetOffset (inputIndex);
boundary = BoundaryType.None;
//preHeaderLength = 0;
headerCount = 0;

currentContentLength = null;
Expand All @@ -1700,6 +1699,7 @@ unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken)

if (left == 0) {
if (toplevel && headerCount == 0 && headerBlockBegin == GetOffset (inputIndex)) {
// EOF has been reached before any headers have been parsed for Parse[Headers,Entity,Message].
state = MimeParserState.Eos;
return;
}
Expand Down Expand Up @@ -1737,6 +1737,12 @@ unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken)
if (ReadAhead (atleast, 0, cancellationToken) < atleast)
break;
}

// Note: If a boundary was discovered, then the state will be updated to MimeParserState.Boundary.
if (state == MimeParserState.Boundary)
break;

// Fall through and act as if we're consuming a header.
} else if (input[inputIndex] == (byte) 'F' || input[inputIndex] == (byte) '>') {
// Check for an mbox-style From-line. Again, if the message is properly formatted and not truncated, this will NEVER happen.
while (!TryCheckMboxMarkerWithinHeaderBlock (inbuf)) {
Expand All @@ -1745,10 +1751,18 @@ unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken)
if (ReadAhead (atleast, 0, cancellationToken) < atleast)
break;
}
}

if (state != MimeParserState.MessageHeaders && state != MimeParserState.Headers)
break;
// state will be one of the following values:
// 1. Complete: This means that we've found an actual mbox marker
// 2. Error: Invalid *first* header and it was not a valid mbox marker
// 3. MessageHeaders or Headers: let it fall through and treat it as an invalid header
if (state != MimeParserState.MessageHeaders && state != MimeParserState.Headers)
break;

// Fall through and act as if we're consuming a header.
} else {
// Fall through and act as if we're consuming a header.
}

if (toplevel && eos && inputIndex + headerFieldLength >= inputEnd) {
state = MimeParserState.Error;
Expand Down Expand Up @@ -2193,12 +2207,6 @@ unsafe int ConstructMessagePart (byte* inbuf, int depth, CancellationToken cance

OnMimeMessageBegin (currentBeginOffset, beginLineNumber, cancellationToken);

//if (preHeaderLength > 0) {
// FIXME: how to solve this?
//message.MboxMarker = new byte[preHeaderLength];
//Buffer.BlockCopy (preHeaderBuffer, 0, message.MboxMarker, 0, preHeaderLength);
//}

var type = GetContentType (null);
MimeEntityType entityType;
int lines;
Expand Down
116 changes: 115 additions & 1 deletion UnitTests/ExperimentalMimeParserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1406,7 +1406,7 @@ This is the message body.
}

[Test]
public async Task TestMultipartSubpartHeadersLineStartsWithDashDashyAsync ()
public async Task TestMultipartSubpartHeadersLineStartsWithDashDashAsync ()
{
string text = @"From: mimekit@example.com
To: mimekit@example.com
Expand Down Expand Up @@ -1467,6 +1467,120 @@ This is the message body.
}
}

[Test]
public void TestMultipartSubpartHeadersLineStartsWithDashDashEOF ()
{
	// The input stops (with no trailing newline) on a line that begins with "--" but is not the
	// boundary declared by the multipart/mixed Content-Type header. The assertions below expect
	// that line to be recorded as an invalid second header of the text/plain child, and the
	// child's text to be empty.
	string text = @"From: mimekit@example.com
To: mimekit@example.com
Subject: test of multipart subpart headers ending with a boundary
Date: Tue, 12 Nov 2013 09:12:42 -0500
MIME-Version: 1.0
Message-ID: <54AD68C9E3B0184CAC6041320424FD1B5B81E74D@localhost.localdomain>
X-Mailer: Microsoft Office Outlook 12.0
Content-Type: multipart/mixed;
boundary=""----=_NextPart_000_003F_01CE98CE.6E826F90""
------=_NextPart_000_003F_01CE98CE.6E826F90
Content-Type: text/plain; charset=utf-8
--not-the-boundary-muhahaha".Replace ("\r\n", "\n");

	// Exercise the LF-normalized input first, then the same input converted to CRLF line endings.
	var variants = new[] { text, text.Replace ("\n", "\r\n") };

	foreach (var variant in variants) {
		var rawBytes = Encoding.ASCII.GetBytes (variant);

		using (var stream = new MemoryStream (rawBytes, false)) {
			var parser = new ExperimentalMimeParser (stream, MimeFormat.Entity);
			var message = parser.ParseMessage ();

			// Top-level structure: a multipart with exactly one text child.
			Assert.That (message.Body, Is.InstanceOf<Multipart> (), "Expected top-level to be a multipart");
			var multipart = (Multipart) message.Body;
			Assert.That (multipart.Count, Is.EqualTo (1), "Expected 1 child");
			Assert.That (multipart[0], Is.InstanceOf<TextPart> (), "Expected first child of the multipart to be text/plain");
			var body = (TextPart) multipart[0];

			// The child keeps its real Content-Type header plus the bogus "--" line as an invalid header.
			Assert.That (body.Headers[HeaderId.ContentType], Is.EqualTo ("text/plain; charset=utf-8"));
			Assert.That (body.ContentType.Charset, Is.EqualTo ("utf-8"));
			Assert.That (body.Headers.Count, Is.EqualTo (2));

			var trailing = body.Headers[1];
			Assert.That (trailing.IsInvalid, Is.True, "IsInvalid");
			Assert.That (trailing.Field, Is.EqualTo ("--not-the-boundary-muhahaha"));

			Assert.That (body.Text, Is.EqualTo (string.Empty));
		}
	}
}

[Test]
public async Task TestMultipartSubpartHeadersLineStartsWithDashDashEOFAsync ()
{
	// The input stops (with no trailing newline) on a line that begins with "--" but is not the
	// boundary declared by the multipart/mixed Content-Type header. The assertions below expect
	// that line to be recorded as an invalid second header of the text/plain child, and the
	// child's text to be empty.
	string text = @"From: mimekit@example.com
To: mimekit@example.com
Subject: test of multipart subpart headers ending with a boundary
Date: Tue, 12 Nov 2013 09:12:42 -0500
MIME-Version: 1.0
Message-ID: <54AD68C9E3B0184CAC6041320424FD1B5B81E74D@localhost.localdomain>
X-Mailer: Microsoft Office Outlook 12.0
Content-Type: multipart/mixed;
boundary=""----=_NextPart_000_003F_01CE98CE.6E826F90""
------=_NextPart_000_003F_01CE98CE.6E826F90
Content-Type: text/plain; charset=utf-8
--not-the-boundary-muhahaha".Replace ("\r\n", "\n");

	// Exercise the LF-normalized input first, then the same input converted to CRLF line endings.
	var variants = new[] { text, text.Replace ("\n", "\r\n") };

	foreach (var variant in variants) {
		var rawBytes = Encoding.ASCII.GetBytes (variant);

		using (var stream = new MemoryStream (rawBytes, false)) {
			var parser = new ExperimentalMimeParser (stream, MimeFormat.Entity);
			var message = await parser.ParseMessageAsync ();

			// Top-level structure: a multipart with exactly one text child.
			Assert.That (message.Body, Is.InstanceOf<Multipart> (), "Expected top-level to be a multipart");
			var multipart = (Multipart) message.Body;
			Assert.That (multipart.Count, Is.EqualTo (1), "Expected 1 child");
			Assert.That (multipart[0], Is.InstanceOf<TextPart> (), "Expected first child of the multipart to be text/plain");
			var body = (TextPart) multipart[0];

			// The child keeps its real Content-Type header plus the bogus "--" line as an invalid header.
			Assert.That (body.Headers[HeaderId.ContentType], Is.EqualTo ("text/plain; charset=utf-8"));
			Assert.That (body.ContentType.Charset, Is.EqualTo ("utf-8"));
			Assert.That (body.Headers.Count, Is.EqualTo (2));

			var trailing = body.Headers[1];
			Assert.That (trailing.IsInvalid, Is.True, "IsInvalid");
			Assert.That (trailing.Field, Is.EqualTo ("--not-the-boundary-muhahaha"));

			Assert.That (body.Text, Is.EqualTo (string.Empty));
		}
	}
}

[Test]
public void TestMultipartBoundaryLineWithTrailingSpacesAndThenMoreCharacters ()
{
Expand Down
116 changes: 115 additions & 1 deletion UnitTests/MimeParserTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1398,7 +1398,7 @@ This is the message body.
}

[Test]
public async Task TestMultipartSubpartHeadersLineStartsWithDashDashyAsync ()
public async Task TestMultipartSubpartHeadersLineStartsWithDashDashAsync ()
{
string text = @"From: mimekit@example.com
To: mimekit@example.com
Expand Down Expand Up @@ -1459,6 +1459,120 @@ This is the message body.
}
}

[Test]
public void TestMultipartSubpartHeadersLineStartsWithDashDashEOF ()
{
	// The input stops (with no trailing newline) on a line that begins with "--" but is not the
	// boundary declared by the multipart/mixed Content-Type header. The assertions below expect
	// that line to be recorded as an invalid second header of the text/plain child, and the
	// child's text to be empty.
	string text = @"From: mimekit@example.com
To: mimekit@example.com
Subject: test of multipart subpart headers ending with a boundary
Date: Tue, 12 Nov 2013 09:12:42 -0500
MIME-Version: 1.0
Message-ID: <54AD68C9E3B0184CAC6041320424FD1B5B81E74D@localhost.localdomain>
X-Mailer: Microsoft Office Outlook 12.0
Content-Type: multipart/mixed;
boundary=""----=_NextPart_000_003F_01CE98CE.6E826F90""
------=_NextPart_000_003F_01CE98CE.6E826F90
Content-Type: text/plain; charset=utf-8
--not-the-boundary-muhahaha".Replace ("\r\n", "\n");

	// Exercise the LF-normalized input first, then the same input converted to CRLF line endings.
	var variants = new[] { text, text.Replace ("\n", "\r\n") };

	foreach (var variant in variants) {
		var rawBytes = Encoding.ASCII.GetBytes (variant);

		using (var stream = new MemoryStream (rawBytes, false)) {
			var parser = new MimeParser (stream, MimeFormat.Entity);
			var message = parser.ParseMessage ();

			// Top-level structure: a multipart with exactly one text child.
			Assert.That (message.Body, Is.InstanceOf<Multipart> (), "Expected top-level to be a multipart");
			var multipart = (Multipart) message.Body;
			Assert.That (multipart.Count, Is.EqualTo (1), "Expected 1 child");
			Assert.That (multipart[0], Is.InstanceOf<TextPart> (), "Expected first child of the multipart to be text/plain");
			var body = (TextPart) multipart[0];

			// The child keeps its real Content-Type header plus the bogus "--" line as an invalid header.
			Assert.That (body.Headers[HeaderId.ContentType], Is.EqualTo ("text/plain; charset=utf-8"));
			Assert.That (body.ContentType.Charset, Is.EqualTo ("utf-8"));
			Assert.That (body.Headers.Count, Is.EqualTo (2));

			var trailing = body.Headers[1];
			Assert.That (trailing.IsInvalid, Is.True, "IsInvalid");
			Assert.That (trailing.Field, Is.EqualTo ("--not-the-boundary-muhahaha"));

			Assert.That (body.Text, Is.EqualTo (string.Empty));
		}
	}
}

[Test]
public async Task TestMultipartSubpartHeadersLineStartsWithDashDashEOFAsync ()
{
	// The input stops (with no trailing newline) on a line that begins with "--" but is not the
	// boundary declared by the multipart/mixed Content-Type header. The assertions below expect
	// that line to be recorded as an invalid second header of the text/plain child, and the
	// child's text to be empty.
	string text = @"From: mimekit@example.com
To: mimekit@example.com
Subject: test of multipart subpart headers ending with a boundary
Date: Tue, 12 Nov 2013 09:12:42 -0500
MIME-Version: 1.0
Message-ID: <54AD68C9E3B0184CAC6041320424FD1B5B81E74D@localhost.localdomain>
X-Mailer: Microsoft Office Outlook 12.0
Content-Type: multipart/mixed;
boundary=""----=_NextPart_000_003F_01CE98CE.6E826F90""
------=_NextPart_000_003F_01CE98CE.6E826F90
Content-Type: text/plain; charset=utf-8
--not-the-boundary-muhahaha".Replace ("\r\n", "\n");

	// Exercise the LF-normalized input first, then the same input converted to CRLF line endings.
	var variants = new[] { text, text.Replace ("\n", "\r\n") };

	foreach (var variant in variants) {
		var rawBytes = Encoding.ASCII.GetBytes (variant);

		using (var stream = new MemoryStream (rawBytes, false)) {
			var parser = new MimeParser (stream, MimeFormat.Entity);
			var message = await parser.ParseMessageAsync ();

			// Top-level structure: a multipart with exactly one text child.
			Assert.That (message.Body, Is.InstanceOf<Multipart> (), "Expected top-level to be a multipart");
			var multipart = (Multipart) message.Body;
			Assert.That (multipart.Count, Is.EqualTo (1), "Expected 1 child");
			Assert.That (multipart[0], Is.InstanceOf<TextPart> (), "Expected first child of the multipart to be text/plain");
			var body = (TextPart) multipart[0];

			// The child keeps its real Content-Type header plus the bogus "--" line as an invalid header.
			Assert.That (body.Headers[HeaderId.ContentType], Is.EqualTo ("text/plain; charset=utf-8"));
			Assert.That (body.ContentType.Charset, Is.EqualTo ("utf-8"));
			Assert.That (body.Headers.Count, Is.EqualTo (2));

			var trailing = body.Headers[1];
			Assert.That (trailing.IsInvalid, Is.True, "IsInvalid");
			Assert.That (trailing.Field, Is.EqualTo ("--not-the-boundary-muhahaha"));

			Assert.That (body.Text, Is.EqualTo (string.Empty));
		}
	}
}

[Test]
public void TestMultipartBoundaryLineWithTrailingSpacesAndThenMoreCharacters ()
{
Expand Down

0 comments on commit dc64351

Please sign in to comment.