From dc6435180a00a08cbd42fed1456e6d8020e894aa Mon Sep 17 00:00:00 2001 From: Jeffrey Stedfast Date: Sat, 21 Dec 2024 17:33:34 -0500 Subject: [PATCH] Added more unit tests for *MimeParser and added inline comments Refactored MimeReader StepHeaders/Async a bit just to make it easier to understand what is happening in some "invalid header" cases. --- MimeKit/AsyncMimeReader.cs | 28 ++++-- MimeKit/MimeReader.cs | 28 ++++-- UnitTests/ExperimentalMimeParserTests.cs | 116 ++++++++++++++++++++++- UnitTests/MimeParserTests.cs | 116 ++++++++++++++++++++++- 4 files changed, 266 insertions(+), 22 deletions(-) diff --git a/MimeKit/AsyncMimeReader.cs b/MimeKit/AsyncMimeReader.cs index 6be86a070c..ad94fb2041 100644 --- a/MimeKit/AsyncMimeReader.cs +++ b/MimeKit/AsyncMimeReader.cs @@ -133,7 +133,6 @@ async Task StepHeadersAsync (CancellationToken cancellationToken) headerBlockBegin = GetOffset (inputIndex); boundary = BoundaryType.None; - //preHeaderLength = 0; headerCount = 0; currentContentLength = null; @@ -159,6 +158,7 @@ async Task StepHeadersAsync (CancellationToken cancellationToken) if (left == 0) { if (toplevel && headerCount == 0 && headerBlockBegin == GetOffset (inputIndex)) { + // EOF has been reached before any headers have been parsed for Parse[Headers,Entity,Message]Async. state = MimeParserState.Eos; return; } @@ -210,6 +210,12 @@ async Task StepHeadersAsync (CancellationToken cancellationToken) if (await ReadAheadAsync (atleast, 0, cancellationToken).ConfigureAwait (false) < atleast) break; } while (true); + + // Note: If a boundary was discovered, then the state will be updated to MimeParserState.Boundary. + if (state == MimeParserState.Boundary) + break; + + // Fall through and act as if we're consuming a header. } else if (input[inputIndex] == (byte) 'F' || input[inputIndex] == (byte) '>') { // Check for an mbox-style From-line. Again, if the message is properly formatted and not truncated, this will NEVER happen. do { @@ -225,10 +231,18 @@ async Task StepHeadersAsync (CancellationToken cancellationToken) if (await ReadAheadAsync (atleast, 0, cancellationToken).ConfigureAwait (false) < atleast) break; } while (true); - } - if (state != MimeParserState.MessageHeaders && state != MimeParserState.Headers) - break; + // state will be one of the following values: + // 1. Complete: This means that we've found an actual mbox marker + // 2. Error: Invalid *first* header and it was not a valid mbox marker + // 3. MessageHeaders or Headers: let it fall through and treat it as an invalid headers + if (state != MimeParserState.MessageHeaders && state != MimeParserState.Headers) + break; + + // Fall through and act as if we're consuming a header. + } else { + // Fall through and act as if we're consuming a header. + } if (toplevel && eos && inputIndex + headerFieldLength >= inputEnd) { state = MimeParserState.Error; @@ -431,12 +445,6 @@ async Task ConstructMessagePartAsync (int depth, CancellationToken cancella await OnMimeMessageBeginAsync (currentBeginOffset, beginLineNumber, cancellationToken).ConfigureAwait (false); - //if (preHeaderLength > 0) { - // FIXME: how to solve this? - //message.MboxMarker = new byte[preHeaderLength]; - //Buffer.BlockCopy (preHeaderBuffer, 0, message.MboxMarker, 0, preHeaderLength); - //} - var type = GetContentType (null); MimeEntityType entityType; int lines; diff --git a/MimeKit/MimeReader.cs b/MimeKit/MimeReader.cs index e0297d02cb..ab3a3c9e1d 100644 --- a/MimeKit/MimeReader.cs +++ b/MimeKit/MimeReader.cs @@ -1674,7 +1674,6 @@ unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken) headerBlockBegin = GetOffset (inputIndex); boundary = BoundaryType.None; - //preHeaderLength = 0; headerCount = 0; currentContentLength = null; @@ -1700,6 +1699,7 @@ unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken) if (left == 0) { if (toplevel && headerCount == 0 && headerBlockBegin == GetOffset (inputIndex)) { + // EOF has been reached before any headers have been parsed for Parse[Headers,Entity,Message]. state = MimeParserState.Eos; return; } @@ -1737,6 +1737,12 @@ unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken) if (ReadAhead (atleast, 0, cancellationToken) < atleast) break; } + + // Note: If a boundary was discovered, then the state will be updated to MimeParserState.Boundary. + if (state == MimeParserState.Boundary) + break; + + // Fall through and act as if we're consuming a header. } else if (input[inputIndex] == (byte) 'F' || input[inputIndex] == (byte) '>') { // Check for an mbox-style From-line. Again, if the message is properly formatted and not truncated, this will NEVER happen. while (!TryCheckMboxMarkerWithinHeaderBlock (inbuf)) { @@ -1745,10 +1751,18 @@ unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken) if (ReadAhead (atleast, 0, cancellationToken) < atleast) break; } - } - if (state != MimeParserState.MessageHeaders && state != MimeParserState.Headers) - break; + // state will be one of the following values: + // 1. Complete: This means that we've found an actual mbox marker + // 2. Error: Invalid *first* header and it was not a valid mbox marker + // 3. MessageHeaders or Headers: let it fall through and treat it as an invalid headers + if (state != MimeParserState.MessageHeaders && state != MimeParserState.Headers) + break; + + // Fall through and act as if we're consuming a header. + } else { + // Fall through and act as if we're consuming a header. + } if (toplevel && eos && inputIndex + headerFieldLength >= inputEnd) { state = MimeParserState.Error; @@ -2193,12 +2207,6 @@ unsafe int ConstructMessagePart (byte* inbuf, int depth, CancellationToken cance OnMimeMessageBegin (currentBeginOffset, beginLineNumber, cancellationToken); - //if (preHeaderLength > 0) { - // FIXME: how to solve this? - //message.MboxMarker = new byte[preHeaderLength]; - //Buffer.BlockCopy (preHeaderBuffer, 0, message.MboxMarker, 0, preHeaderLength); - //} - var type = GetContentType (null); MimeEntityType entityType; int lines; diff --git a/UnitTests/ExperimentalMimeParserTests.cs b/UnitTests/ExperimentalMimeParserTests.cs index 65445f498c..17a4e36fa6 100644 --- a/UnitTests/ExperimentalMimeParserTests.cs +++ b/UnitTests/ExperimentalMimeParserTests.cs @@ -1406,7 +1406,7 @@ This is the message body. } [Test] - public async Task TestMultipartSubpartHeadersLineStartsWithDashDashyAsync () + public async Task TestMultipartSubpartHeadersLineStartsWithDashDashAsync () { string text = @"From: mimekit@example.com To: mimekit@example.com @@ -1467,6 +1467,120 @@ This is the message body. } } + [Test] + public void TestMultipartSubpartHeadersLineStartsWithDashDashEOF () + { + string text = @"From: mimekit@example.com +To: mimekit@example.com +Subject: test of multipart subpart headers ending with a boundary +Date: Tue, 12 Nov 2013 09:12:42 -0500 +MIME-Version: 1.0 +Message-ID: <54AD68C9E3B0184CAC6041320424FD1B5B81E74D@localhost.localdomain> +X-Mailer: Microsoft Office Outlook 12.0 +Content-Type: multipart/mixed; + boundary=""----=_NextPart_000_003F_01CE98CE.6E826F90"" + + +------=_NextPart_000_003F_01CE98CE.6E826F90 +Content-Type: text/plain; charset=utf-8 +--not-the-boundary-muhahaha".Replace ("\r\n", "\n"); + + using (var stream = new MemoryStream (Encoding.ASCII.GetBytes (text), false)) { + var parser = new ExperimentalMimeParser (stream, MimeFormat.Entity); + var message = parser.ParseMessage (); + + Assert.That (message.Body, Is.InstanceOf (), "Expected top-level to be a multipart"); + var multipart = (Multipart) message.Body; + Assert.That (multipart.Count, Is.EqualTo (1), "Expected 1 child"); + Assert.That (multipart[0], Is.InstanceOf (), "Expected first child of the multipart to be text/plain"); + var body = (TextPart) multipart[0]; + + Assert.That (body.Headers[HeaderId.ContentType], Is.EqualTo ("text/plain; charset=utf-8")); + Assert.That (body.ContentType.Charset, Is.EqualTo ("utf-8")); + Assert.That (body.Headers.Count, Is.EqualTo (2)); + Assert.That (body.Headers[1].IsInvalid, Is.True, "IsInvalid"); + Assert.That (body.Headers[1].Field, Is.EqualTo ("--not-the-boundary-muhahaha")); + + Assert.That (body.Text, Is.EqualTo (string.Empty)); + } + + using (var stream = new MemoryStream (Encoding.ASCII.GetBytes (text.Replace ("\n", "\r\n")), false)) { + var parser = new ExperimentalMimeParser (stream, MimeFormat.Entity); + var message = parser.ParseMessage (); + + Assert.That (message.Body, Is.InstanceOf (), "Expected top-level to be a multipart"); + var multipart = (Multipart) message.Body; + Assert.That (multipart.Count, Is.EqualTo (1), "Expected 1 child"); + Assert.That (multipart[0], Is.InstanceOf (), "Expected first child of the multipart to be text/plain"); + var body = (TextPart) multipart[0]; + + Assert.That (body.Headers[HeaderId.ContentType], Is.EqualTo ("text/plain; charset=utf-8")); + Assert.That (body.ContentType.Charset, Is.EqualTo ("utf-8")); + Assert.That (body.Headers.Count, Is.EqualTo (2)); + Assert.That (body.Headers[1].IsInvalid, Is.True, "IsInvalid"); + Assert.That (body.Headers[1].Field, Is.EqualTo ("--not-the-boundary-muhahaha")); + + Assert.That (body.Text, Is.EqualTo (string.Empty)); + } + } + + [Test] + public async Task TestMultipartSubpartHeadersLineStartsWithDashDashEOFAsync () + { + string text = @"From: mimekit@example.com +To: mimekit@example.com +Subject: test of multipart subpart headers ending with a boundary +Date: Tue, 12 Nov 2013 09:12:42 -0500 +MIME-Version: 1.0 +Message-ID: <54AD68C9E3B0184CAC6041320424FD1B5B81E74D@localhost.localdomain> +X-Mailer: Microsoft Office Outlook 12.0 +Content-Type: multipart/mixed; + boundary=""----=_NextPart_000_003F_01CE98CE.6E826F90"" + + +------=_NextPart_000_003F_01CE98CE.6E826F90 +Content-Type: text/plain; charset=utf-8 +--not-the-boundary-muhahaha".Replace ("\r\n", "\n"); + + using (var stream = new MemoryStream (Encoding.ASCII.GetBytes (text), false)) { + var parser = new ExperimentalMimeParser (stream, MimeFormat.Entity); + var message = await parser.ParseMessageAsync (); + + Assert.That (message.Body, Is.InstanceOf (), "Expected top-level to be a multipart"); + var multipart = (Multipart) message.Body; + Assert.That (multipart.Count, Is.EqualTo (1), "Expected 1 child"); + Assert.That (multipart[0], Is.InstanceOf (), "Expected first child of the multipart to be text/plain"); + var body = (TextPart) multipart[0]; + + Assert.That (body.Headers[HeaderId.ContentType], Is.EqualTo ("text/plain; charset=utf-8")); + Assert.That (body.ContentType.Charset, Is.EqualTo ("utf-8")); + Assert.That (body.Headers.Count, Is.EqualTo (2)); + Assert.That (body.Headers[1].IsInvalid, Is.True, "IsInvalid"); + Assert.That (body.Headers[1].Field, Is.EqualTo ("--not-the-boundary-muhahaha")); + + Assert.That (body.Text, Is.EqualTo (string.Empty)); + } + + using (var stream = new MemoryStream (Encoding.ASCII.GetBytes (text.Replace ("\n", "\r\n")), false)) { + var parser = new ExperimentalMimeParser (stream, MimeFormat.Entity); + var message = await parser.ParseMessageAsync (); + + Assert.That (message.Body, Is.InstanceOf (), "Expected top-level to be a multipart"); + var multipart = (Multipart) message.Body; + Assert.That (multipart.Count, Is.EqualTo (1), "Expected 1 child"); + Assert.That (multipart[0], Is.InstanceOf (), "Expected first child of the multipart to be text/plain"); + var body = (TextPart) multipart[0]; + + Assert.That (body.Headers[HeaderId.ContentType], Is.EqualTo ("text/plain; charset=utf-8")); + Assert.That (body.ContentType.Charset, Is.EqualTo ("utf-8")); + Assert.That (body.Headers.Count, Is.EqualTo (2)); + Assert.That (body.Headers[1].IsInvalid, Is.True, "IsInvalid"); + Assert.That (body.Headers[1].Field, Is.EqualTo ("--not-the-boundary-muhahaha")); + + Assert.That (body.Text, Is.EqualTo (string.Empty)); + } + } + [Test] public void TestMultipartBoundaryLineWithTrailingSpacesAndThenMoreCharacters () { diff --git a/UnitTests/MimeParserTests.cs b/UnitTests/MimeParserTests.cs index 57a273ba67..4161a6e38b 100644 --- a/UnitTests/MimeParserTests.cs +++ b/UnitTests/MimeParserTests.cs @@ -1398,7 +1398,7 @@ This is the message body. } [Test] - public async Task TestMultipartSubpartHeadersLineStartsWithDashDashyAsync () + public async Task TestMultipartSubpartHeadersLineStartsWithDashDashAsync () { string text = @"From: mimekit@example.com To: mimekit@example.com @@ -1459,6 +1459,120 @@ This is the message body. } } + [Test] + public void TestMultipartSubpartHeadersLineStartsWithDashDashEOF () + { + string text = @"From: mimekit@example.com +To: mimekit@example.com +Subject: test of multipart subpart headers ending with a boundary +Date: Tue, 12 Nov 2013 09:12:42 -0500 +MIME-Version: 1.0 +Message-ID: <54AD68C9E3B0184CAC6041320424FD1B5B81E74D@localhost.localdomain> +X-Mailer: Microsoft Office Outlook 12.0 +Content-Type: multipart/mixed; + boundary=""----=_NextPart_000_003F_01CE98CE.6E826F90"" + + +------=_NextPart_000_003F_01CE98CE.6E826F90 +Content-Type: text/plain; charset=utf-8 +--not-the-boundary-muhahaha".Replace ("\r\n", "\n"); + + using (var stream = new MemoryStream (Encoding.ASCII.GetBytes (text), false)) { + var parser = new MimeParser (stream, MimeFormat.Entity); + var message = parser.ParseMessage (); + + Assert.That (message.Body, Is.InstanceOf (), "Expected top-level to be a multipart"); + var multipart = (Multipart) message.Body; + Assert.That (multipart.Count, Is.EqualTo (1), "Expected 1 child"); + Assert.That (multipart[0], Is.InstanceOf (), "Expected first child of the multipart to be text/plain"); + var body = (TextPart) multipart[0]; + + Assert.That (body.Headers[HeaderId.ContentType], Is.EqualTo ("text/plain; charset=utf-8")); + Assert.That (body.ContentType.Charset, Is.EqualTo ("utf-8")); + Assert.That (body.Headers.Count, Is.EqualTo (2)); + Assert.That (body.Headers[1].IsInvalid, Is.True, "IsInvalid"); + Assert.That (body.Headers[1].Field, Is.EqualTo ("--not-the-boundary-muhahaha")); + + Assert.That (body.Text, Is.EqualTo (string.Empty)); + } + + using (var stream = new MemoryStream (Encoding.ASCII.GetBytes (text.Replace ("\n", "\r\n")), false)) { + var parser = new MimeParser (stream, MimeFormat.Entity); + var message = parser.ParseMessage (); + + Assert.That (message.Body, Is.InstanceOf (), "Expected top-level to be a multipart"); + var multipart = (Multipart) message.Body; + Assert.That (multipart.Count, Is.EqualTo (1), "Expected 1 child"); + Assert.That (multipart[0], Is.InstanceOf (), "Expected first child of the multipart to be text/plain"); + var body = (TextPart) multipart[0]; + + Assert.That (body.Headers[HeaderId.ContentType], Is.EqualTo ("text/plain; charset=utf-8")); + Assert.That (body.ContentType.Charset, Is.EqualTo ("utf-8")); + Assert.That (body.Headers.Count, Is.EqualTo (2)); + Assert.That (body.Headers[1].IsInvalid, Is.True, "IsInvalid"); + Assert.That (body.Headers[1].Field, Is.EqualTo ("--not-the-boundary-muhahaha")); + + Assert.That (body.Text, Is.EqualTo (string.Empty)); + } + } + + [Test] + public async Task TestMultipartSubpartHeadersLineStartsWithDashDashEOFAsync () + { + string text = @"From: mimekit@example.com +To: mimekit@example.com +Subject: test of multipart subpart headers ending with a boundary +Date: Tue, 12 Nov 2013 09:12:42 -0500 +MIME-Version: 1.0 +Message-ID: <54AD68C9E3B0184CAC6041320424FD1B5B81E74D@localhost.localdomain> +X-Mailer: Microsoft Office Outlook 12.0 +Content-Type: multipart/mixed; + boundary=""----=_NextPart_000_003F_01CE98CE.6E826F90"" + + +------=_NextPart_000_003F_01CE98CE.6E826F90 +Content-Type: text/plain; charset=utf-8 +--not-the-boundary-muhahaha".Replace ("\r\n", "\n"); + + using (var stream = new MemoryStream (Encoding.ASCII.GetBytes (text), false)) { + var parser = new MimeParser (stream, MimeFormat.Entity); + var message = await parser.ParseMessageAsync (); + + Assert.That (message.Body, Is.InstanceOf (), "Expected top-level to be a multipart"); + var multipart = (Multipart) message.Body; + Assert.That (multipart.Count, Is.EqualTo (1), "Expected 1 child"); + Assert.That (multipart[0], Is.InstanceOf (), "Expected first child of the multipart to be text/plain"); + var body = (TextPart) multipart[0]; + + Assert.That (body.Headers[HeaderId.ContentType], Is.EqualTo ("text/plain; charset=utf-8")); + Assert.That (body.ContentType.Charset, Is.EqualTo ("utf-8")); + Assert.That (body.Headers.Count, Is.EqualTo (2)); + Assert.That (body.Headers[1].IsInvalid, Is.True, "IsInvalid"); + Assert.That (body.Headers[1].Field, Is.EqualTo ("--not-the-boundary-muhahaha")); + + Assert.That (body.Text, Is.EqualTo (string.Empty)); + } + + using (var stream = new MemoryStream (Encoding.ASCII.GetBytes (text.Replace ("\n", "\r\n")), false)) { + var parser = new MimeParser (stream, MimeFormat.Entity); + var message = await parser.ParseMessageAsync (); + + Assert.That (message.Body, Is.InstanceOf (), "Expected top-level to be a multipart"); + var multipart = (Multipart) message.Body; + Assert.That (multipart.Count, Is.EqualTo (1), "Expected 1 child"); + Assert.That (multipart[0], Is.InstanceOf (), "Expected first child of the multipart to be text/plain"); + var body = (TextPart) multipart[0]; + + Assert.That (body.Headers[HeaderId.ContentType], Is.EqualTo ("text/plain; charset=utf-8")); + Assert.That (body.ContentType.Charset, Is.EqualTo ("utf-8")); + Assert.That (body.Headers.Count, Is.EqualTo (2)); + Assert.That (body.Headers[1].IsInvalid, Is.True, "IsInvalid"); + Assert.That (body.Headers[1].Field, Is.EqualTo ("--not-the-boundary-muhahaha")); + + Assert.That (body.Text, Is.EqualTo (string.Empty)); + } + } + [Test] public void TestMultipartBoundaryLineWithTrailingSpacesAndThenMoreCharacters () {