Skip to content

Commit

Permalink
Improved MimeReader header parsing (mostly just state tracking improvements)
Browse files Browse the repository at this point in the history

This allows us to throw the appropriate exception if the end of the stream
(EOS) is reached before any headers have been parsed.
  • Loading branch information
jstedfast committed Dec 21, 2024
1 parent 77d78e8 commit c0678bb
Show file tree
Hide file tree
Showing 2 changed files with 70 additions and 18 deletions.
39 changes: 31 additions & 8 deletions MimeKit/AsyncMimeReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ async Task<int> ReadAheadAsync (int atleast, int save, CancellationToken cancell
async Task<bool> StepByteOrderMarkAsync (CancellationToken cancellationToken)
{
int bomIndex = 0;
bool complete;

do {
var available = await ReadAheadAsync (ReadAheadSize, 0, cancellationToken).ConfigureAwait (false);
Expand All @@ -64,12 +65,12 @@ async Task<bool> StepByteOrderMarkAsync (CancellationToken cancellationToken)

unsafe {
fixed (byte* inbuf = input) {
StepByteOrderMark (inbuf, ref bomIndex);
complete = StepByteOrderMark (inbuf, ref bomIndex);
}
}
} while (inputIndex == inputEnd);
} while (!complete && inputIndex == inputEnd);

return bomIndex == 0 || bomIndex == UTF8ByteOrderMark.Length;
return complete;
}

async Task StepMboxMarkerAsync (CancellationToken cancellationToken)
Expand Down Expand Up @@ -157,14 +158,21 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
left = await ReadAheadAsync (2, 0, cancellationToken).ConfigureAwait (false);

if (left == 0) {
if (toplevel && headerCount == 0 && headerBlockBegin == GetOffset (inputIndex)) {
state = MimeParserState.Eos;
return;
}

// FIXME: Should this be Content or Error?
state = MimeParserState.Content;
eof = true;
break;
}

// Check for an empty line denoting the end of the header block.
if (IsEndOfHeaderBlock (left))
if (IsEndOfHeaderBlock (left)) {
state = MimeParserState.Content;
break;
}

// Scan ahead a bit to see if this looks like an invalid header.
do {
Expand Down Expand Up @@ -245,6 +253,7 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
}

if (await ReadAheadAsync (1, 0, cancellationToken).ConfigureAwait (false) == 0) {
state = MimeParserState.Content;
eof = true;
break;
}
Expand All @@ -260,6 +269,11 @@ async Task StepHeadersAsync (CancellationToken cancellationToken)
await OnHeaderReadAsync (header, beginLineNumber, cancellationToken).ConfigureAwait (false);
} while (!eof);

if (state == MimeParserState.MessageHeaders || state == MimeParserState.Headers) {
// Ideally, we never get here. If we do, there's an exit in the loop above that should be fixed.
state = MimeParserState.Content;
}

headerBlockEnd = GetOffset (inputIndex);

await OnHeadersEndAsync (headerBlockBegin, headersBeginLineNumber, headerBlockEnd, lineNumber, cancellationToken).ConfigureAwait (false);
Expand Down Expand Up @@ -673,8 +687,13 @@ public async Task ReadEntityAsync (CancellationToken cancellationToken = default
state = MimeParserState.Headers;
toplevel = true;

if (await StepAsync (cancellationToken).ConfigureAwait (false) == MimeParserState.Error)
// parse the headers
switch (await StepAsync (cancellationToken).ConfigureAwait (false)) {
case MimeParserState.Error:
throw new FormatException ("Failed to parse entity headers.");
case MimeParserState.Eos:
throw new FormatException ("End of stream.");
}

var type = GetContentType (null);
var currentHeadersEndOffset = headerBlockEnd;
Expand Down Expand Up @@ -743,12 +762,16 @@ public async Task ReadMessageAsync (CancellationToken cancellationToken = defaul
}
}

var beginLineNumber = lineNumber;
toplevel = true;

// parse the headers
var beginLineNumber = lineNumber;
if (state < MimeParserState.Content && await StepAsync (cancellationToken).ConfigureAwait (false) == MimeParserState.Error)
switch (await StepAsync (cancellationToken).ConfigureAwait (false)) {
case MimeParserState.Error:
throw new FormatException ("Failed to parse message headers.");
case MimeParserState.Eos:
throw new FormatException ("End of stream.");
}

var currentHeadersEndOffset = headerBlockEnd;
var currentBeginOffset = headerBlockBegin;
Expand Down
49 changes: 39 additions & 10 deletions MimeKit/MimeReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1175,7 +1175,7 @@ static unsafe bool CStringsEqual (byte* str1, byte* str2, int length)
return true;
}

unsafe void StepByteOrderMark (byte* inbuf, ref int bomIndex)
unsafe bool StepByteOrderMark (byte* inbuf, ref int bomIndex)
{
byte* inptr = inbuf + inputIndex;
byte* inend = inbuf + inputEnd;
Expand All @@ -1186,11 +1186,14 @@ unsafe void StepByteOrderMark (byte* inbuf, ref int bomIndex)
}

inputIndex = (int) (inptr - inbuf);

return bomIndex == 0 || bomIndex == UTF8ByteOrderMark.Length;
}

unsafe bool StepByteOrderMark (byte* inbuf, CancellationToken cancellationToken)
{
int bomIndex = 0;
bool complete;

do {
var available = ReadAhead (ReadAheadSize, 0, cancellationToken);
Expand All @@ -1201,10 +1204,10 @@ unsafe bool StepByteOrderMark (byte* inbuf, CancellationToken cancellationToken)
return false;
}

StepByteOrderMark (inbuf, ref bomIndex);
} while (inputIndex == inputEnd);
complete = StepByteOrderMark (inbuf, ref bomIndex);
} while (!complete && inputIndex == inputEnd);

return bomIndex == 0 || bomIndex == UTF8ByteOrderMark.Length;
return complete;
}

static unsafe bool IsMboxMarker (byte* text, bool allowMunged = false)
Expand Down Expand Up @@ -1667,6 +1670,7 @@ Header CreateHeader (long beginOffset, int fieldNameLength, int headerFieldLengt
unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken)
{
int headersBeginLineNumber = lineNumber;
var eof = false;

headerBlockBegin = GetOffset (inputIndex);
boundary = BoundaryType.None;
Expand Down Expand Up @@ -1695,13 +1699,21 @@ unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken)
left = ReadAhead (2, 0, cancellationToken);

if (left == 0) {
if (toplevel && headerCount == 0 && headerBlockBegin == GetOffset (inputIndex)) {
state = MimeParserState.Eos;
return;
}

// FIXME: Should this be Content or Error?
state = MimeParserState.Content;
break;
}

// Check for an empty line denoting the end of the header block.
if (IsEndOfHeaderBlock (left))
if (IsEndOfHeaderBlock (left)) {
state = MimeParserState.Content;
break;
}

// Scan ahead a bit to see if this looks like an invalid header.
while (!TryDetectInvalidHeader (inbuf, out invalid, out fieldNameLength, out headerFieldLength)) {
Expand Down Expand Up @@ -1753,8 +1765,11 @@ unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken)

// Consume the header value.
while (!StepHeaderValue (inbuf, ref midline)) {
if (ReadAhead (1, 0, cancellationToken) == 0)
if (ReadAhead (1, 0, cancellationToken) == 0) {
state = MimeParserState.Content;
eof = true;
break;
}
}

if (toplevel && headerCount == 0 && invalid && !IsMboxMarker (headerBuffer)) {
Expand All @@ -1765,7 +1780,12 @@ unsafe void StepHeaders (byte* inbuf, CancellationToken cancellationToken)
var header = CreateHeader (beginOffset, fieldNameLength, headerFieldLength, invalid);

OnHeaderRead (header, beginLineNumber, cancellationToken);
} while (true);
} while (!eof);

if (state == MimeParserState.MessageHeaders || state == MimeParserState.Headers) {
// Ideally, we never get here. If we do, there's an exit in the loop above that should be fixed.
state = MimeParserState.Content;
}

headerBlockEnd = GetOffset (inputIndex);

Expand Down Expand Up @@ -2425,8 +2445,13 @@ unsafe void ReadEntity (byte* inbuf, CancellationToken cancellationToken)
state = MimeParserState.Headers;
toplevel = true;

if (Step (inbuf, cancellationToken) == MimeParserState.Error)
// parse the headers
switch (Step (inbuf, cancellationToken)) {
case MimeParserState.Error:
throw new FormatException ("Failed to parse entity headers.");
case MimeParserState.Eos:
throw new FormatException ("End of stream.");
}

var type = GetContentType (null);
var currentHeadersEndOffset = headerBlockEnd;
Expand Down Expand Up @@ -2503,12 +2528,16 @@ unsafe void ReadMessage (byte* inbuf, CancellationToken cancellationToken)
}
}

var beginLineNumber = lineNumber;
toplevel = true;

// parse the headers
var beginLineNumber = lineNumber;
if (state < MimeParserState.Content && Step (inbuf, cancellationToken) == MimeParserState.Error)
switch (Step (inbuf, cancellationToken)) {
case MimeParserState.Error:
throw new FormatException ("Failed to parse message headers.");
case MimeParserState.Eos:
throw new FormatException ("End of stream.");
}

var currentHeadersEndOffset = headerBlockEnd;
var currentBeginOffset = headerBlockBegin;
Expand Down

0 comments on commit c0678bb

Please sign in to comment.