Skip to content

Commit

Permalink
Fix edge cases with invalid characters
Browse files Browse the repository at this point in the history
  • Loading branch information
xoofx committed Feb 11, 2019
1 parent a2819d2 commit 83b7545
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 22 deletions.
2 changes: 1 addition & 1 deletion ext/toml-test
Submodule toml-test updated 43 files
+1 −0 tests/invalid/array-missing-separator.toml
+1 −0 tests/invalid/array-no-close.toml
+2 −0 tests/invalid/bad-utf8.toml
+1 −0 tests/invalid/boolean-mixed-case.toml
+2 −0 tests/invalid/duplicate-table-array.toml
+2 −0 tests/invalid/duplicate-table-array2.toml
+1 −0 tests/invalid/key-escape.toml
+2 −0 tests/invalid/key-multiline.toml
+1 −0 tests/invalid/key-partial-quoted.toml
+1 −0 tests/invalid/key-special-character.toml
+1 −0 tests/invalid/key-two-equals2.toml
+1 −0 tests/invalid/key-two-equals3.toml
+1 −0 tests/invalid/string-bad-concat.toml
+2 −0 tests/invalid/string-bad-multiline.toml
+1 −0 tests/invalid/string-control-character.toml
+1 −0 tests/invalid/string-missing-quotes.toml
+1 −0 tests/invalid/string-wrong-close.toml
+1 −0 tests/invalid/table-equals-sign.toml
+2 −0 tests/invalid/table-quoted-no-close.toml
+20 −0 tests/valid/comments-tricky.json
+31 −0 tests/valid/comments-tricky.toml
+8 −0 tests/valid/inline-table-empty.json
+6 −0 tests/valid/inline-table-empty.toml
+8 −0 tests/valid/inline-table-multiline.json
+4 −0 tests/valid/inline-table-multiline.toml
+9 −0 tests/valid/inline-table-nest.json
+10 −0 tests/valid/inline-table-nest.toml
+16 −0 tests/valid/key-alphanum.json
+14 −0 tests/valid/key-alphanum.toml
+14 −0 tests/valid/key-case-sensitive.json
+13 −0 tests/valid/key-case-sensitive.toml
+14 −0 tests/valid/key-escapes.json
+10 −0 tests/valid/key-escapes.toml
+6 −0 tests/valid/key-special-word.json
+5 −0 tests/valid/key-special-word.toml
+8 −0 tests/valid/multiline-string-false-end.json
+6 −0 tests/valid/multiline-string-false-end.toml
+25 −0 tests/valid/spec-example-1-compact.json
+23 −0 tests/valid/spec-example-1-compact.toml
+25 −0 tests/valid/spec-example-1.json
+33 −0 tests/valid/spec-example-1.toml
+9 −0 tests/valid/string-escape-tricky.json
+15 −0 tests/valid/string-escape-tricky.toml
69 changes: 48 additions & 21 deletions src/Tomlyn/Parsing/Lexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -706,17 +706,7 @@ private void ReadString(TextPosition start, bool allowMultiline)
NextChar();
// we have an opening ''' -> this a multi-line string
isMultiLine = true;

// Skip any white spaces until the next line
while (CharHelper.IsWhiteSpaceOrNewLine(_c))
{
var isNewLine = _c == '\n';
NextChar();
if (isNewLine)
{
break;
}
}
SkipImmediateNextLine();
}
else
{
Expand All @@ -734,6 +724,15 @@ private void ReadString(TextPosition start, bool allowMultiline)
{
if (!TryReadEscapeChar(ref end))
{
if (!isMultiLine && CharHelper.IsNewLine(_c))
{
AddError("Invalid newline in a string", _position, _position);
}
else if (_c < 32 && (!isMultiLine || !CharHelper.IsNewLine(_c)))
{
AddError($"Invalid control character found {((char)_c).ToPrintableString()}", start, start);
}

_textBuilder.AppendUtf32(_c);
end = _position;
NextChar();
Expand All @@ -757,11 +756,14 @@ private void ReadString(TextPosition start, bool allowMultiline)
}
else
{
_textBuilder.Append('"');
_textBuilder.Append('"');
goto continue_parsing_string;
}
}
else
{
_textBuilder.Append('"');
goto continue_parsing_string;
}
}
Expand All @@ -786,6 +788,23 @@ private void ReadString(TextPosition start, bool allowMultiline)
}
}

/// <summary>
/// Consumes at most one line terminator located at the current character:
/// either <c>\r\n</c>, a lone <c>\r</c>, or a lone <c>\n</c>.
/// Any other character (including spaces and tabs) is left untouched.
/// </summary>
private void SkipImmediateNextLine()
{
    // An optional carriage return comes first...
    if (_c == '\r')
    {
        NextChar();
    }

    // ...optionally followed by (or replaced by) a single line feed.
    // When no '\r' was consumed above, this handles the lone '\n' case.
    if (_c == '\n')
    {
        NextChar();
    }
}

private bool TryReadEscapeChar(ref TextPosition end)
{
if (_c == '\\')
Expand Down Expand Up @@ -899,16 +918,7 @@ private void ReadStringLiteral(TextPosition start, bool allowMultiline)
// we have an opening ''' -> this a multi-line literal string
isMultiLine = true;

// Skip any white spaces until the next line
while (CharHelper.IsWhiteSpaceOrNewLine(_c))
{
var isNewLine = _c == '\n';
NextChar();
if (isNewLine)
{
break;
}
}
SkipImmediateNextLine();
}
else
{
Expand All @@ -922,6 +932,14 @@ private void ReadStringLiteral(TextPosition start, bool allowMultiline)
continue_parsing_string:
while (_c != '\'' && _c != Eof)
{
if (!isMultiLine && CharHelper.IsNewLine(_c))
{
AddError("Invalid newline in a string", _position, _position);
}
else if (_c < 32 && (!isMultiLine || !CharHelper.IsNewLine(_c)))
{
AddError($"Invalid control character found {((char)_c).ToPrintableString()}", start, start);
}
_textBuilder.AppendUtf32(_c);
end = _position;
NextChar();
Expand Down Expand Up @@ -1041,6 +1059,7 @@ private char32 NextCharFromReader()
{
_current.NextPosition.Column++;
}
CheckCharacter(nextc);
return nextc;
}

Expand All @@ -1053,6 +1072,14 @@ private char32 NextCharFromReader()
return Eof;
}

/// <summary>
/// Records a lexer error when <paramref name="c"/> is rejected by
/// <c>CharHelper.IsValidUnicodeScalarValue</c>. The error is reported at the
/// current position (used as both start and end of the error span).
/// </summary>
/// <param name="c">The character that was just read.</param>
private void CheckCharacter(char32 c)
{
    // Nothing to report for a well-formed character.
    if (CharHelper.IsValidUnicodeScalarValue(c))
    {
        return;
    }

    AddError($"The character `{c}` is an invalid UTF8 character", _current.Position, _current.Position);
}

private void AddError(string message, TextPosition start, TextPosition end)
{
if (_errors == null)
Expand Down
6 changes: 6 additions & 0 deletions src/Tomlyn/Text/CharHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,12 @@ public static bool IsWhiteSpaceOrNewLine(char32 c)
c == '\n'; // \n
}

/// <summary>
/// Tells whether <paramref name="c"/> is a line-break character,
/// i.e. a carriage return (<c>\r</c>) or a line feed (<c>\n</c>).
/// </summary>
/// <param name="c">The character to test.</param>
/// <returns><c>true</c> for <c>\r</c> or <c>\n</c>; otherwise <c>false</c>.</returns>
public static bool IsNewLine(char32 c)
{
    if (c == '\n')
    {
        return true;
    }

    return c == '\r';
}

[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static char32? ToUtf8(byte[] buffer, ref int position)
{
Expand Down

0 comments on commit 83b7545

Please sign in to comment.