From 83b75452443fb4a806c472466176aa6c9e1f2308 Mon Sep 17 00:00:00 2001 From: Alexandre Mutel Date: Mon, 11 Feb 2019 23:05:02 +0100 Subject: [PATCH] Fix edge cases with invalid characters --- ext/toml-test | 2 +- src/Tomlyn/Parsing/Lexer.cs | 69 ++++++++++++++++++++++++----------- src/Tomlyn/Text/CharHelper.cs | 6 +++ 3 files changed, 55 insertions(+), 22 deletions(-) diff --git a/ext/toml-test b/ext/toml-test index 858ca6d..70b2972 160000 --- a/ext/toml-test +++ b/ext/toml-test @@ -1 +1 @@ -Subproject commit 858ca6d13a62486a9ed6c552f816ffa4bb7be41d +Subproject commit 70b297207299acf3ca477ecc4fa81df44919f41e diff --git a/src/Tomlyn/Parsing/Lexer.cs b/src/Tomlyn/Parsing/Lexer.cs index 18deb7a..da80769 100644 --- a/src/Tomlyn/Parsing/Lexer.cs +++ b/src/Tomlyn/Parsing/Lexer.cs @@ -706,17 +706,7 @@ private void ReadString(TextPosition start, bool allowMultiline) NextChar(); // we have an opening ''' -> this a multi-line string isMultiLine = true; - - // Skip any white spaces until the next line - while (CharHelper.IsWhiteSpaceOrNewLine(_c)) - { - var isNewLine = _c == '\n'; - NextChar(); - if (isNewLine) - { - break; - } - } + SkipImmediateNextLine(); } else { @@ -734,6 +724,15 @@ private void ReadString(TextPosition start, bool allowMultiline) { if (!TryReadEscapeChar(ref end)) { + if (!isMultiLine && CharHelper.IsNewLine(_c)) + { + AddError("Invalid newline in a string", _position, _position); + } + else if (_c < 32 && (!isMultiLine || !CharHelper.IsNewLine(_c))) + { + AddError($"Invalid control character found {((char)_c).ToPrintableString()}", start, start); + } + _textBuilder.AppendUtf32(_c); end = _position; NextChar(); @@ -757,11 +756,14 @@ private void ReadString(TextPosition start, bool allowMultiline) } else { + _textBuilder.Append('"'); + _textBuilder.Append('"'); goto continue_parsing_string; } } else { + _textBuilder.Append('"'); goto continue_parsing_string; } } @@ -786,6 +788,23 @@ private void ReadString(TextPosition start, bool allowMultiline) } } + private void SkipImmediateNextLine() + { + // Skip any white spaces until the next line + if (_c == '\r') + { + NextChar(); + if (_c == '\n') + { + NextChar(); + } + } + else if (_c == '\n') + { + NextChar(); + } + } + private bool TryReadEscapeChar(ref TextPosition end) { if (_c == '\\') @@ -899,16 +918,7 @@ private void ReadStringLiteral(TextPosition start, bool allowMultiline) // we have an opening ''' -> this a multi-line literal string isMultiLine = true; - // Skip any white spaces until the next line - while (CharHelper.IsWhiteSpaceOrNewLine(_c)) - { - var isNewLine = _c == '\n'; - NextChar(); - if (isNewLine) - { - break; - } - } + SkipImmediateNextLine(); } else { @@ -922,6 +932,14 @@ private void ReadStringLiteral(TextPosition start, bool allowMultiline) continue_parsing_string: while (_c != '\'' && _c != Eof) { + if (!isMultiLine && CharHelper.IsNewLine(_c)) + { + AddError("Invalid newline in a string", _position, _position); + } + else if (_c < 32 && (!isMultiLine || !CharHelper.IsNewLine(_c))) + { + AddError($"Invalid control character found {((char)_c).ToPrintableString()}", start, start); + } _textBuilder.AppendUtf32(_c); end = _position; NextChar(); @@ -1041,6 +1059,7 @@ private char32 NextCharFromReader() { _current.NextPosition.Column++; } + CheckCharacter(nextc); return nextc; } @@ -1053,6 +1072,14 @@ private char32 NextCharFromReader() return Eof; } + private void CheckCharacter(char32 c) + { + if (!CharHelper.IsValidUnicodeScalarValue(c)) + { + AddError($"The character `{c}` is an invalid UTF8 character", _current.Position, _current.Position); + } + } + private void AddError(string message, TextPosition start, TextPosition end) { if (_errors == null) diff --git a/src/Tomlyn/Text/CharHelper.cs b/src/Tomlyn/Text/CharHelper.cs index 699ecc3..c6676c8 100644 --- a/src/Tomlyn/Text/CharHelper.cs +++ b/src/Tomlyn/Text/CharHelper.cs @@ -128,6 +128,12 @@ public static bool IsWhiteSpaceOrNewLine(char32 c) c == '\n'; // \n } + public static bool IsNewLine(char32 c) + { + return c == '\r' || // \r + c == '\n'; // \n + } + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static char32? ToUtf8(byte[] buffer, ref int position) {