Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

A few more perf improvements #753

Merged
merged 6 commits into from
Nov 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 11 additions & 19 deletions src/Markdig/Helpers/EntityHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

using System.Text;

namespace Markdig.Helpers;

/// <summary>
Expand All @@ -57,41 +59,31 @@ public static class EntityHelper
/// <returns>The unicode character set or <c>null</c> if the entity was not recognized.</returns>
public static string DecodeEntity(int utf32)
{
if (!CharHelper.IsInInclusiveRange(utf32, 1, 1114111) || CharHelper.IsInInclusiveRange(utf32, 55296, 57343))
if (utf32 == 0 || !UnicodeUtility.IsValidUnicodeScalar((uint)utf32))
return CharHelper.ReplacementCharString;

if (utf32 < 65536)
if (UnicodeUtility.IsBmpCodePoint((uint)utf32))
return char.ToString((char)utf32);

utf32 -= 65536;
return new string(
#if NETSTANDARD2_1_OR_GREATER || NETCOREAPP3_1_OR_GREATER
stackalloc
#else
new
#endif
char[]
{
(char)((uint)utf32 / 1024 + 55296),
(char)((uint)utf32 % 1024 + 56320)
});
UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar((uint)utf32, out char high, out char low);
return new string([high, low]);
}

internal static void DecodeEntity(int utf32, ref ValueStringBuilder sb)
{
if (!CharHelper.IsInInclusiveRange(utf32, 1, 1114111) || CharHelper.IsInInclusiveRange(utf32, 55296, 57343))
if (utf32 == 0 || !UnicodeUtility.IsValidUnicodeScalar((uint)utf32))
{
sb.Append(CharHelper.ReplacementChar);
}
else if (utf32 < 65536)
else if (UnicodeUtility.IsBmpCodePoint((uint)utf32))
{
sb.Append((char)utf32);
}
else
{
utf32 -= 65536;
sb.Append((char)((uint)utf32 / 1024 + 55296));
sb.Append((char)((uint)utf32 % 1024 + 56320));
UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar((uint)utf32, out char high, out char low);
sb.Append(high);
sb.Append(low);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/Markdig/Helpers/ThrowHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public static void CheckDepthLimit(int depth, bool useLargeLimit = false)
if (depth > limit)
DepthLimitExceeded();

[MethodImpl(MethodImplOptions.NoInlining)]
[DoesNotReturn]
static void DepthLimitExceeded() => throw new ArgumentException("Markdown elements in the input are too deeply nested - depth limit exceeded. Input is most likely not sensible or is a very large table.");
}

Expand Down
30 changes: 30 additions & 0 deletions src/Markdig/Helpers/UnicodeUtility.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.

using System.Diagnostics;
using System.Runtime.CompilerServices;

namespace System.Text;

// Based on https://github.com/dotnet/runtime/blob/main/src/libraries/System.Private.CoreLib/src/System/Text/UnicodeUtility.cs
/// <summary>
/// Small, allocation-free helpers for classifying Unicode code points and
/// converting supplementary-plane scalars to UTF-16 surrogate pairs.
/// </summary>
internal static class UnicodeUtility
{
/// <summary>
/// Returns <c>true</c> if <paramref name="value"/> lies in the Basic Multilingual Plane,
/// i.e. it is less than or equal to U+FFFF and fits in a single UTF-16 code unit.
/// </summary>
/// <param name="value">The code point to test.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsBmpCodePoint(uint value) => value <= 0xFFFFu;

/// <summary>
/// Returns <c>true</c> if <paramref name="value"/> is a Unicode scalar value, i.e. it is
/// in the range [U+0000..U+D7FF] or [U+E000..U+10FFFF] (any code point except a surrogate).
/// </summary>
/// <param name="value">The code point to test.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static bool IsValidUnicodeScalar(uint value)
{
// Single-comparison range check:
//  - Subtracting 0x110000 wraps every in-range code point [0..0x10FFFF] to
//    [0xFFEF0000..0xFFFFFFFF]; anything above U+10FFFF ends up at or below 0xFFEEFFFF.
//  - XOR with 0xD800 only touches the low 16 bits. It moves the surrogate block
//    (0xFFEFD800..0xFFEFDFFF) down to 0xFFEF0000..0xFFEF07FF and keeps every other
//    in-range value at or above 0xFFEF0800.
// So the result is >= 0xFFEF0800 exactly when the input is a valid scalar.
return ((value - 0x110000u) ^ 0xD800u) >= 0xFFEF0800u;
}

/// <summary>
/// Splits a supplementary-plane scalar (U+10000..U+10FFFF) into its UTF-16 surrogate pair.
/// </summary>
/// <param name="value">A valid Unicode scalar outside the Basic Multilingual Plane.</param>
/// <param name="highSurrogateCodePoint">Receives the high (leading) surrogate, in U+D800..U+DBFF.</param>
/// <param name="lowSurrogateCodePoint">Receives the low (trailing) surrogate, in U+DC00..U+DFFF.</param>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static void GetUtf16SurrogatesFromSupplementaryPlaneScalar(uint value, out char highSurrogateCodePoint, out char lowSurrogateCodePoint)
{
// The input must be a scalar that does NOT fit in a single UTF-16 code unit;
// BMP code points have no surrogate representation.
Debug.Assert(IsValidUnicodeScalar(value) && !IsBmpCodePoint(value));

// Equivalent to 0xD800 + ((value - 0x10000) >> 10), folded into one add and one shift.
highSurrogateCodePoint = (char)((value + ((0xD800u - 0x40u) << 10)) >> 10);
// The low surrogate carries the bottom 10 bits of the scalar.
lowSurrogateCodePoint = (char)((value & 0x3FFu) + 0xDC00u);
}
}
87 changes: 32 additions & 55 deletions src/Markdig/Parsers/FencedBlockParserBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ public abstract class FencedBlockParserBase : BlockParser, IAttributesParseable
/// <seealso cref="BlockParser" />
public abstract class FencedBlockParserBase<T> : FencedBlockParserBase where T : Block, IFencedBlock
{
private static readonly TransformedStringCache _infoStringCache = new(static infoString => HtmlHelper.Unescape(infoString));
private static readonly TransformedStringCache s_infoStringCache = new(static infoString => HtmlHelper.Unescape(infoString));
private static readonly TransformedStringCache s_argumentsStringCache = new(static argumentsString => HtmlHelper.Unescape(argumentsString));
private TransformedStringCache? _infoPrefixCache;

/// <summary>
Expand Down Expand Up @@ -176,7 +177,7 @@ public static bool RoundtripInfoParser(BlockProcessor blockProcessor, ref String

end:
fenced.TriviaAfterFencedChar = afterFence;
fenced.Info = _infoStringCache.Get(info.AsSpan());
fenced.Info = s_infoStringCache.Get(info.AsSpan());
fenced.UnescapedInfo = info;
fenced.TriviaAfterInfo = afterInfo;
fenced.Arguments = HtmlHelper.Unescape(arg.ToString());
Expand All @@ -197,71 +198,47 @@ public static bool RoundtripInfoParser(BlockProcessor blockProcessor, ref String
/// <returns><c>true</c> if parsing of the line is successful; <c>false</c> otherwise</returns>
public static bool DefaultInfoParser(BlockProcessor state, ref StringSlice line, IFencedBlock fenced, char openingCharacter)
{
// An info string cannot contain any backticks (unless it is a tilde block)
int firstSpace = -1;
if (openingCharacter == '`')
ReadOnlySpan<char> lineSpan = line.AsSpan();

if (!lineSpan.IsEmpty)
{
for (int i = line.Start; i <= line.End; i++)
if (openingCharacter == '`')
{
char c = line.Text[i];
if (c == '`')
{
return false;
}
firstSpace = lineSpan.IndexOfAny(' ', '\t', '`');

if (firstSpace < 0 && c.IsSpaceOrTab())
// An info string cannot contain any backticks (unless it is a tilde block)
if (firstSpace >= 0 && lineSpan.Slice(firstSpace).Contains('`'))
{
firstSpace = i;
return false;
}
}
}
else
{
for (int i = line.Start; i <= line.End; i++)
else
{
if (line.Text[i].IsSpaceOrTab())
{
firstSpace = i;
break;
}
firstSpace = lineSpan.IndexOfAny(' ', '\t');
}
}

StringSlice infoStringSlice;
string? argString = null;

if (firstSpace > 0)
if (firstSpace >= 0)
{
firstSpace += line.Start;
infoStringSlice = new StringSlice(line.Text, line.Start, firstSpace - 1);

// Skip any spaces after info string
firstSpace++;
while (firstSpace <= line.End)
{
char c = line[firstSpace];
if (c.IsSpaceOrTab())
{
firstSpace++;
}
else
{
break;
}
}

var argStringSlice = new StringSlice(line.Text, firstSpace, line.End);
argStringSlice.Trim();
argString = argStringSlice.ToString();
fenced.Arguments = s_argumentsStringCache.Get(argStringSlice.AsSpan());
}
else
{
infoStringSlice = line;
fenced.Arguments = string.Empty;
}

infoStringSlice.Trim();

fenced.Info = _infoStringCache.Get(infoStringSlice.AsSpan());
fenced.Arguments = HtmlHelper.Unescape(argString);
fenced.Info = s_infoStringCache.Get(infoStringSlice.AsSpan());

return true;
}
Expand Down Expand Up @@ -303,17 +280,19 @@ public override BlockState TryOpen(BlockProcessor processor)
// Try to parse any attached attributes
TryParseAttributes?.Invoke(processor, ref line, fenced);

// If the info parser was not successfull, early exit
// If the info parser was not successful, early exit
if (InfoParser != null && !InfoParser(processor, ref line, fenced, matchChar))
{
return BlockState.None;
}

// Add the language as an attribute by default
if (!string.IsNullOrEmpty(fenced.Info))
string? info = fenced.Info;

if (!string.IsNullOrEmpty(info))
{
Debug.Assert(_infoPrefixCache is not null || InfoPrefix is null);
string infoWithPrefix = _infoPrefixCache?.Get(fenced.Info!) ?? fenced.Info!;
string infoWithPrefix = _infoPrefixCache?.Get(info!) ?? info!;
fenced.GetAttributes().AddClass(infoWithPrefix);
}

Expand All @@ -329,34 +308,32 @@ public override BlockState TryOpen(BlockProcessor processor)
public override BlockState TryContinue(BlockProcessor processor, Block block)
{
var fence = (IFencedBlock)block;
var openingCount = fence.OpeningFencedCharCount;

// Match if we have a closing fence
var line = processor.Line;
var sourcePosition = processor.Start;
var closingCount = line.CountAndSkipChar(fence.FencedChar);
var diff = openingCount - closingCount;

char c = line.CurrentChar;
var lastFenceCharPosition = processor.Start + closingCount;

// If we have a closing fence, close it and discard the current line
// The line must contain only fence opening character followed only by whitespaces.
var startBeforeTrim = line.Start;
var endBeforeTrim = line.End;
var trimmed = line.TrimEnd();
if (diff <= 0 && !processor.IsCodeIndent && (c == '\0' || c.IsWhitespace()) && trimmed)

if (fence.OpeningFencedCharCount <= closingCount &&
!processor.IsCodeIndent &&
(c == '\0' || c.IsWhitespace()) &&
line.TrimEnd())
{
block.UpdateSpanEnd(startBeforeTrim - 1);

var fencedBlock = (IFencedBlock)block;
fencedBlock.ClosingFencedCharCount = closingCount;
fence.ClosingFencedCharCount = closingCount;

if (processor.TrackTrivia)
{
fencedBlock.NewLine = processor.Line.NewLine;
fencedBlock.TriviaBeforeClosingFence = processor.UseTrivia(sourcePosition - 1);
fencedBlock.TriviaAfter = new StringSlice(processor.Line.Text, lastFenceCharPosition, endBeforeTrim);
fence.NewLine = line.NewLine;
fence.TriviaBeforeClosingFence = processor.UseTrivia(sourcePosition - 1);
fence.TriviaAfter = new StringSlice(line.Text, processor.Start + closingCount, processor.Line.End);
}

// Don't keep the last line
Expand Down
66 changes: 38 additions & 28 deletions src/Markdig/Parsers/MarkdownParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,34 +53,11 @@ public static MarkdownDocument Parse(string text, MarkdownPipeline? pipeline = n
{
blockProcessor.Open(document);

ProcessBlocks(blockProcessor, new LineReader(text));
ProcessBlocks(blockProcessor, text);

if (pipeline.TrackTrivia)
{
Block? lastBlock = blockProcessor.LastBlock;
if (lastBlock is null && document.Count == 0)
{
// this means we have unassigned characters
var noBlocksFoundBlock = new EmptyBlock(null);
List<StringSlice> linesBefore = blockProcessor.UseLinesBefore();
noBlocksFoundBlock.LinesAfter = new List<StringSlice>();
if (linesBefore != null)
{
noBlocksFoundBlock.LinesAfter.AddRange(linesBefore);
}

document.Add(noBlocksFoundBlock);
}
else if (lastBlock != null && blockProcessor.LinesBefore != null)
{
// this means we're out of lines, but still have unassigned empty lines.
// thus, we'll assign the empty unsassigned lines to the last block
// of the document.
var rootMostContainerBlock = Block.FindRootMostContainerParent(lastBlock);
rootMostContainerBlock.LinesAfter ??= new List<StringSlice>();
var linesBefore = blockProcessor.UseLinesBefore();
rootMostContainerBlock.LinesAfter.AddRange(linesBefore);
}
ProcessBlocksTrivia(blockProcessor, document);
}

// At this point the LineIndex is the same as the number of lines in the document
Expand Down Expand Up @@ -117,12 +94,15 @@ private static string FixupZero(string text)
return text.Replace('\0', CharHelper.ReplacementChar);
}

private static void ProcessBlocks(BlockProcessor blockProcessor, LineReader lineReader)
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ProcessBlocks(BlockProcessor blockProcessor, string text)
{
var lineReader = new LineReader(text);

while (true)
{
// Get the precise position of the begining of the line
var lineText = lineReader.ReadLine();
// Get the precise position of the beginning of the line
StringSlice lineText = lineReader.ReadLine();

// If this is the end of file and the last line is empty
if (lineText.Text is null)
Expand All @@ -132,9 +112,39 @@ private static void ProcessBlocks(BlockProcessor blockProcessor, LineReader line

blockProcessor.ProcessLine(lineText);
}

blockProcessor.CloseAll(true);
}

/// <summary>
/// Hands any lines still pending in <paramref name="blockProcessor"/> over to the document
/// once all input lines have been processed.
/// </summary>
/// <param name="blockProcessor">The block processor whose pending "lines before" are consumed.</param>
/// <param name="document">The document that receives a placeholder block when it is otherwise empty.</param>
private static void ProcessBlocksTrivia(BlockProcessor blockProcessor, MarkdownDocument document)
{
    Block? lastBlock = blockProcessor.LastBlock;

    if (lastBlock is not null)
    {
        if (blockProcessor.LinesBefore is null)
        {
            return;
        }

        // Input is exhausted but some empty lines were never attached to a block:
        // hang them off the root-most container of the last block.
        Block rootContainer = Block.FindRootMostContainerParent(lastBlock);
        rootContainer.LinesAfter ??= [];
        var pendingLines = blockProcessor.UseLinesBefore();
        rootContainer.LinesAfter.AddRange(pendingLines);
        return;
    }

    if (document.Count != 0)
    {
        return;
    }

    // No block was produced at all: keep the unassigned lines on a placeholder block.
    var placeholder = new EmptyBlock(null);
    List<StringSlice> unassignedLines = blockProcessor.UseLinesBefore();
    placeholder.LinesAfter = [];
    if (unassignedLines != null)
    {
        placeholder.LinesAfter.AddRange(unassignedLines);
    }

    document.Add(placeholder);
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static void ProcessInlines(InlineProcessor inlineProcessor, MarkdownDocument document)
{
// "stackless" processor
Expand Down
7 changes: 7 additions & 0 deletions src/Markdig/Polyfills/IndexOfHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ public static int IndexOfAny(this ReadOnlySpan<char> span, string values)
return -1;
}
#endif

#if !NET6_0_OR_GREATER
/// <summary>
/// Polyfill for targets older than .NET 6: reports whether <paramref name="value"/>
/// occurs anywhere in <paramref name="span"/>.
/// </summary>
/// <param name="span">The span to search.</param>
/// <param name="value">The value to look for.</param>
/// <returns><c>true</c> if <see cref="MemoryExtensions.IndexOf{T}(ReadOnlySpan{T}, T)"/> finds the value; otherwise <c>false</c>.</returns>
public static bool Contains<T>(this ReadOnlySpan<T> span, T value) where T : IEquatable<T>
    => span.IndexOf(value) >= 0;
#endif
}

#endif
Loading
Loading