diff --git a/src/Markdig/Helpers/EntityHelper.cs b/src/Markdig/Helpers/EntityHelper.cs index df6d2d8d..87c85d0b 100644 --- a/src/Markdig/Helpers/EntityHelper.cs +++ b/src/Markdig/Helpers/EntityHelper.cs @@ -31,6 +31,8 @@ // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +using System.Text; + namespace Markdig.Helpers; /// @@ -57,41 +59,31 @@ public static class EntityHelper /// The unicode character set or null if the entity was not recognized. public static string DecodeEntity(int utf32) { - if (!CharHelper.IsInInclusiveRange(utf32, 1, 1114111) || CharHelper.IsInInclusiveRange(utf32, 55296, 57343)) + if (utf32 == 0 || !UnicodeUtility.IsValidUnicodeScalar((uint)utf32)) return CharHelper.ReplacementCharString; - if (utf32 < 65536) + if (UnicodeUtility.IsBmpCodePoint((uint)utf32)) return char.ToString((char)utf32); - utf32 -= 65536; - return new string( -#if NETSTANDARD2_1_OR_GREATER || NETCOREAPP3_1_OR_GREATER - stackalloc -#else - new -#endif - char[] - { - (char)((uint)utf32 / 1024 + 55296), - (char)((uint)utf32 % 1024 + 56320) - }); + UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar((uint)utf32, out char high, out char low); // invalid and BMP values returned above, so utf32 is a supplementary-plane scalar here + return new string([high, low]); } internal static void DecodeEntity(int utf32, ref ValueStringBuilder sb) { - if (!CharHelper.IsInInclusiveRange(utf32, 1, 1114111) || CharHelper.IsInInclusiveRange(utf32, 55296, 57343)) + if (utf32 == 0 || !UnicodeUtility.IsValidUnicodeScalar((uint)utf32)) { sb.Append(CharHelper.ReplacementChar); } - else if (utf32 < 65536) + else if (UnicodeUtility.IsBmpCodePoint((uint)utf32)) { sb.Append((char)utf32); } else { - utf32 -= 65536; - sb.Append((char)((uint)utf32 / 1024 + 55296)); - sb.Append((char)((uint)utf32 % 1024 + 56320)); + UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar((uint)utf32, out char high, out char low); // only reached for valid scalars above U+FFFF + sb.Append(high); + sb.Append(low); } } diff --git 
a/src/Markdig/Helpers/ThrowHelper.cs b/src/Markdig/Helpers/ThrowHelper.cs index 4253a504..1e727368 100644 --- a/src/Markdig/Helpers/ThrowHelper.cs +++ b/src/Markdig/Helpers/ThrowHelper.cs @@ -80,7 +80,7 @@ public static void CheckDepthLimit(int depth, bool useLargeLimit = false) if (depth > limit) DepthLimitExceeded(); - [MethodImpl(MethodImplOptions.NoInlining)] + [DoesNotReturn] static void DepthLimitExceeded() => throw new ArgumentException("Markdown elements in the input are too deeply nested - depth limit exceeded. Input is most likely not sensible or is a very large table."); } diff --git a/src/Markdig/Helpers/UnicodeUtility.cs b/src/Markdig/Helpers/UnicodeUtility.cs new file mode 100644 index 00000000..4364c953 --- /dev/null +++ b/src/Markdig/Helpers/UnicodeUtility.cs @@ -0,0 +1,30 @@ +// Copyright (c) Alexandre Mutel. All rights reserved. +// This file is licensed under the BSD-Clause 2 license. +// See the license.txt file in the project root for more information. + +using System.Diagnostics; +using System.Runtime.CompilerServices; + +namespace System.Text; + +// Based on https://github.com/dotnet/runtime/blob/main/src/libraries/System.Private.CoreLib/src/System/Text/UnicodeUtility.cs +internal static class UnicodeUtility +{ + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool IsBmpCodePoint(uint value) => value <= 0xFFFFu; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static bool IsValidUnicodeScalar(uint value) + { + return ((value - 0x110000u) ^ 0xD800u) >= 0xFFEF0800u; // true for U+0000..U+D7FF and U+E000..U+10FFFF; surrogates and values above U+10FFFF are rejected + } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static void GetUtf16SurrogatesFromSupplementaryPlaneScalar(uint value, out char highSurrogateCodePoint, out char lowSurrogateCodePoint) + { + Debug.Assert(IsValidUnicodeScalar(value) && !IsBmpCodePoint(value)); // input must be a supplementary-plane scalar (above U+FFFF); BMP values have no surrogate pair + + highSurrogateCodePoint = (char)((value + ((0xD800u - 0x40u) << 10)) >> 10); + lowSurrogateCodePoint = (char)((value & 0x3FFu) + 0xDC00u); + } +} diff --git 
a/src/Markdig/Parsers/FencedBlockParserBase.cs b/src/Markdig/Parsers/FencedBlockParserBase.cs index 1ffab620..07232a0a 100644 --- a/src/Markdig/Parsers/FencedBlockParserBase.cs +++ b/src/Markdig/Parsers/FencedBlockParserBase.cs @@ -40,7 +40,8 @@ public abstract class FencedBlockParserBase : BlockParser, IAttributesParseable /// public abstract class FencedBlockParserBase : FencedBlockParserBase where T : Block, IFencedBlock { - private static readonly TransformedStringCache _infoStringCache = new(static infoString => HtmlHelper.Unescape(infoString)); + private static readonly TransformedStringCache s_infoStringCache = new(static infoString => HtmlHelper.Unescape(infoString)); + private static readonly TransformedStringCache s_argumentsStringCache = new(static argumentsString => HtmlHelper.Unescape(argumentsString)); private TransformedStringCache? _infoPrefixCache; /// @@ -176,7 +177,7 @@ public static bool RoundtripInfoParser(BlockProcessor blockProcessor, ref String end: fenced.TriviaAfterFencedChar = afterFence; - fenced.Info = _infoStringCache.Get(info.AsSpan()); + fenced.Info = s_infoStringCache.Get(info.AsSpan()); fenced.UnescapedInfo = info; fenced.TriviaAfterInfo = afterInfo; fenced.Arguments = HtmlHelper.Unescape(arg.ToString()); @@ -197,71 +198,47 @@ public static bool RoundtripInfoParser(BlockProcessor blockProcessor, ref String /// true if parsing of the line is successfull; false otherwise public static bool DefaultInfoParser(BlockProcessor state, ref StringSlice line, IFencedBlock fenced, char openingCharacter) { - // An info string cannot contain any backticks (unless it is a tilde block) int firstSpace = -1; - if (openingCharacter == '`') + ReadOnlySpan lineSpan = line.AsSpan(); + + if (!lineSpan.IsEmpty) { - for (int i = line.Start; i <= line.End; i++) + if (openingCharacter == '`') { - char c = line.Text[i]; - if (c == '`') - { - return false; - } + firstSpace = lineSpan.IndexOfAny(' ', '\t', '`'); // index is relative to lineSpan (rebased onto line.Text further down); it may land on a backtick, which the check below rejects - if (firstSpace < 0 && c.IsSpaceOrTab()) + // An 
info string cannot contain any backticks (unless it is a tilde block) + if (firstSpace >= 0 && lineSpan.Slice(firstSpace).Contains('`')) { - firstSpace = i; + return false; } } - } - else - { - for (int i = line.Start; i <= line.End; i++) + else { - if (line.Text[i].IsSpaceOrTab()) - { - firstSpace = i; - break; - } + firstSpace = lineSpan.IndexOfAny(' ', '\t'); } } StringSlice infoStringSlice; - string? argString = null; - if (firstSpace > 0) + if (firstSpace >= 0) { + firstSpace += line.Start; infoStringSlice = new StringSlice(line.Text, line.Start, firstSpace - 1); - // Skip any spaces after info string - firstSpace++; - while (firstSpace <= line.End) - { - char c = line[firstSpace]; - if (c.IsSpaceOrTab()) - { - firstSpace++; - } - else - { - break; - } - } - var argStringSlice = new StringSlice(line.Text, firstSpace, line.End); argStringSlice.Trim(); - argString = argStringSlice.ToString(); + fenced.Arguments = s_argumentsStringCache.Get(argStringSlice.AsSpan()); } else { infoStringSlice = line; + fenced.Arguments = string.Empty; } infoStringSlice.Trim(); - fenced.Info = _infoStringCache.Get(infoStringSlice.AsSpan()); - fenced.Arguments = HtmlHelper.Unescape(argString); + fenced.Info = s_infoStringCache.Get(infoStringSlice.AsSpan()); return true; } @@ -303,17 +280,19 @@ public override BlockState TryOpen(BlockProcessor processor) // Try to parse any attached attributes TryParseAttributes?.Invoke(processor, ref line, fenced); - // If the info parser was not successfull, early exit + // If the info parser was not successful, early exit if (InfoParser != null && !InfoParser(processor, ref line, fenced, matchChar)) { return BlockState.None; } // Add the language as an attribute by default - if (!string.IsNullOrEmpty(fenced.Info)) + string? info = fenced.Info; + + if (!string.IsNullOrEmpty(info)) { Debug.Assert(_infoPrefixCache is not null || InfoPrefix is null); - string infoWithPrefix = _infoPrefixCache?.Get(fenced.Info!) ?? 
fenced.Info!; + string infoWithPrefix = _infoPrefixCache?.Get(info!) ?? info!; fenced.GetAttributes().AddClass(infoWithPrefix); } @@ -329,34 +308,32 @@ public override BlockState TryOpen(BlockProcessor processor) public override BlockState TryContinue(BlockProcessor processor, Block block) { var fence = (IFencedBlock)block; - var openingCount = fence.OpeningFencedCharCount; // Match if we have a closing fence var line = processor.Line; var sourcePosition = processor.Start; var closingCount = line.CountAndSkipChar(fence.FencedChar); - var diff = openingCount - closingCount; char c = line.CurrentChar; - var lastFenceCharPosition = processor.Start + closingCount; // If we have a closing fence, close it and discard the current line // The line must contain only fence opening character followed only by whitespaces. var startBeforeTrim = line.Start; - var endBeforeTrim = line.End; - var trimmed = line.TrimEnd(); - if (diff <= 0 && !processor.IsCodeIndent && (c == '\0' || c.IsWhitespace()) && trimmed) + + if (fence.OpeningFencedCharCount <= closingCount && + !processor.IsCodeIndent && + (c == '\0' || c.IsWhitespace()) && // TrimEnd() on the next line is only evaluated when every check up to here passed + line.TrimEnd()) { block.UpdateSpanEnd(startBeforeTrim - 1); - var fencedBlock = (IFencedBlock)block; - fencedBlock.ClosingFencedCharCount = closingCount; + fence.ClosingFencedCharCount = closingCount; if (processor.TrackTrivia) { - fencedBlock.NewLine = processor.Line.NewLine; - fencedBlock.TriviaBeforeClosingFence = processor.UseTrivia(sourcePosition - 1); - fencedBlock.TriviaAfter = new StringSlice(processor.Line.Text, lastFenceCharPosition, endBeforeTrim); + fence.NewLine = line.NewLine; // NOTE(review): line has been through TrimEnd() here, while the removed code read processor.Line.NewLine - confirm TrimEnd() leaves NewLine untouched + fence.TriviaBeforeClosingFence = processor.UseTrivia(sourcePosition - 1); + fence.TriviaAfter = new StringSlice(line.Text, processor.Start + closingCount, processor.Line.End); } // Don't keep the last line diff --git a/src/Markdig/Parsers/MarkdownParser.cs b/src/Markdig/Parsers/MarkdownParser.cs index 48ecc13f..851c9d22 100644 --- 
a/src/Markdig/Parsers/MarkdownParser.cs +++ b/src/Markdig/Parsers/MarkdownParser.cs @@ -53,34 +53,11 @@ public static MarkdownDocument Parse(string text, MarkdownPipeline? pipeline = n { blockProcessor.Open(document); - ProcessBlocks(blockProcessor, new LineReader(text)); + ProcessBlocks(blockProcessor, text); if (pipeline.TrackTrivia) { - Block? lastBlock = blockProcessor.LastBlock; - if (lastBlock is null && document.Count == 0) - { - // this means we have unassigned characters - var noBlocksFoundBlock = new EmptyBlock(null); - List linesBefore = blockProcessor.UseLinesBefore(); - noBlocksFoundBlock.LinesAfter = new List(); - if (linesBefore != null) - { - noBlocksFoundBlock.LinesAfter.AddRange(linesBefore); - } - - document.Add(noBlocksFoundBlock); - } - else if (lastBlock != null && blockProcessor.LinesBefore != null) - { - // this means we're out of lines, but still have unassigned empty lines. - // thus, we'll assign the empty unsassigned lines to the last block - // of the document. 
- var rootMostContainerBlock = Block.FindRootMostContainerParent(lastBlock); - rootMostContainerBlock.LinesAfter ??= new List(); - var linesBefore = blockProcessor.UseLinesBefore(); - rootMostContainerBlock.LinesAfter.AddRange(linesBefore); - } + ProcessBlocksTrivia(blockProcessor, document); } // At this point the LineIndex is the same as the number of lines in the document @@ -117,12 +94,15 @@ private static string FixupZero(string text) return text.Replace('\0', CharHelper.ReplacementChar); } - private static void ProcessBlocks(BlockProcessor blockProcessor, LineReader lineReader) + [MethodImpl(MethodImplOptions.NoInlining)] + private static void ProcessBlocks(BlockProcessor blockProcessor, string text) { + var lineReader = new LineReader(text); + while (true) { - // Get the precise position of the begining of the line - var lineText = lineReader.ReadLine(); + // Get the precise position of the beginning of the line + StringSlice lineText = lineReader.ReadLine(); // If this is the end of file and the last line is empty if (lineText.Text is null) @@ -132,9 +112,39 @@ private static void ProcessBlocks(BlockProcessor blockProcessor, LineReader line blockProcessor.ProcessLine(lineText); } + blockProcessor.CloseAll(true); } + private static void ProcessBlocksTrivia(BlockProcessor blockProcessor, MarkdownDocument document) + { + Block? lastBlock = blockProcessor.LastBlock; + if (lastBlock is null && document.Count == 0) + { + // this means we have unassigned characters + var noBlocksFoundBlock = new EmptyBlock(null); + List linesBefore = blockProcessor.UseLinesBefore(); + noBlocksFoundBlock.LinesAfter = []; + if (linesBefore != null) + { + noBlocksFoundBlock.LinesAfter.AddRange(linesBefore); + } + + document.Add(noBlocksFoundBlock); + } + else if (lastBlock != null && blockProcessor.LinesBefore != null) + { + // this means we're out of lines, but still have unassigned empty lines. 
+ // thus, we'll assign the empty unassigned lines to the last block + // of the document. + var rootMostContainerBlock = Block.FindRootMostContainerParent(lastBlock); + rootMostContainerBlock.LinesAfter ??= []; + var linesBefore = blockProcessor.UseLinesBefore(); + rootMostContainerBlock.LinesAfter.AddRange(linesBefore); + } + } + + [MethodImpl(MethodImplOptions.NoInlining)] private static void ProcessInlines(InlineProcessor inlineProcessor, MarkdownDocument document) { // "stackless" processor diff --git a/src/Markdig/Polyfills/IndexOfHelpers.cs b/src/Markdig/Polyfills/IndexOfHelpers.cs index bca57be3..88c9357e 100644 --- a/src/Markdig/Polyfills/IndexOfHelpers.cs +++ b/src/Markdig/Polyfills/IndexOfHelpers.cs @@ -41,6 +41,13 @@ public static int IndexOfAny(this ReadOnlySpan span, string values) return -1; } #endif + +#if !NET6_0_OR_GREATER + public static bool Contains(this ReadOnlySpan span, T value) where T : IEquatable + { + return span.IndexOf(value) >= 0; + } +#endif } #endif \ No newline at end of file diff --git a/src/Markdig/Renderers/RendererBase.cs b/src/Markdig/Renderers/RendererBase.cs index fe9ce855..fde4b5d3 100644 --- a/src/Markdig/Renderers/RendererBase.cs +++ b/src/Markdig/Renderers/RendererBase.cs @@ -3,7 +3,7 @@ // See the license.txt file in the project root for more information. using System.Runtime.CompilerServices; - +using System.Runtime.InteropServices; using Markdig.Helpers; using Markdig.Syntax; using Markdig.Syntax.Inlines; @@ -16,31 +16,64 @@ namespace Markdig.Renderers; /// public abstract class RendererBase : IMarkdownRenderer { - private readonly Dictionary _renderersPerType = new(); + private const int SubTableCount = 32; // must stay a power of two: SubTableIndex masks the key with (SubTableCount - 1) + + private readonly struct RendererEntry + { + public readonly IntPtr Key; + public readonly IMarkdownObjectRenderer? Renderer; + + public RendererEntry(IntPtr key, IMarkdownObjectRenderer? 
renderer) + { + Key = key; + Renderer = renderer; + } + } + + private readonly RendererEntry[][] _renderersPerType; + internal int _childrenDepth = 0; + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static IntPtr GetKeyForType(MarkdownObject obj) => Type.GetTypeHandle(obj).Value; + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static int SubTableIndex(IntPtr key) => (int)((((ulong)key) / 64) & (SubTableCount - 1)); + /// /// Initializes a new instance of the class. /// - protected RendererBase() { } + protected RendererBase() + { + var entries = _renderersPerType = new RendererEntry[SubTableCount][]; + for (int i = 0; i < entries.Length; i++) + { + entries[i] ??= []; + } + } + [MethodImpl(MethodImplOptions.NoInlining)] private IMarkdownObjectRenderer? GetRendererInstance(MarkdownObject obj) { - KeyWrapper key = GetKeyForType(obj); Type objectType = obj.GetType(); + IMarkdownObjectRenderer? renderer = null; - for (int i = 0; i < ObjectRenderers.Count; i++) + foreach (var potentialRenderer in ObjectRenderers) { - var renderer = ObjectRenderers[i]; - if (renderer.Accept(this, objectType)) + if (potentialRenderer.Accept(this, objectType)) { - _renderersPerType[key] = renderer; - return renderer; + renderer = potentialRenderer; + break; } } - _renderersPerType[key] = null; - return null; + IntPtr key = GetKeyForType(obj); + + ref RendererEntry[] entries = ref _renderersPerType[SubTableIndex(key)]; + Array.Resize(ref entries, entries.Length + 1); // entries is a ref to the sub-table slot, so the grown array is stored back into _renderersPerType + entries[entries.Length - 1] = new RendererEntry(key, renderer); // a null renderer is recorded as well, so Write finds the entry and does not search again for this type + + return renderer; } public ObjectRendererCollection ObjectRenderers { get; } = new(); @@ -77,12 +110,11 @@ public void WriteChildren(ContainerBlock containerBlock) bool saveIsFirstInContainer = IsFirstInContainer; bool saveIsLastInContainer = IsLastInContainer; - var children = containerBlock; - for (int i = 0; i < children.Count; i++) + for (int i = 0; i < containerBlock.Count; i++) { IsFirstInContainer = i == 0; - 
IsLastInContainer = i + 1 == children.Count; - Write(children[i]); + IsLastInContainer = i + 1 == containerBlock.Count; + Write(containerBlock[i]); } IsFirstInContainer = saveIsFirstInContainer; @@ -140,11 +172,27 @@ public void Write(MarkdownObject obj) // Calls before writing an object ObjectWriteBefore?.Invoke(this, obj); - if (!_renderersPerType.TryGetValue(GetKeyForType(obj), out IMarkdownObjectRenderer? renderer)) + IMarkdownObjectRenderer? renderer = null; + IntPtr key = GetKeyForType(obj); + +#if NETFRAMEWORK || NETSTANDARD + RendererEntry[] renderers = _renderersPerType[SubTableIndex(key)]; +#else + RendererEntry[] renderers = Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(_renderersPerType), SubTableIndex(key)); // same element as the indexer branch, read without a bounds check; SubTableIndex is masked to stay below SubTableCount +#endif + + foreach (RendererEntry entry in renderers) { - renderer = GetRendererInstance(obj); + if (key == entry.Key) + { + renderer = entry.Renderer; + goto Render; + } } + renderer = GetRendererInstance(obj); // no entry for this type yet: search ObjectRenderers and record the result + + Render: if (renderer is not null) { renderer.Write(this, obj); @@ -161,24 +209,4 @@ public void Write(MarkdownObject obj) // Calls after writing an object ObjectWriteAfter?.Invoke(this, obj); } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static KeyWrapper GetKeyForType(MarkdownObject obj) - { - IntPtr typeHandle = Type.GetTypeHandle(obj).Value; - return new KeyWrapper(typeHandle); - } - - private readonly struct KeyWrapper : IEquatable - { - public readonly IntPtr Key; - - public KeyWrapper(IntPtr key) => Key = key; - - public bool Equals(KeyWrapper other) => Key == other.Key; - - public override int GetHashCode() => Key.GetHashCode(); - - public override bool Equals(object? obj) => throw new NotImplementedException(); - } } \ No newline at end of file