diff --git a/Gobo.Cli/Program.cs b/Gobo.Cli/Program.cs
index d78d591..0cc765f 100644
--- a/Gobo.Cli/Program.cs
+++ b/Gobo.Cli/Program.cs
@@ -1,6 +1,7 @@
 using DocoptNet;
 using Gobo;
 using Gobo.Cli;
+using Gobo.Text;
 using System.Diagnostics;
 
 const string usage =
@@ -152,7 +153,7 @@ static async Task CheckFile(
     IDictionary arguments
 )
 {
-    var input = await File.ReadAllTextAsync(filePath);
+    var input = SourceText.From(await File.ReadAllTextAsync(filePath));
     bool success;
 
     try
diff --git a/Gobo.Tests/Gml/FormattingTests/StringsAndTemplates.expected b/Gobo.Tests/Gml/FormattingTests/StringsAndTemplates.expected
index 8be84aa..856d97f 100644
--- a/Gobo.Tests/Gml/FormattingTests/StringsAndTemplates.expected
+++ b/Gobo.Tests/Gml/FormattingTests/StringsAndTemplates.expected
@@ -1,3 +1,5 @@
 str = "\\";
 
 var escaped_template = $"\{{a + b}\}";
+
+var more_escapes = $"\"{link_list[link_index]}\"";
diff --git a/Gobo.Tests/Gml/FormattingTests/StringsAndTemplates.test b/Gobo.Tests/Gml/FormattingTests/StringsAndTemplates.test
index 1cca859..ff16b30 100644
--- a/Gobo.Tests/Gml/FormattingTests/StringsAndTemplates.test
+++ b/Gobo.Tests/Gml/FormattingTests/StringsAndTemplates.test
@@ -1,3 +1,5 @@
 str = "\\"
 
-var escaped_template = $"\{{a+b}\}"
\ No newline at end of file
+var escaped_template = $"\{{a+b}\}"
+
+var more_escapes = $"\"{link_list[link_index]}\""
\ No newline at end of file
diff --git a/Gobo.Tests/SourceTextTests.cs b/Gobo.Tests/SourceTextTests.cs
new file mode 100644
index 0000000..e6e72b6
--- /dev/null
+++ b/Gobo.Tests/SourceTextTests.cs
@@ -0,0 +1,44 @@
+using Gobo.Text;
+using Xunit.Abstractions;
+using Xunit.Sdk;
+
+namespace Gobo.Tests;
+
+/// <summary>
+/// Checks <see cref="SourceText.ContentEquals"/> against each <c>TestFile</c> supplied by <c>SampleFileProvider</c>.
+/// </summary>
+public class SourceTextTests
+{
+    // Captured from the constructor; no test in this class writes to it yet.
+    private readonly ITestOutputHelper output;
+
+    public const string TestFileExtension = ".test";
+
+    public SourceTextTests(ITestOutputHelper output) => this.output = output;
+
+    /// <summary>Same content must compare equal across instances; different content must not.</summary>
+    [Theory]
+    [ClassData(typeof(SampleFileProvider))]
+    public async Task EnsureContentEquals(TestFile test)
+    {
+        var contents = await File.ReadAllTextAsync(test.FilePath);
+
+        var reference = new StringText(contents);
+        var identicalCopy = new StringText(contents);
+        var mismatch = new StringText("obviously wrong input");
+
+        // Separate instances over the same content must compare equal.
+        var copyMatches = reference.ContentEquals(identicalCopy);
+        if (copyMatches == false)
+        {
+            throw new XunitException("Comparison failed");
+        }
+
+        // Content that differs from the file must never compare equal.
+        var mismatchMatches = reference.ContentEquals(mismatch);
+        if (mismatchMatches)
+        {
+            throw new XunitException("Something has gone horribly wrong");
+        }
+    }
+}
diff --git a/Gobo/GmlFormatter.cs b/Gobo/GmlFormatter.cs
index 01c40df..1ad74c4 100644
--- a/Gobo/GmlFormatter.cs
+++ b/Gobo/GmlFormatter.cs
@@ -1,6 +1,7 @@
 using Gobo.Parser;
 using Gobo.Printer.DocPrinter;
 using Gobo.SyntaxNodes;
+using Gobo.Text;
 using System.Diagnostics;
 
 namespace Gobo;
@@ -48,7 +49,12 @@ public override string ToString()
 
 public static partial class GmlFormatter
 {
-    public static FormatResult Format(string code, FormatOptions options)
+    public static FormatResult Format(string text, FormatOptions options)
+    {
+        return Format(SourceText.From(text), options);
+    }
+
+    public static FormatResult Format(SourceText code, FormatOptions options)
     {
         long parseStart = 0;
         long parseStop = 0;
@@ -134,7 +140,7 @@ public static FormatResult Format(string code, FormatOptions options)
 
             try
             {
-                updatedParseResult = new GmlParser(output).Parse();
+                updatedParseResult = new GmlParser(SourceText.From(output)).Parse();
             }
             catch (GmlSyntaxErrorException ex)
             {
@@ -176,10 +182,10 @@ out var difference
         }
     }
 
-    public static bool Check(string code, FormatOptions options)
+    public static bool Check(SourceText code, FormatOptions options)
     {
         var result = Format(code, options);
-        return result.Output == code;
+        return SourceText.From(result.Output).ContentEquals(code);
     }
 
     public static async Task FormatFileAsync(string filePath, FormatOptions options)
@@ -189,7 +195,7 @@ public static async
Task FormatFileAsync(string filePath, FormatOptions options) try { - var result = Format(input, options); + var result = Format(SourceText.From(input), options); formatted = result.Output; } catch (Exception) diff --git a/Gobo/Gobo.csproj b/Gobo/Gobo.csproj index 20a46d5..f5bb22c 100644 --- a/Gobo/Gobo.csproj +++ b/Gobo/Gobo.csproj @@ -7,6 +7,7 @@ true false gobolib + true diff --git a/Gobo/Parser/CommentMapper.cs b/Gobo/Parser/CommentMapper.cs index dd3decf..7cfebb3 100644 --- a/Gobo/Parser/CommentMapper.cs +++ b/Gobo/Parser/CommentMapper.cs @@ -1,14 +1,15 @@ using Gobo.SyntaxNodes; +using Gobo.Text; namespace Gobo.Parser; internal class CommentMapper { - public string SourceText { get; set; } + public SourceText SourceText { get; set; } public List CommentGroups { get; set; } = new(); - public CommentMapper(string sourceText, List triviaGroups) + public CommentMapper(SourceText sourceText, List triviaGroups) { SourceText = sourceText; foreach (var triviaGroup in triviaGroups) diff --git a/Gobo/Parser/GmlLexer.cs b/Gobo/Parser/GmlLexer.cs index 70c1ad4..1bc46a8 100644 --- a/Gobo/Parser/GmlLexer.cs +++ b/Gobo/Parser/GmlLexer.cs @@ -1,4 +1,5 @@ -using System.Runtime.CompilerServices; +using Gobo.Text; +using System.Runtime.CompilerServices; namespace Gobo.Parser; @@ -14,19 +15,17 @@ internal enum LexerMode public bool HitEof { get; private set; } = false; public LexerMode Mode { get; set; } = LexerMode.Default; - private readonly string text; + private readonly SourceText sourceText; private int lineNumber; private int columnNumber; private int startIndex; private int index; private int character; - private string CurrentToken => text[startIndex..index]; + private string CurrentToken => sourceText.ReadSpan(startIndex, index); - private static readonly char[] whitespaces = { '\u000B', '\u000C', '\u0020', '\u00A0', '\t' }; - - public GmlLexer(string text) + public GmlLexer(SourceText source) { - this.text = text; + sourceText = source; index = 0; lineNumber = 1; } @@ 
-82,7 +81,7 @@ public Token NextToken() case '\t': while (true) { - if (!MatchAny(whitespaces)) + if (!MatchAnyWhitespace()) { break; } @@ -594,40 +593,45 @@ private bool Match(int expected) return false; } - private bool MatchAny(char[] expected) + private bool MatchAnyWhitespace() { var next = Peek(); - if (Array.Exists(expected, c => next == c)) + bool matched = next switch + { + '\u000B' or '\u000C' or '\u0020' or '\u00A0' or '\t' => true, + _ => false + }; + + if (matched) { Advance(); - return true; } - return false; + return matched; } [MethodImpl(MethodImplOptions.AggressiveInlining)] private int Peek(int amount = 1) { var targetIndex = index + amount - 1; - if (targetIndex >= text.Length) + if (targetIndex >= sourceText.Length) { return -1; } - return text[targetIndex]; + return sourceText[targetIndex]; } private void Advance() { - if (index >= text.Length) + if (index >= sourceText.Length) { HitEof = true; character = -1; return; } - character = text[index]; + character = sourceText[index]; index++; switch (character) diff --git a/Gobo/Parser/GmlParser.cs b/Gobo/Parser/GmlParser.cs index 5c81481..b9e3d60 100644 --- a/Gobo/Parser/GmlParser.cs +++ b/Gobo/Parser/GmlParser.cs @@ -1,6 +1,7 @@ using Gobo.SyntaxNodes; using Gobo.SyntaxNodes.Gml; using Gobo.SyntaxNodes.Gml.Literals; +using Gobo.Text; namespace Gobo.Parser; @@ -112,9 +113,9 @@ internal class GmlParser private delegate bool BinaryExpressionRule(out GmlSyntaxNode node); - public GmlParser(string code) + public GmlParser(SourceText sourceText) { - lexer = new GmlLexer(code); + lexer = new GmlLexer(sourceText); token = lexer.NextToken(); ProcessToken(token); } diff --git a/Gobo/PrintContext.cs b/Gobo/PrintContext.cs index 0b19d80..d23c7f8 100644 --- a/Gobo/PrintContext.cs +++ b/Gobo/PrintContext.cs @@ -1,11 +1,13 @@ -namespace Gobo; +using Gobo.Text; + +namespace Gobo; internal class PrintContext { public FormatOptions Options { get; init; } - public string SourceText { get; init; } + public 
SourceText SourceText { get; init; } - public PrintContext(FormatOptions options, string sourceText) + public PrintContext(FormatOptions options, SourceText sourceText) { Options = options; SourceText = sourceText; diff --git a/Gobo/StringExtensions.cs b/Gobo/StringExtensions.cs deleted file mode 100644 index ed9d4af..0000000 --- a/Gobo/StringExtensions.cs +++ /dev/null @@ -1,68 +0,0 @@ -namespace Gobo; - -internal static class StringExtensions -{ - public static string ReadSpan(this string text, TextSpan span) - { - return text.Substring(span.Start, span.Length); - } - - public static string ReadSpan(this string text, int start, int end) - { - return text[start..end]; - } - - public static int GetLineBreaksToLeft(this string text, TextSpan span) - { - var start = span.Start - 1; - - if (start <= 0) - { - return 0; - } - - var lineBreakCount = 0; - - for (var index = start; index >= 0; index--) - { - var character = text[index]; - if (character == '\n') - { - lineBreakCount++; - } - else if (!char.IsWhiteSpace(character)) - { - break; - } - } - - return lineBreakCount; - } - - public static int GetLineBreaksToRight(this string text, TextSpan span) - { - var end = span.End; - - if (end >= text.Length - 1) - { - return 0; - } - - var lineBreakCount = 0; - - for (var index = end; index < text.Length; index++) - { - var character = text[index]; - if (character == '\n') - { - lineBreakCount++; - } - else if (!char.IsWhiteSpace(character)) - { - break; - } - } - - return lineBreakCount; - } -} diff --git a/Gobo/Text/ObjectPool.cs b/Gobo/Text/ObjectPool.cs new file mode 100644 index 0000000..27ffc8b --- /dev/null +++ b/Gobo/Text/ObjectPool.cs @@ -0,0 +1,295 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +// define TRACE_LEAKS to get additional diagnostics that can lead to the leak sources. 
note: it will
+// make everything about 2-3x slower
+//
+// #define TRACE_LEAKS
+
+// define DETECT_LEAKS to detect possible leaks
+// #if DEBUG
+// #define DETECT_LEAKS  //for now always enable DETECT_LEAKS in debug.
+// #endif
+
+using System.Diagnostics;
+
+#if DETECT_LEAKS
+using System.Runtime.CompilerServices;
+#endif
+
+namespace Gobo.Text;
+
+/// <summary>
+/// Generic implementation of object pooling pattern with predefined pool size limit. The main
+/// purpose is that limited number of frequently used objects can be kept in the pool for
+/// further recycling.
+///
+/// Notes:
+/// 1) it is not the goal to keep all returned objects. Pool is not meant for storage. If there
+///    is no space in the pool, extra returned objects will be dropped.
+///
+/// 2) it is implied that if object was obtained from a pool, the caller will return it back in
+///    a relatively short time. Keeping checked out objects for long durations is ok, but
+///    reduces usefulness of pooling. Just new up your own.
+///
+/// Not returning objects to the pool is not detrimental to the pool's work, but is a bad practice.
+/// Rationale:
+///    If there is no intent for reusing the object, do not use pool - just use "new".
+/// </summary>
+internal class ObjectPool<T>
+    where T : class
+{
+    [DebuggerDisplay("{Value,nq}")]
+    private struct Element
+    {
+        internal T? Value;
+    }
+
+    /// <remarks>
+    /// Not using System.Func{T} because this file is linked into the (debugger) Formatter,
+    /// which does not have that type (since it compiles against .NET 2.0).
+    /// </remarks>
+    internal delegate T Factory();
+
+    // Storage for the pool objects. The first item is stored in a dedicated field because we
+    // expect to be able to satisfy most requests from it.
+    private T? _firstItem;
+    private readonly Element[] _items;
+
+    // factory is stored for the lifetime of the pool. We will call this only when pool needs to
+    // expand. compared to "new T()", Func gives more flexibility to implementers and faster
+    // than "new T()".
+    private readonly Factory _factory;
+
+    public readonly bool TrimOnFree;
+
+#if DETECT_LEAKS
+    private static readonly ConditionalWeakTable<T, LeakTracker> leakTrackers =
+        new ConditionalWeakTable<T, LeakTracker>();
+
+    private class LeakTracker : IDisposable
+    {
+        private volatile bool disposed;
+
+#if TRACE_LEAKS
+        internal volatile object Trace = null;
+#endif
+
+        public void Dispose()
+        {
+            disposed = true;
+            GC.SuppressFinalize(this);
+        }
+
+        private string GetTrace()
+        {
+#if TRACE_LEAKS
+            return Trace == null ? "" : Trace.ToString();
+#else
+            return "Leak tracing information is disabled. Define TRACE_LEAKS on ObjectPool`1.cs to get more info \n";
+#endif
+        }
+
+        ~LeakTracker()
+        {
+            if (!this.disposed && !Environment.HasShutdownStarted)
+            {
+                var trace = GetTrace();
+
+                // If you are seeing this message it means that object has been allocated from the pool
+                // and has not been returned back. This is not critical, but turns pool into rather
+                // inefficient kind of "new".
+                Debug.WriteLine(
+                    $"TRACEOBJECTPOOLLEAKS_BEGIN\nPool detected potential leaking of {typeof(T)}. \n Location of the leak: \n {trace} TRACEOBJECTPOOLLEAKS_END"
+                );
+            }
+        }
+    }
+#endif
+
+    internal ObjectPool(Factory factory, bool trimOnFree = true)
+        : this(factory, Environment.ProcessorCount * 2, trimOnFree) { }
+
+    internal ObjectPool(Factory factory, int size, bool trimOnFree = true)
+    {
+        Debug.Assert(size >= 1);
+        _factory = factory;
+        _items = new Element[size - 1];
+        TrimOnFree = trimOnFree;
+    }
+
+    internal ObjectPool(Func<ObjectPool<T>, T> factory, int size)
+    {
+        Debug.Assert(size >= 1);
+        _factory = () => factory(this);
+        _items = new Element[size - 1];
+    }
+
+    private T CreateInstance()
+    {
+        var inst = _factory();
+        return inst;
+    }
+
+    /// <summary>
+    /// Produces an instance.
+    /// </summary>
+    /// <remarks>
+    /// Search strategy is a simple linear probing which is chosen for its cache-friendliness.
+    /// Note that Free will try to store recycled objects close to the start thus statistically
+    /// reducing how far we will typically search.
+    /// </remarks>
+    internal T Allocate()
+    {
+        // PERF: Examine the first element. If that fails, AllocateSlow will look at the remaining elements.
+        // Note that the initial read is optimistically not synchronized. That is intentional.
+        // We will interlock only when we have a candidate. in a worst case we may miss some
+        // recently returned objects. Not a big deal.
+        var inst = _firstItem;
+        if (inst == null || inst != Interlocked.CompareExchange(ref _firstItem, null, inst))
+        {
+            inst = AllocateSlow();
+        }
+
+#if DETECT_LEAKS
+        var tracker = new LeakTracker();
+        leakTrackers.Add(inst, tracker);
+
+#if TRACE_LEAKS
+        var frame = CaptureStackTrace();
+        tracker.Trace = frame;
+#endif
+#endif
+        return inst;
+    }
+
+    private T AllocateSlow()
+    {
+        var items = _items;
+
+        for (var i = 0; i < items.Length; i++)
+        {
+            // Note that the initial read is optimistically not synchronized. That is intentional.
+            // We will interlock only when we have a candidate. in a worst case we may miss some
+            // recently returned objects. Not a big deal.
+            var inst = items[i].Value;
+            if (inst != null)
+            {
+                if (inst == Interlocked.CompareExchange(ref items[i].Value, null, inst))
+                {
+                    return inst;
+                }
+            }
+        }
+
+        return CreateInstance();
+    }
+
+    /// <summary>
+    /// Returns objects to the pool.
+    /// </summary>
+    /// <remarks>
+    /// Search strategy is a simple linear probing which is chosen for its cache-friendliness.
+    /// Note that Free will try to store recycled objects close to the start thus statistically
+    /// reducing how far we will typically search in Allocate.
+    /// </remarks>
+    internal void Free(T obj)
+    {
+        Validate(obj);
+        ForgetTrackedObject(obj);
+
+        if (_firstItem == null)
+        {
+            // Intentionally not using interlocked here.
+            // In a worst case scenario two objects may be stored into same slot.
+            // It is very unlikely to happen and will only mean that one of the objects will get collected.
+            _firstItem = obj;
+        }
+        else
+        {
+            FreeSlow(obj);
+        }
+    }
+
+    private void FreeSlow(T obj)
+    {
+        var items = _items;
+        for (var i = 0; i < items.Length; i++)
+        {
+            if (items[i].Value == null)
+            {
+                // Intentionally not using interlocked here.
+                // In a worst case scenario two objects may be stored into same slot.
+                // It is very unlikely to happen and will only mean that one of the objects will get collected.
+                items[i].Value = obj;
+                break;
+            }
+        }
+    }
+
+    /// <summary>
+    /// Removes an object from leak tracking.
+    ///
+    /// This is called when an object is returned to the pool.  It may also be explicitly
+    /// called if an object allocated from the pool is intentionally not being returned
+    /// to the pool.  This can be of use with pooled arrays if the consumer wants to
+    /// return a larger array to the pool than was originally allocated.
+    /// </summary>
+    [Conditional("DEBUG")]
+    internal void ForgetTrackedObject(T old, T? replacement = null)
+    {
+#if DETECT_LEAKS
+        LeakTracker tracker;
+        if (leakTrackers.TryGetValue(old, out tracker))
+        {
+            tracker.Dispose();
+            leakTrackers.Remove(old);
+        }
+        else
+        {
+            var trace = CaptureStackTrace();
+            Debug.WriteLine(
+                $"TRACEOBJECTPOOLLEAKS_BEGIN\nObject of type {typeof(T)} was freed, but was not from pool. \n Callstack: \n {trace} TRACEOBJECTPOOLLEAKS_END"
+            );
+        }
+
+        if (replacement != null)
+        {
+            tracker = new LeakTracker();
+            leakTrackers.Add(replacement, tracker);
+        }
+#endif
+    }
+
+#if DETECT_LEAKS
+    private static Lazy<Type> _stackTraceType = new Lazy<Type>(
+        () => Type.GetType("System.Diagnostics.StackTrace")
+    );
+
+    private static object CaptureStackTrace()
+    {
+        return Activator.CreateInstance(_stackTraceType.Value);
+    }
+#endif
+
+    [Conditional("DEBUG")]
+    private void Validate(object obj)
+    {
+        Debug.Assert(obj != null, "freeing null?");
+
+        Debug.Assert(_firstItem != obj, "freeing twice?");
+
+        var items = _items;
+        for (var i = 0; i < items.Length; i++)
+        {
+            var value = items[i].Value;
+            if (value == null)
+            {
+                return;
+            }
+
+            Debug.Assert(value != obj, "freeing twice?");
+        }
+    }
+}
diff --git a/Gobo/Text/SourceText.cs b/Gobo/Text/SourceText.cs
new file mode 100644
index 0000000..57851cc
--- /dev/null
+++ b/Gobo/Text/SourceText.cs
@@ -0,0 +1,146 @@
+using System.Diagnostics;
+
+namespace Gobo.Text;
+
+public abstract class SourceText
+{
+    public abstract char this[int position] { get; }
+    public abstract int Length { get; }
+
+    private const int CharBufferSize = 32 * 1024;
+    private const int CharBufferCount = 5;
+
+    private static readonly ObjectPool<char[]> s_charArrayPool = new ObjectPool<char[]>(
+        () => new char[CharBufferSize],
+        CharBufferCount
+    );
+
+    public abstract string ReadSpan(TextSpan span);
+
+    public abstract void CopyTo(
+        int sourceIndex,
+        char[] destination,
+        int destinationIndex,
+        int count
+    );
+
+    public string ReadSpan(int start, int end)
+    {
+        return ReadSpan(new TextSpan(start, end));
+    }
+
+    public static SourceText From(string text)
+    {
+        return new StringText(text);
+    }
+
+    ///
+    /// Implements equality comparison of the content of two different instances of <see cref="SourceText"/>.
+    ///
+    /// <returns><c>true</c> if <paramref name="other"/> is this instance or holds the same characters; otherwise <c>false</c>.</returns>
+    public virtual bool ContentEquals(SourceText other)
+    {
+        if (ReferenceEquals(this, other))
+        {
+            return true;
+        }
+
+        if (other == null)
+        {
+            return false;
+        }
+
+        if (this.Length != other.Length)
+        {
+            return false;
+        }
+
+        var buffer1 = s_charArrayPool.Allocate();
+        var buffer2 = s_charArrayPool.Allocate();
+        Debug.Assert(buffer1.Length == buffer2.Length);
+        Debug.Assert(buffer1.Length == CharBufferSize);
+
+        try
+        {
+            for (
+                int position = 0, length = this.Length;
+                position < length;
+                position += CharBufferSize
+            )
+            {
+                var count = Math.Min(this.Length - position, CharBufferSize);
+                this.CopyTo(sourceIndex: position, buffer1, destinationIndex: 0, count);
+                other.CopyTo(sourceIndex: position, buffer2, destinationIndex: 0, count);
+
+                if (!buffer1.AsSpan(0, count).SequenceEqual(buffer2.AsSpan(0, count)))
+                {
+                    return false;
+                }
+            }
+
+            return true;
+        }
+        finally
+        {
+            s_charArrayPool.Free(buffer2);
+            s_charArrayPool.Free(buffer1);
+        }
+    }
+
+    /// <summary>Counts the '\n' characters in the whitespace run that ends just before <paramref name="span"/>.</summary>
+    public virtual int GetLineBreaksToLeft(TextSpan span)
+    {
+        var start = span.Start - 1;
+
+        // NOTE(review): returns 0 without scanning when span.Start <= 1, so a '\n' at index 0 is never counted - confirm intended.
+        if (start <= 0)
+        {
+            return 0;
+        }
+
+        var lineBreakCount = 0;
+
+        for (var index = start; index >= 0; index--)
+        {
+            var character = this[index];
+            if (character == '\n')
+            {
+                lineBreakCount++;
+            }
+            else if (!char.IsWhiteSpace(character))
+            {
+                break;
+            }
+        }
+
+        return lineBreakCount;
+    }
+
+    /// <summary>Counts the '\n' characters in the whitespace run that begins at the End of <paramref name="span"/>.</summary>
+    public virtual int GetLineBreaksToRight(TextSpan span)
+    {
+        var end = span.End;
+
+        // NOTE(review): returns 0 without scanning when span.End >= Length - 1, so a '\n' in the last position is never counted - confirm intended.
+        if (end >= Length - 1)
+        {
+            return 0;
+        }
+        var lineBreakCount = 0;
+
+        for (var index = end; index < Length; index++)
+        {
+            var character = this[index];
+            if (character == '\n')
+            {
+                lineBreakCount++;
+            }
+            else if (!char.IsWhiteSpace(character))
+            {
+                break;
+            }
+        }
+
+        return lineBreakCount;
+    }
+}
diff --git a/Gobo/Text/StringText.cs b/Gobo/Text/StringText.cs
new file mode 100644
index 0000000..67bc34d
--- /dev/null
+++ b/Gobo/Text/StringText.cs
@@ -0,0 +1,30 @@
+namespace
Gobo.Text;
+
+/// <summary>A <see cref="SourceText"/> whose characters come from a single <see cref="string"/>.</summary>
+public class StringText : SourceText
+{
+    /// <summary>The string every member of this class reads from.</summary>
+    public string Source;
+
+    public override int Length => Source.Length;
+
+    public override char this[int position] => Source[position];
+
+    /// <summary>Stores <paramref name="code"/> as <see cref="Source"/>.</summary>
+    /// <exception cref="ArgumentNullException"><paramref name="code"/> is <c>null</c>.</exception>
+    public StringText(string code)
+    {
+        // Fail at construction instead of with a NullReferenceException on first use.
+        Source = code ?? throw new ArgumentNullException(nameof(code));
+    }
+
+    /// <summary>Returns the <c>span.Length</c> characters of <see cref="Source"/> starting at <c>span.Start</c>.</summary>
+    public override string ReadSpan(TextSpan span) => Source.Substring(span.Start, span.Length);
+
+    /// <summary>Copies <paramref name="count"/> characters of <see cref="Source"/>, starting at
+    /// <paramref name="sourceIndex"/>, into <paramref name="destination"/> at <paramref name="destinationIndex"/>.</summary>
+    public override void CopyTo(int sourceIndex, char[] destination, int destinationIndex, int count)
+    {
+        Source.CopyTo(sourceIndex, destination, destinationIndex, count);
+    }
+}