Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce more efficient internal representation of a sequence VirtualChars #33834

Merged
merged 28 commits into from
Mar 5, 2019
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,12 @@ public async Task<ImmutableArray<DocumentHighlights>> GetDocumentHighlightsAsync
var tree = await _language.TryGetTreeAtPositionAsync(document, position, cancellationToken).ConfigureAwait(false);
return tree == null
? default
: ImmutableArray.Create(new DocumentHighlights(document, GetHighlights(document, tree, position)));
: ImmutableArray.Create(new DocumentHighlights(document, GetHighlights(tree, position)));
}

private ImmutableArray<HighlightSpan> GetHighlights(
Document document, RegexTree tree, int positionInDocument)
private ImmutableArray<HighlightSpan> GetHighlights(RegexTree tree, int positionInDocument)
{
var referencesOnTheRight = GetReferences(document, tree, positionInDocument, caretOnLeft: true);
var referencesOnTheRight = GetReferences(tree, positionInDocument, caretOnLeft: true);
if (!referencesOnTheRight.IsEmpty)
{
return referencesOnTheRight;
Expand All @@ -56,12 +55,12 @@ private ImmutableArray<HighlightSpan> GetHighlights(

// Nothing was on the right of the caret. Return anything we were able to find on
// the left of the caret.
var referencesOnTheLeft = GetReferences(document, tree, positionInDocument - 1, caretOnLeft: false);
var referencesOnTheLeft = GetReferences(tree, positionInDocument - 1, caretOnLeft: false);
return referencesOnTheLeft;
}

private ImmutableArray<HighlightSpan> GetReferences(
Document document, RegexTree tree, int position, bool caretOnLeft)
RegexTree tree, int position, bool caretOnLeft)
{
var virtualChar = tree.Text.FirstOrNullable(vc => vc.Span.Contains(position));
if (virtualChar == null)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ internal class CSharpVirtualCharService : AbstractVirtualCharService
/// <summary>
/// Reports whether <paramref name="token"/> has the kind
/// <see cref="SyntaxKind.StringLiteralToken"/>.
/// </summary>
protected override bool IsStringLiteralToken(SyntaxToken token)
{
    var kind = token.Kind();
    return kind == SyntaxKind.StringLiteralToken;
}

protected override ImmutableArray<VirtualChar> TryConvertToVirtualCharsWorker(SyntaxToken token)
protected override VirtualCharSequence TryConvertToVirtualCharsWorker(SyntaxToken token)
{
// C# preprocessor directives can contain string literals. However, these string
// literals do not behave like normal literals. Because they are used for paths (i.e.
Expand Down Expand Up @@ -80,10 +80,10 @@ private bool IsInDirective(SyntaxNode node)
return false;
}

private ImmutableArray<VirtualChar> TryConvertVerbatimStringToVirtualChars(SyntaxToken token, string startDelimiter, string endDelimiter, bool escapeBraces)
/// <summary>
/// Converts a verbatim string token by forwarding all arguments, unchanged, to
/// <c>TryConvertSimpleDoubleQuoteString</c> and returning its result.
/// </summary>
private VirtualCharSequence TryConvertVerbatimStringToVirtualChars(SyntaxToken token, string startDelimiter, string endDelimiter, bool escapeBraces)
{
    return TryConvertSimpleDoubleQuoteString(token, startDelimiter, endDelimiter, escapeBraces);
}

private ImmutableArray<VirtualChar> TryConvertStringToVirtualChars(
private VirtualCharSequence TryConvertStringToVirtualChars(
SyntaxToken token, string startDelimiter, string endDelimiter, bool escapeBraces)
{
var tokenText = token.Text;
Expand Down Expand Up @@ -133,7 +133,8 @@ private ImmutableArray<VirtualChar> TryConvertStringToVirtualChars(
}
}

return result.ToImmutable();
return CreateVirtualCharSequence(
tokenText, startIndexInclusive, endIndexExclusive, result, offset);
}
finally
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ private void TryParseSubTrees(
}
}

private (SyntaxToken, RegexTree, ImmutableArray<VirtualChar>) JustParseTree(
private (SyntaxToken, RegexTree, VirtualCharSequence) JustParseTree(
string stringText, RegexOptions options, bool conversionFailureOk)
{
var token = GetStringToken(stringText);
Expand Down Expand Up @@ -252,15 +252,15 @@ private XElement TriviaToElement(RegexTrivia trivia)
trivia.Kind.ToString(),
trivia.VirtualChars.CreateString());

private void CheckInvariants(RegexTree tree, ImmutableArray<VirtualChar> allChars)
/// <summary>
/// Checks the invariants of <paramref name="tree"/> starting at its root with a running
/// position of zero, then asserts that the final position equals the length of
/// <paramref name="allChars"/>.
/// </summary>
private void CheckInvariants(RegexTree tree, VirtualCharSequence allChars)
{
    var consumed = 0;
    CheckInvariants(tree.Root, ref consumed, allChars);

    // The walk must end exactly at the end of the full character sequence.
    Assert.Equal(allChars.Length, consumed);
}

private void CheckInvariants(RegexNode node, ref int position, ImmutableArray<VirtualChar> allChars)
private void CheckInvariants(RegexNode node, ref int position, VirtualCharSequence allChars)
{
foreach (var child in node)
{
Expand All @@ -275,21 +275,21 @@ private void CheckInvariants(RegexNode node, ref int position, ImmutableArray<Vi
}
}

private void CheckInvariants(RegexToken token, ref int position, ImmutableArray<VirtualChar> allChars)
/// <summary>
/// Checks a single token: its leading trivia is checked first, followed by the token's
/// own characters. <paramref name="position"/> is passed by reference to both checks.
/// </summary>
private void CheckInvariants(RegexToken token, ref int position, VirtualCharSequence allChars)
{
    var leading = token.LeadingTrivia;
    CheckInvariants(leading, ref position, allChars);

    var ownChars = token.VirtualChars;
    CheckCharacters(ownChars, ref position, allChars);
}

private void CheckInvariants(ImmutableArray<RegexTrivia> leadingTrivia, ref int position, ImmutableArray<VirtualChar> allChars)
/// <summary>
/// Checks every trivia in <paramref name="leadingTrivia"/>, in order, threading
/// <paramref name="position"/> through each check.
/// </summary>
private void CheckInvariants(ImmutableArray<RegexTrivia> leadingTrivia, ref int position, VirtualCharSequence allChars)
{
    for (var index = 0; index < leadingTrivia.Length; index++)
    {
        CheckInvariants(leadingTrivia[index], ref position, allChars);
    }
}

private void CheckInvariants(RegexTrivia trivia, ref int position, ImmutableArray<VirtualChar> allChars)
private void CheckInvariants(RegexTrivia trivia, ref int position, VirtualCharSequence allChars)
{
switch (trivia.Kind)
{
Expand All @@ -304,7 +304,7 @@ private void CheckInvariants(RegexTrivia trivia, ref int position, ImmutableArra
CheckCharacters(trivia.VirtualChars, ref position, allChars);
}

private static void CheckCharacters(ImmutableArray<VirtualChar> virtualChars, ref int position, ImmutableArray<VirtualChar> allChars)
private static void CheckCharacters(VirtualCharSequence virtualChars, ref int position, VirtualCharSequence allChars)
{
for (var i = 0; i < virtualChars.Length; i++)
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
// Copyright (c) Microsoft. All Rights Reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System;
using System.Collections.Immutable;
using System.Linq;
using Microsoft.CodeAnalysis.CSharp.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.PooledObjects;
using Xunit;

namespace Microsoft.CodeAnalysis.CSharp.UnitTests.EmbeddedLanguages.VirtualChars
Expand Down Expand Up @@ -76,6 +75,12 @@ public void TestSimpleString()
Test("\"a\"", "['a',[1,2]]");
}

[Fact]
public void TestSimpleMultiCharString()
{
    // A three-character literal: each character is expected to map to its own
    // one-wide span, in order.
    const string literal = "\"abc\"";
    const string expected = "['a',[1,2]]['b',[2,3]]['c',[3,4]]";

    Test(literal, expected);
}

[Fact]
public void TestBracesInSimpleString()
{
Expand Down Expand Up @@ -258,8 +263,16 @@ public void TestEscapedQuoteInVerbatimString()
Test("@\"a\"\"a\"", @"['a',[2,3]]['\u0022',[3,5]]['a',[5,6]]");
}

private string ConvertToString(ImmutableArray<VirtualChar> virtualChars)
=> string.Join("", virtualChars.Select(ConvertToString));
/// <summary>
/// Renders every character of <paramref name="virtualChars"/> with the single-character
/// <c>ConvertToString</c> overload and concatenates the pieces, in order, with no separator.
/// </summary>
private string ConvertToString(VirtualCharSequence virtualChars)
{
    var text = new System.Text.StringBuilder();
    foreach (var ch in virtualChars)
    {
        text.Append(ConvertToString(ch));
    }

    return text.ToString();
}

/// <summary>
/// Renders one virtual character as <c>[char,[start,end]]</c>, where start and end are the
/// character's span bounds with the length of <c>_statementPrefix</c> subtracted.
/// </summary>
private string ConvertToString(VirtualChar vc)
{
    var charText = ConvertToString(vc.Char);

    // Report span bounds relative to the end of the statement prefix.
    var prefixLength = _statementPrefix.Length;
    var start = vc.Span.Start - prefixLength;
    var end = vc.Span.End - prefixLength;

    return $"[{charText},[{start},{end}]]";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ internal static class EmbeddedSyntaxHelpers
/// <summary>
/// Gets the span running from the first virtual char of <paramref name="token1"/> to the
/// last virtual char of <paramref name="token2"/>.
/// </summary>
public static TextSpan GetSpan<TSyntaxKind>(EmbeddedSyntaxToken<TSyntaxKind> token1, EmbeddedSyntaxToken<TSyntaxKind> token2) where TSyntaxKind : struct
{
    var firstChar = token1.VirtualChars[0];
    var lastChar = token2.VirtualChars.Last();
    return GetSpan(firstChar, lastChar);
}

public static TextSpan GetSpan(ImmutableArray<VirtualChar> virtualChars)
/// <summary>
/// Gets the span running from the first to the last character of
/// <paramref name="virtualChars"/>.
/// </summary>
public static TextSpan GetSpan(VirtualCharSequence virtualChars)
{
    var firstChar = virtualChars[0];
    var lastChar = virtualChars.Last();
    return GetSpan(firstChar, lastChar);
}

public static TextSpan GetSpan(VirtualChar firstChar, VirtualChar lastChar)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ internal struct EmbeddedSyntaxToken<TSyntaxKind> where TSyntaxKind : struct
{
public readonly TSyntaxKind Kind;
public readonly ImmutableArray<EmbeddedSyntaxTrivia<TSyntaxKind>> LeadingTrivia;
public readonly ImmutableArray<VirtualChar> VirtualChars;
public readonly VirtualCharSequence VirtualChars;
public readonly ImmutableArray<EmbeddedSyntaxTrivia<TSyntaxKind>> TrailingTrivia;
internal readonly ImmutableArray<EmbeddedDiagnostic> Diagnostics;

Expand All @@ -24,7 +24,7 @@ internal struct EmbeddedSyntaxToken<TSyntaxKind> where TSyntaxKind : struct
public EmbeddedSyntaxToken(
TSyntaxKind kind,
ImmutableArray<EmbeddedSyntaxTrivia<TSyntaxKind>> leadingTrivia,
ImmutableArray<VirtualChar> virtualChars,
VirtualCharSequence virtualChars,
ImmutableArray<EmbeddedSyntaxTrivia<TSyntaxKind>> trailingTrivia,
ImmutableArray<EmbeddedDiagnostic> diagnostics, object value)
{
Expand All @@ -51,7 +51,7 @@ public EmbeddedSyntaxToken<TSyntaxKind> WithDiagnostics(ImmutableArray<EmbeddedD
public EmbeddedSyntaxToken<TSyntaxKind> With(
Optional<TSyntaxKind> kind = default,
Optional<ImmutableArray<EmbeddedSyntaxTrivia<TSyntaxKind>>> leadingTrivia = default,
Optional<ImmutableArray<VirtualChar>> virtualChars = default,
Optional<VirtualCharSequence> virtualChars = default,
Optional<ImmutableArray<EmbeddedSyntaxTrivia<TSyntaxKind>>> trailingTrivia = default,
Optional<ImmutableArray<EmbeddedDiagnostic>> diagnostics = default,
Optional<object> value = default)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,12 @@ internal abstract class EmbeddedSyntaxTree<TSyntaxKind, TSyntaxNode, TCompilatio
where TSyntaxNode : EmbeddedSyntaxNode<TSyntaxKind, TSyntaxNode>
where TCompilationUnitSyntax : TSyntaxNode
{
public readonly ImmutableArray<VirtualChar> Text;
public readonly VirtualCharSequence Text;
public readonly TCompilationUnitSyntax Root;
public readonly ImmutableArray<EmbeddedDiagnostic> Diagnostics;

protected EmbeddedSyntaxTree(
ImmutableArray<VirtualChar> text,
VirtualCharSequence text,
TCompilationUnitSyntax root,
ImmutableArray<EmbeddedDiagnostic> diagnostics)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,15 @@ namespace Microsoft.CodeAnalysis.EmbeddedLanguages.Common
internal struct EmbeddedSyntaxTrivia<TSyntaxKind> where TSyntaxKind : struct
{
public readonly TSyntaxKind Kind;
public readonly ImmutableArray<VirtualChar> VirtualChars;
public readonly VirtualCharSequence VirtualChars;

/// <summary>
/// A place for diagnostics to be stored during parsing. Not intended to be accessed
/// directly. These will be collected and aggregated into <see cref="EmbeddedSyntaxTree{TSyntaxKind, TSyntaxNode, TCompilationUnitSyntax}.Diagnostics"/>.
/// </summary>
internal readonly ImmutableArray<EmbeddedDiagnostic> Diagnostics;

public EmbeddedSyntaxTrivia(TSyntaxKind kind, ImmutableArray<VirtualChar> virtualChars, ImmutableArray<EmbeddedDiagnostic> diagnostics)
public EmbeddedSyntaxTrivia(TSyntaxKind kind, VirtualCharSequence virtualChars, ImmutableArray<EmbeddedDiagnostic> diagnostics)
{
Debug.Assert(virtualChars.Length > 0);
Kind = kind;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,7 @@ public RegexTree TryParseRegexPattern(SyntaxToken token, CancellationToken cance
}

var chars = _info.VirtualCharService.TryConvertToVirtualChars(token);
return chars.IsDefault ? null : RegexParser.TryParse(chars, options);
return RegexParser.TryParse(chars, options);
}

private bool AnalyzeStringLiteral(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
using Microsoft.CodeAnalysis.Classification.Classifiers;
using Microsoft.CodeAnalysis.EmbeddedLanguages.Common;
using Microsoft.CodeAnalysis.EmbeddedLanguages.LanguageServices;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
using Microsoft.CodeAnalysis.PooledObjects;

namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions.LanguageServices
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,16 @@ internal static class RegexHelpers
/// <summary>
/// Returns <c>true</c> when <paramref name="options"/> and <paramref name="val"/> share at
/// least one set bit.
/// </summary>
public static bool HasOption(RegexOptions options, RegexOptions val)
{
    var overlap = options & val;
    return overlap != RegexOptions.None;
}

public static RegexToken CreateToken(RegexKind kind, ImmutableArray<RegexTrivia> leadingTrivia, ImmutableArray<VirtualChar> virtualChars)
/// <summary>
/// Creates a token of the given kind with the supplied leading trivia and characters,
/// empty trailing trivia, empty diagnostics, and a <c>null</c> value.
/// </summary>
public static RegexToken CreateToken(RegexKind kind, ImmutableArray<RegexTrivia> leadingTrivia, VirtualCharSequence virtualChars)
{
    var noTrailingTrivia = ImmutableArray<RegexTrivia>.Empty;
    var noDiagnostics = ImmutableArray<EmbeddedDiagnostic>.Empty;

    return new RegexToken(kind, leadingTrivia, virtualChars, noTrailingTrivia, noDiagnostics, value: null);
}

public static RegexToken CreateMissingToken(RegexKind kind)
=> CreateToken(kind, ImmutableArray<RegexTrivia>.Empty, ImmutableArray<VirtualChar>.Empty);
=> CreateToken(kind, ImmutableArray<RegexTrivia>.Empty, VirtualCharSequence.Empty);

public static RegexTrivia CreateTrivia(RegexKind kind, ImmutableArray<VirtualChar> virtualChars)
/// <summary>
/// Creates trivia of the given kind over <paramref name="virtualChars"/> with an empty
/// diagnostics array.
/// </summary>
public static RegexTrivia CreateTrivia(RegexKind kind, VirtualCharSequence virtualChars)
{
    var noDiagnostics = ImmutableArray<EmbeddedDiagnostic>.Empty;
    return CreateTrivia(kind, virtualChars, noDiagnostics);
}

public static RegexTrivia CreateTrivia(RegexKind kind, ImmutableArray<VirtualChar> virtualChars, ImmutableArray<EmbeddedDiagnostic> diagnostics)
/// <summary>
/// Creates trivia of the given kind over <paramref name="virtualChars"/>, carrying the
/// supplied <paramref name="diagnostics"/>.
/// </summary>
public static RegexTrivia CreateTrivia(RegexKind kind, VirtualCharSequence virtualChars, ImmutableArray<EmbeddedDiagnostic> diagnostics)
{
    return new RegexTrivia(kind, virtualChars, diagnostics);
}

/// <summary>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions
{
using static EmbeddedSyntaxHelpers;
using static RegexHelpers;

using RegexToken = EmbeddedSyntaxToken<RegexKind>;
Expand All @@ -37,42 +36,34 @@ namespace Microsoft.CodeAnalysis.EmbeddedLanguages.RegularExpressions
/// </summary>
internal struct RegexLexer
{
public readonly ImmutableArray<VirtualChar> Text;
public readonly VirtualCharSequence Text;
public int Position;

public RegexLexer(ImmutableArray<VirtualChar> text) : this()
/// <summary>
/// Creates a lexer over <paramref name="text"/>.
/// </summary>
public RegexLexer(VirtualCharSequence text) : this()
{
    // `: this()` default-initializes the struct first, so Position starts at 0.
    this.Text = text;
}

/// <summary>
/// The character at <see cref="Position"/>, or a virtual char with a zero char value and
/// a default span when the position is at or past the end of <see cref="Text"/>.
/// </summary>
public VirtualChar CurrentChar
{
    get
    {
        if (Position < Text.Length)
        {
            return Text[Position];
        }

        return new VirtualChar((char)0, default);
    }
}

public ImmutableArray<VirtualChar> GetSubPatternToCurrentPos(int start)
/// <summary>
/// Gets the sub-pattern that begins at <paramref name="start"/> and ends at the lexer's
/// current <see cref="Position"/>.
/// </summary>
public VirtualCharSequence GetSubPatternToCurrentPos(int start)
{
    var end = Position;
    return GetSubPattern(start, end);
}

public ImmutableArray<VirtualChar> GetSubPattern(int start, int end)
{
var result = ArrayBuilder<VirtualChar>.GetInstance(end - start);
for (var i = start; i < end; i++)
{
result.Add(Text[i]);
}

return result.ToImmutableAndFree();
}
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Large source of allocations removed.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now, when a Token/Trivia wants to point at a range of characters, it can do so in an alloc-free manner. It just gets a VirtualCharSequence (a struct) which is a sub-span of the original VirtualCharSequence.

/// <summary>
/// Gets the sub-sequence of <see cref="Text"/> described by
/// <c>TextSpan.FromBounds(start, end)</c>.
/// </summary>
public VirtualCharSequence GetSubPattern(int start, int end)
{
    var bounds = TextSpan.FromBounds(start, end);
    return Text.GetSubSequence(bounds);
}

public RegexToken ScanNextToken(bool allowTrivia, RegexOptions options)
{
var trivia = ScanLeadingTrivia(allowTrivia, options);
if (Position == Text.Length)
{
return CreateToken(RegexKind.EndOfFile, trivia, ImmutableArray<VirtualChar>.Empty);
return CreateToken(RegexKind.EndOfFile, trivia, VirtualCharSequence.Empty);
}

var ch = this.CurrentChar;
Position++;

return CreateToken(GetKind(ch), trivia, ImmutableArray.Create(ch));
return CreateToken(GetKind(ch), trivia, Text.GetSubSequence(new TextSpan(Position - 1, 1)));
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

allocation removed.

}

private static RegexKind GetKind(char ch)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

using System.Collections.Immutable;
using System.Diagnostics;
using System.Linq;
using System.Text.RegularExpressions;
using Microsoft.CodeAnalysis.EmbeddedLanguages.Common;
using Microsoft.CodeAnalysis.EmbeddedLanguages.VirtualChars;
Expand All @@ -25,13 +24,13 @@ internal partial struct RegexParser
/// </summary>
private struct CaptureInfoAnalyzer
{
private readonly ImmutableArray<VirtualChar> _text;
private readonly VirtualCharSequence _text;
private readonly ImmutableDictionary<int, TextSpan>.Builder _captureNumberToSpan;
private readonly ImmutableDictionary<string, TextSpan>.Builder _captureNameToSpan;
private readonly ArrayBuilder<string> _captureNames;
private int _autoNumber;

private CaptureInfoAnalyzer(ImmutableArray<VirtualChar> text)
private CaptureInfoAnalyzer(VirtualCharSequence text)
{
_text = text;
_captureNumberToSpan = ImmutableDictionary.CreateBuilder<int, TextSpan>();
Expand All @@ -43,7 +42,7 @@ private CaptureInfoAnalyzer(ImmutableArray<VirtualChar> text)
}

public static (ImmutableDictionary<string, TextSpan>, ImmutableDictionary<int, TextSpan>) Analyze(
ImmutableArray<VirtualChar> text, RegexCompilationUnit root, RegexOptions options)
VirtualCharSequence text, RegexCompilationUnit root, RegexOptions options)
{
var analyzer = new CaptureInfoAnalyzer(text);
return analyzer.Analyze(root, options);
Expand Down
Loading