From de1d4636c7f8fc60bc6794662f67166cf2c3d3f2 Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Mon, 12 Aug 2024 17:12:40 -0700 Subject: [PATCH 01/15] Reduce allocations from newline information allocated by ChangedText.GetLinesCore. Going to start this in draft mode and fire off a test insertion to verify this improves allocations as the typing scenario in speedometer scrolling test shows this as nearly 10% of allocations. The general idea here is that ChangedText doesn't need to keep track of line information as the SourceText that it wraps has that information. The complexity that was in ChangedText around newline splitting now needs to sit in both CompositeText and SubText as they need to understand how to expose their line collections. --- .../Text/CompositeTextTests.cs | 79 +++++++++++ .../CodeAnalysisTest/Text/TextChangeTests.cs | 4 +- .../Core/Portable/Text/ChangedText.cs | 102 +------------- .../Core/Portable/Text/CompositeText.cs | 125 ++++++++++++++++++ src/Compilers/Core/Portable/Text/SubText.cs | 85 ++++++++++++ 5 files changed, 292 insertions(+), 103 deletions(-) create mode 100644 src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs diff --git a/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs b/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs new file mode 100644 index 0000000000000..92cd059712b1a --- /dev/null +++ b/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs @@ -0,0 +1,79 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Collections.Generic; +using System.Collections.Immutable; +using System.IO; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Microsoft.CodeAnalysis.PooledObjects; +using Microsoft.CodeAnalysis.Text; +using Xunit; + +namespace Microsoft.CodeAnalysis.UnitTests.Text +{ + public class CompositeTextTests + { + [Theory] + [InlineData(["a", "b"])] + [InlineData(["a", "b", "c", "d", "e", "f"])] + [InlineData(["aa", "bb", "cc", "dd", "ee", "ff"])] + [InlineData(["a\r\n", "b"])] + [InlineData(["a", "\r\nb"])] + [InlineData(["\r\na\r\n", "\r\nb\r\n"])] + [InlineData(["\r\n\r\na", "b", "c", "d\r\n\r\n"])] + [InlineData(["a\r", "\nb"])] + [InlineData(["a\r", "\nb\r", "\nc"])] + [InlineData(["a\n", "\nb\n", "\nc"])] + [InlineData(["a\r", "\rb\r", "\rc"])] + public void CompositeTextLinesEqualSourceTextLines(params string[] sourceTextsContents) + { + var (sourceText, compositeText) = CreateSourceAndCompositeTexts(sourceTextsContents); + var sourceLinesText = GetLinesTexts(sourceText.Lines); + var compositeLinesText = GetLinesTexts(compositeText.Lines); + + Assert.True(sourceLinesText.SequenceEqual(compositeLinesText)); + } + + [Theory] + [InlineData(["a", "b"])] + [InlineData(["a", "b", "c", "d", "e", "f"])] + [InlineData(["aa", "bb", "cc", "dd", "ee", "ff"])] + [InlineData(["a\r\n", "b"])] + [InlineData(["a", "\r\nb"])] + [InlineData(["\r\na\r\n", "\r\nb\r\n"])] + [InlineData(["\r\n\r\na", "b", "c", "d\r\n\r\n"])] + [InlineData(["a\r", "\nb"])] + [InlineData(["a\r", "\nb\r", "\nc"])] + [InlineData(["a\n", "\nb\n", "\nc"])] + [InlineData(["a\r", "\rb\r", "\rc"])] + public void CompositeTextIndexOfEqualSourceTextIndexOf(params string[] sourceTextsContents) + { + var (sourceText, compositeText) = CreateSourceAndCompositeTexts(sourceTextsContents); + + for (var i = 0; i < sourceText.Length; i++) + { + Assert.Equal(sourceText.Lines.IndexOf(i), compositeText.Lines.IndexOf(i)); + } + } + + private IEnumerable GetLinesTexts(TextLineCollection textLines) + { + return textLines.Select(l => l.Text.ToString(l.SpanIncludingLineBreak)); + } + + private (SourceText, CompositeText) CreateSourceAndCompositeTexts(string[] contents) + { + var texts = ArrayBuilder.GetInstance(); + texts.AddRange(contents.Select(static s => SourceText.From(s))); + + var sourceText = SourceText.From(String.Join(string.Empty, contents)); + var compositeText = (CompositeText)CompositeText.ToSourceText(texts, sourceText, adjustSegments: false); + + return (sourceText, compositeText); + } + } +} diff --git a/src/Compilers/Core/CodeAnalysisTest/Text/TextChangeTests.cs b/src/Compilers/Core/CodeAnalysisTest/Text/TextChangeTests.cs index 19b2d90213633..ba0f572cbe928 100644 --- a/src/Compilers/Core/CodeAnalysisTest/Text/TextChangeTests.cs +++ b/src/Compilers/Core/CodeAnalysisTest/Text/TextChangeTests.cs @@ -320,14 +320,14 @@ public void TestOptimizedSourceTextLinesRemoveCrLf() } [Fact] - public void TestOptimizedSourceTextLinesBrakeCrLf() + public void TestOptimizedSourceTextLinesBreakCrLf() { AssertChangedTextLinesHelper("Test\r\nMessage", new TextChange(new TextSpan(5, 0), "aaaaaa")); } [Fact] - public void TestOptimizedSourceTextLinesBrakeCrLfWithLfPrefixedAndCrSuffixed() + public void TestOptimizedSourceTextLinesBreakCrLfWithLfPrefixedAndCrSuffixed() { AssertChangedTextLinesHelper("Test\r\nMessage", new TextChange(new TextSpan(5, 0), "\naaaaaa\r")); diff --git a/src/Compilers/Core/Portable/Text/ChangedText.cs b/src/Compilers/Core/Portable/Text/ChangedText.cs index c8f7d46b8d71d..3f33bf0e5a0ab 100644 --- a/src/Compilers/Core/Portable/Text/ChangedText.cs +++ b/src/Compilers/Core/Portable/Text/ChangedText.cs @@ -7,8 +7,6 @@ using System.Collections.Immutable; using System.Diagnostics; using System.Text; -using Microsoft.CodeAnalysis.Collections; -using Microsoft.CodeAnalysis.PooledObjects; using Roslyn.Utilities; namespace Microsoft.CodeAnalysis.Text @@ -264,107 +262,9 @@ private static ImmutableArray Merge(IReadOnlyList - /// Computes line starts faster given already computed line starts from text before the change. - /// protected override TextLineCollection GetLinesCore() { - SourceText? oldText; - TextLineCollection? oldLineInfo; - - if (!_info.WeakOldText.TryGetTarget(out oldText) || !oldText.TryGetLines(out oldLineInfo)) - { - // no old line starts? do it the hard way. - return base.GetLinesCore(); - } - - // compute line starts given changes and line starts already computed from previous text - var lineStarts = new SegmentedList(capacity: oldLineInfo.Count) - { - 0 - }; - - // position in the original document - var position = 0; - - // delta generated by already processed changes (position in the new document = position + delta) - var delta = 0; - - // true if last segment ends with CR and we need to check for CR+LF code below assumes that both CR and LF are also line breaks alone - var endsWithCR = false; - - foreach (var change in _info.ChangeRanges) - { - // include existing line starts that occur before this change - if (change.Span.Start > position) - { - if (endsWithCR && _newText[position + delta] == '\n') - { - // remove last added line start (it was due to previous CR) - // a new line start including the LF will be added next - lineStarts.RemoveAt(lineStarts.Count - 1); - } - - var lps = oldLineInfo.GetLinePositionSpan(TextSpan.FromBounds(position, change.Span.Start)); - for (int i = lps.Start.Line + 1; i <= lps.End.Line; i++) - { - lineStarts.Add(oldLineInfo[i].Start + delta); - } - - endsWithCR = oldText[change.Span.Start - 1] == '\r'; - - // in case change is inserted between CR+LF we treat CR as line break alone, - // but this line break might be retracted and replaced with new one in case LF is inserted - if (endsWithCR && change.Span.Start < oldText.Length && oldText[change.Span.Start] == '\n') - { - lineStarts.Add(change.Span.Start + delta); - } - } - - // include line starts that occur within newly inserted text - if (change.NewLength > 0) - { - var changeStart = change.Span.Start + delta; - var text = GetSubText(new TextSpan(changeStart, change.NewLength)); - - if (endsWithCR && text[0] == '\n') - { - // remove last added line start (it was due to previous CR) - // a new line start including the LF will be added next - lineStarts.RemoveAt(lineStarts.Count - 1); - } - - // Skip first line (it is always at offset 0 and corresponds to the previous line) - for (int i = 1; i < text.Lines.Count; i++) - { - lineStarts.Add(changeStart + text.Lines[i].Start); - } - - endsWithCR = text[change.NewLength - 1] == '\r'; - } - - position = change.Span.End; - delta += (change.NewLength - change.Span.Length); - } - - // include existing line starts that occur after all changes - if (position < oldText.Length) - { - if (endsWithCR && _newText[position + delta] == '\n') - { - // remove last added line start (it was due to previous CR) - // a new line start including the LF will be added next - lineStarts.RemoveAt(lineStarts.Count - 1); - } - - var lps = oldLineInfo.GetLinePositionSpan(TextSpan.FromBounds(position, oldText.Length)); - for (int i = lps.Start.Line + 1; i <= lps.End.Line; i++) - { - lineStarts.Add(oldLineInfo[i].Start + delta); - } - } - - return new LineInfo(this, lineStarts); + return _newText.Lines; } internal static class TestAccessor diff --git a/src/Compilers/Core/Portable/Text/CompositeText.cs b/src/Compilers/Core/Portable/Text/CompositeText.cs index d72ccf5f8d7c9..578138159b23d 100644 --- a/src/Compilers/Core/Portable/Text/CompositeText.cs +++ b/src/Compilers/Core/Portable/Text/CompositeText.cs @@ -44,6 +44,11 @@ private CompositeText(ImmutableArray segments, Encoding? encoding, S } } + protected override TextLineCollection GetLinesCore() + { + return new CompositeLineInfo(this); + } + public override Encoding? Encoding { get { return _encoding; } @@ -373,5 +378,125 @@ private static void TrimInaccessibleText(ArrayBuilder segments) segments.Add(writer.ToSourceText()); } } + + /// + /// Delegates to SourceTexts within the CompositeText to determine line information. + /// + internal sealed class CompositeLineInfo : TextLineCollection + { + private readonly CompositeText _compositeText; + private readonly int[] _segmentLineIndexes; + private readonly int _lineCount; + + public CompositeLineInfo(CompositeText compositeText) + { + _compositeText = compositeText; + _segmentLineIndexes = new int[compositeText.Segments.Length]; + + for (int i = 0; i < compositeText.Segments.Length; i++) + { + _segmentLineIndexes[i] = _lineCount; + + var segment = compositeText.Segments[i]; + _lineCount += (segment.Lines.Count - 1); + + // If "\r\n" is split amongst adjacent segments, reduce the line count by one as both segments + // would count their corresponding CR or LF as a newline. + if (segment.Length > 0 && + segment[segment.Length - 1] == '\r' && + i < compositeText.Segments.Length - 1) + { + var nextSegment = compositeText.Segments[i + 1]; + if (nextSegment.Length > 0 && nextSegment[0] == '\n') + { + _lineCount -= 1; + } + } + } + + _lineCount += 1; + } + + public override int Count => _lineCount; + + public override int IndexOf(int position) + { + _compositeText.GetIndexAndOffset(position, out var segmentIndex, out var segmentOffset); + + var segment = _compositeText.Segments[segmentIndex]; + var lineIndexWithinSegment = segment.Lines.IndexOf(segmentOffset); + + return _segmentLineIndexes[segmentIndex] + lineIndexWithinSegment; + } + + public override TextLine this[int lineNumber] + { + get + { + // Determine the indexes for segments that contribute to our view of this line's contents + GetSegmentIndexRangeContainingLine(lineNumber, out var firstSegmentIndex, out var lastSegmentIndex); + + var firstSegmentFirstLineNumber = _segmentLineIndexes[firstSegmentIndex]; + var firstSegment = _compositeText.Segments[firstSegmentIndex]; + var firstSegmentOffset = _compositeText._segmentOffsets[firstSegmentIndex]; + var firstSegmentTextLine = firstSegment.Lines[lineNumber - firstSegmentFirstLineNumber]; + + var lineLength = firstSegmentTextLine.SpanIncludingLineBreak.Length; + + // walk forward through remaining segments contributing to this line, and add their + // view of this line. + for (var nextSegmentIndex = firstSegmentIndex + 1; nextSegmentIndex <= lastSegmentIndex; nextSegmentIndex++) + { + var nextSegment = _compositeText.Segments[nextSegmentIndex]; + lineLength += nextSegment.Lines[0].SpanIncludingLineBreak.Length; + } + + return TextLine.FromSpanUnsafe(_compositeText, new TextSpan(firstSegmentOffset + firstSegmentTextLine.Start, lineLength)); + } + } + + private void GetSegmentIndexRangeContainingLine(int lineNumber, out int firstSegmentIndex, out int lastSegmentIndex) + { + int idx = _segmentLineIndexes.BinarySearch(lineNumber); + var binarySearchSegmentIndex = idx >= 0 ? idx : (~idx - 1); + + for (firstSegmentIndex = binarySearchSegmentIndex; firstSegmentIndex > 0; firstSegmentIndex--) + { + if (_segmentLineIndexes[firstSegmentIndex] != lineNumber) + { + // This segment doesn't start at the requested line, no need to continue to earlier segments. + break; + } + + // No need to continue to the previous segment if either: + // 1) it ends in \n or + // 2) if ends in \r and the current segment doesn't start with \n + var previousSegment = _compositeText.Segments[firstSegmentIndex - 1]; + var previousSegmentLastChar = previousSegment.Length > 0 ? previousSegment[previousSegment.Length - 1] : '\0'; + if (previousSegmentLastChar == '\n') + { + break; + } + else if (previousSegmentLastChar == '\r') + { + var currentSegment = _compositeText.Segments[firstSegmentIndex]; + if (currentSegment.Length == 0 || currentSegment[0] != '\n') + { + break; + } + } + } + + // Determining the lastSegment is a simple walk as the _segmentLineIndexes was populated + // accounting for split "\r\n". + for (lastSegmentIndex = binarySearchSegmentIndex; lastSegmentIndex < _compositeText.Segments.Length - 1; lastSegmentIndex++) + { + if (_segmentLineIndexes[lastSegmentIndex + 1] != lineNumber) + { + break; + } + } + } + } } } diff --git a/src/Compilers/Core/Portable/Text/SubText.cs b/src/Compilers/Core/Portable/Text/SubText.cs index 7769617224043..0f8897196c105 100644 --- a/src/Compilers/Core/Portable/Text/SubText.cs +++ b/src/Compilers/Core/Portable/Text/SubText.cs @@ -63,6 +63,11 @@ public override char this[int position] } } + protected override TextLineCollection GetLinesCore() + { + return new SubTextLineInfo(this); + } + public override string ToString(TextSpan span) { CheckSubSpan(span); @@ -89,5 +94,85 @@ private TextSpan GetCompositeSpan(int start, int length) int compositeEnd = Math.Min(UnderlyingText.Length, compositeStart + length); return new TextSpan(compositeStart, compositeEnd - compositeStart); } + + /// + /// Delegates to the SubText's UnderlyingText to determine line information. + /// + internal sealed class SubTextLineInfo : TextLineCollection + { + private readonly SubText _subText; + private readonly int _startLineNumber; + private readonly int _lineCount; + private readonly bool _startsWithinSplitCRLF; + private readonly bool _endsWithinSplitCRLF; + + public SubTextLineInfo(SubText subText) + { + _subText = subText; + + var startLine = _subText.UnderlyingText.Lines.GetLineFromPosition(_subText.UnderlyingSpan.Start); + var endLine = _subText.UnderlyingText.Lines.GetLineFromPosition(_subText.UnderlyingSpan.End); + + _startLineNumber = startLine.LineNumber; + _lineCount = (endLine.LineNumber - _startLineNumber) + 1; + + if (_subText.UnderlyingSpan.Length > 0) + { + if (startLine.End == _subText.UnderlyingSpan.Start - 1 && startLine.EndIncludingLineBreak == _subText.UnderlyingSpan.Start + 1) + { + _startsWithinSplitCRLF = true; + } + + if (endLine.End == _subText.UnderlyingSpan.End - 1 && endLine.EndIncludingLineBreak == _subText.UnderlyingSpan.End + 1) + { + _endsWithinSplitCRLF = true; + + // If this subtext ends in the middle of a CR/LF, then this object should view that CR as a separate line + // whereas the UnderlyingText would not. + _lineCount += 1; + } + } + } + + public override TextLine this[int index] + { + get + { + if (_endsWithinSplitCRLF && index == _lineCount - 1) + { + // Special case splitting the CRLF at the end as the UnderlyingText doesn't view the position + // after between the \r and \n as on a new line whereas this subtext doesn't contain the \n + // and needs to view that position as on a new line. + return TextLine.FromSpanUnsafe(_subText, new TextSpan(_subText.UnderlyingSpan.End, 0)); + } + + var underlyingTextLine = _subText.UnderlyingText.Lines[index + _startLineNumber]; + + var startInUnderlyingText = Math.Max(underlyingTextLine.Start, _subText.UnderlyingSpan.Start); + var endInUnderlyingText = Math.Min(underlyingTextLine.EndIncludingLineBreak, _subText.UnderlyingSpan.End); + + var startInSubText = startInUnderlyingText - _subText.UnderlyingSpan.Start; + var length = endInUnderlyingText - startInUnderlyingText; + + return TextLine.FromSpanUnsafe(_subText, new TextSpan(startInSubText, length)); + } + } + + public override int Count => _lineCount; + + public override int IndexOf(int position) + { + var underlyingPosition = position + _subText.UnderlyingSpan.Start; + var underlyingLineNumber = _subText.UnderlyingText.Lines.IndexOf(underlyingPosition); + + if (_startsWithinSplitCRLF && position != 0) + { + // The \n contributes a line to the count in this subtext, but not in the UnderlyingText. + underlyingLineNumber += 1; + } + + return underlyingLineNumber - _startLineNumber; + } + } } } From d3495bee2ef2ac39006202fa2c7f0e536f1450a5 Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Mon, 12 Aug 2024 17:30:45 -0700 Subject: [PATCH 02/15] Fix build error on ci --- src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs b/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs index 92cd059712b1a..5713cabc8c3d5 100644 --- a/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs +++ b/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs @@ -62,7 +62,7 @@ public void CompositeTextIndexOfEqualSourceTextIndexOf(params string[] sourceTex private IEnumerable GetLinesTexts(TextLineCollection textLines) { - return textLines.Select(l => l.Text.ToString(l.SpanIncludingLineBreak)); + return textLines.Select(l => l.Text!.ToString(l.SpanIncludingLineBreak)); } private (SourceText, CompositeText) CreateSourceAndCompositeTexts(string[] contents) From 975340c7eb322957f3824cec3eaba23bef09ca90 Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Tue, 13 Aug 2024 14:16:57 -0700 Subject: [PATCH 03/15] 1) Modify tests to instead just take in a single string, and permute over all possible source texts generatable from that sequence 2) Move the two added TextLineCollection derivations to be private. --- .../Text/CompositeTextTests.cs | 98 +++++++++++-------- .../Core/Portable/Text/CompositeText.cs | 2 +- src/Compilers/Core/Portable/Text/SubText.cs | 2 +- 3 files changed, 58 insertions(+), 44 deletions(-) diff --git a/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs b/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs index 5713cabc8c3d5..13bf6cf255419 100644 --- a/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs +++ b/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs @@ -18,45 +18,32 @@ namespace Microsoft.CodeAnalysis.UnitTests.Text public class CompositeTextTests { [Theory] - [InlineData(["a", "b"])] - [InlineData(["a", "b", "c", "d", "e", "f"])] - [InlineData(["aa", "bb", "cc", "dd", "ee", "ff"])] - [InlineData(["a\r\n", "b"])] - [InlineData(["a", "\r\nb"])] - [InlineData(["\r\na\r\n", "\r\nb\r\n"])] - [InlineData(["\r\n\r\na", "b", "c", "d\r\n\r\n"])] - [InlineData(["a\r", "\nb"])] - [InlineData(["a\r", "\nb\r", "\nc"])] - [InlineData(["a\n", "\nb\n", "\nc"])] - [InlineData(["a\r", "\rb\r", "\rc"])] - public void CompositeTextLinesEqualSourceTextLines(params string[] sourceTextsContents) + [InlineData("abcdefghijkl")] + [InlineData(["\r\r\r\r\r\r\r\r\r\r\r\r"])] + [InlineData(["\n\n\n\n\n\n\n\n\n\n\n\n"])] + [InlineData(["\r\n\r\n\r\n\r\n\r\n\r\n"])] + [InlineData(["\n\r\n\r\n\r\n\r\n\r\n\r"])] + [InlineData(["a\r\nb\r\nc\r\nd\r\n"])] + [InlineData(["\ra\n\rb\n\rc\n\rd\n"])] + [InlineData(["\na\r\nb\r\nc\r\nd\r"])] + [InlineData(["ab\r\ncd\r\nef\r\n"])] + [InlineData(["ab\r\r\ncd\r\r\nef"])] + [InlineData(["ab\n\n\rcd\n\n\ref"])] + public void CompositeTextIndexOfEqualSourceTextIndexOf(string contents) { - var (sourceText, compositeText) = CreateSourceAndCompositeTexts(sourceTextsContents); - var sourceLinesText = GetLinesTexts(sourceText.Lines); - var compositeLinesText = GetLinesTexts(compositeText.Lines); - - Assert.True(sourceLinesText.SequenceEqual(compositeLinesText)); - } + // Please try to limit the inputs to this method to around 12 chars or less, as much longer than that + // will blow up the number of potential permutations. + foreach (var (sourceText, compositeText) in CreateSourceAndCompositeTexts(contents)) + { + var sourceLinesText = GetLinesTexts(sourceText.Lines); + var compositeLinesText = GetLinesTexts(compositeText.Lines); - [Theory] - [InlineData(["a", "b"])] - [InlineData(["a", "b", "c", "d", "e", "f"])] - [InlineData(["aa", "bb", "cc", "dd", "ee", "ff"])] - [InlineData(["a\r\n", "b"])] - [InlineData(["a", "\r\nb"])] - [InlineData(["\r\na\r\n", "\r\nb\r\n"])] - [InlineData(["\r\n\r\na", "b", "c", "d\r\n\r\n"])] - [InlineData(["a\r", "\nb"])] - [InlineData(["a\r", "\nb\r", "\nc"])] - [InlineData(["a\n", "\nb\n", "\nc"])] - [InlineData(["a\r", "\rb\r", "\rc"])] - public void CompositeTextIndexOfEqualSourceTextIndexOf(params string[] sourceTextsContents) - { - var (sourceText, compositeText) = CreateSourceAndCompositeTexts(sourceTextsContents); + Assert.True(sourceLinesText.SequenceEqual(compositeLinesText)); - for (var i = 0; i < sourceText.Length; i++) - { - Assert.Equal(sourceText.Lines.IndexOf(i), compositeText.Lines.IndexOf(i)); + for (var i = 0; i < sourceText.Length; i++) + { + Assert.Equal(sourceText.Lines.IndexOf(i), compositeText.Lines.IndexOf(i)); + } } } @@ -65,15 +52,42 @@ private IEnumerable GetLinesTexts(TextLineCollection textLines) return textLines.Select(l => l.Text!.ToString(l.SpanIncludingLineBreak)); } - private (SourceText, CompositeText) CreateSourceAndCompositeTexts(string[] contents) + // Returns all possible permutations of contents into SourceText arrays of length between minSourceTextCount and maxSourceTextCount + private IEnumerable<(SourceText, CompositeText)> CreateSourceAndCompositeTexts(string contents, int minSourceTextCount = 2, int maxSourceTextCount = 4) { - var texts = ArrayBuilder.GetInstance(); - texts.AddRange(contents.Select(static s => SourceText.From(s))); + var sourceText = SourceText.From(contents); - var sourceText = SourceText.From(String.Join(string.Empty, contents)); - var compositeText = (CompositeText)CompositeText.ToSourceText(texts, sourceText, adjustSegments: false); + for (var sourceTextCount = minSourceTextCount; sourceTextCount <= Math.Min(maxSourceTextCount, contents.Length); sourceTextCount++) + { + foreach (var sourceTexts in CreateSourceTextPermutations(contents, sourceTextCount)) + { + var sourceTextsBuilder = ArrayBuilder.GetInstance(); + sourceTextsBuilder.AddRange(sourceTexts); - return (sourceText, compositeText); + var compositeText = (CompositeText)CompositeText.ToSourceText(sourceTextsBuilder, sourceText, adjustSegments: false); + yield return (sourceText, compositeText); + } + } + } + + private static IEnumerable CreateSourceTextPermutations(string contents, int requestedSourceTextCount) + { + if (requestedSourceTextCount == 1) + { + yield return [SourceText.From(contents)]; + } + else + { + var maximalSourceTextLength = (contents.Length - requestedSourceTextCount) + 1; + for (int i = 1; i <= maximalSourceTextLength; i++) + { + var sourceText = SourceText.From(contents[..i]); + foreach (var otherSourceTexts in CreateSourceTextPermutations(contents.Substring(i), requestedSourceTextCount - 1)) + { + yield return [sourceText, .. otherSourceTexts]; + } + } + } } } } diff --git a/src/Compilers/Core/Portable/Text/CompositeText.cs b/src/Compilers/Core/Portable/Text/CompositeText.cs index 578138159b23d..aabd7754ad928 100644 --- a/src/Compilers/Core/Portable/Text/CompositeText.cs +++ b/src/Compilers/Core/Portable/Text/CompositeText.cs @@ -382,7 +382,7 @@ private static void TrimInaccessibleText(ArrayBuilder segments) /// /// Delegates to SourceTexts within the CompositeText to determine line information. /// - internal sealed class CompositeLineInfo : TextLineCollection + private sealed class CompositeLineInfo : TextLineCollection { private readonly CompositeText _compositeText; private readonly int[] _segmentLineIndexes; diff --git a/src/Compilers/Core/Portable/Text/SubText.cs b/src/Compilers/Core/Portable/Text/SubText.cs index 0f8897196c105..ee51a082d9904 100644 --- a/src/Compilers/Core/Portable/Text/SubText.cs +++ b/src/Compilers/Core/Portable/Text/SubText.cs @@ -98,7 +98,7 @@ private TextSpan GetCompositeSpan(int start, int length) /// /// Delegates to the SubText's UnderlyingText to determine line information. /// - internal sealed class SubTextLineInfo : TextLineCollection + private sealed class SubTextLineInfo : TextLineCollection { private readonly SubText _subText; private readonly int _startLineNumber; From 6ae36fce0931c433ff432cd8f1e90b019773ca5b Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Tue, 13 Aug 2024 14:35:45 -0700 Subject: [PATCH 04/15] Rename some variables, add a couple comments in CompositeText --- .../Core/Portable/Text/CompositeText.cs | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/src/Compilers/Core/Portable/Text/CompositeText.cs b/src/Compilers/Core/Portable/Text/CompositeText.cs index aabd7754ad928..fab65ec4db12e 100644 --- a/src/Compilers/Core/Portable/Text/CompositeText.cs +++ b/src/Compilers/Core/Portable/Text/CompositeText.cs @@ -385,36 +385,41 @@ private static void TrimInaccessibleText(ArrayBuilder segments) private sealed class CompositeLineInfo : TextLineCollection { private readonly CompositeText _compositeText; - private readonly int[] _segmentLineIndexes; + + // The starting line number for the correspondingly indexed SourceTexts in _compositeText.Segments + private readonly int[] _segmentLineNumbers; + + // The total number of lines in our _compositeText private readonly int _lineCount; public CompositeLineInfo(CompositeText compositeText) { _compositeText = compositeText; - _segmentLineIndexes = new int[compositeText.Segments.Length]; + _segmentLineNumbers = new int[compositeText.Segments.Length]; + var accumulatedLineCount = 0 for (int i = 0; i < compositeText.Segments.Length; i++) { - _segmentLineIndexes[i] = _lineCount; + _segmentLineNumbers[i] = accumulatedLineCount; var segment = compositeText.Segments[i]; - _lineCount += (segment.Lines.Count - 1); + accumulatedLineCount += (segment.Lines.Count - 1); // If "\r\n" is split amongst adjacent segments, reduce the line count by one as both segments // would count their corresponding CR or LF as a newline. if (segment.Length > 0 && - segment[segment.Length - 1] == '\r' && + segment[^1] == '\r' && i < compositeText.Segments.Length - 1) { var nextSegment = compositeText.Segments[i + 1]; if (nextSegment.Length > 0 && nextSegment[0] == '\n') { - _lineCount -= 1; + accumulatedLineCount -= 1; } } } - _lineCount += 1; + _lineCount = accumulatedLineCount + 1; } public override int Count => _lineCount; @@ -426,17 +431,17 @@ public override int IndexOf(int position) var segment = _compositeText.Segments[segmentIndex]; var lineIndexWithinSegment = segment.Lines.IndexOf(segmentOffset); - return _segmentLineIndexes[segmentIndex] + lineIndexWithinSegment; + return _segmentLineNumbers[segmentIndex] + lineIndexWithinSegment; } public override TextLine this[int lineNumber] { get { - // Determine the indexes for segments that contribute to our view of this line's contents + // Determine the indices for segments that contribute to our view of this line's contents GetSegmentIndexRangeContainingLine(lineNumber, out var firstSegmentIndex, out var lastSegmentIndex); - var firstSegmentFirstLineNumber = _segmentLineIndexes[firstSegmentIndex]; + var firstSegmentFirstLineNumber = _segmentLineNumbers[firstSegmentIndex]; var firstSegment = _compositeText.Segments[firstSegmentIndex]; var firstSegmentOffset = _compositeText._segmentOffsets[firstSegmentIndex]; var firstSegmentTextLine = firstSegment.Lines[lineNumber - firstSegmentFirstLineNumber]; @@ -457,12 +462,12 @@ public override TextLine this[int lineNumber] private void GetSegmentIndexRangeContainingLine(int lineNumber, out int firstSegmentIndex, out int lastSegmentIndex) { - int idx = _segmentLineIndexes.BinarySearch(lineNumber); + int idx = _segmentLineNumbers.BinarySearch(lineNumber); var binarySearchSegmentIndex = idx >= 0 ? idx : (~idx - 1); for (firstSegmentIndex = binarySearchSegmentIndex; firstSegmentIndex > 0; firstSegmentIndex--) { - if (_segmentLineIndexes[firstSegmentIndex] != lineNumber) + if (_segmentLineNumbers[firstSegmentIndex] != lineNumber) { // This segment doesn't start at the requested line, no need to continue to earlier segments. break; @@ -487,11 +492,11 @@ private void GetSegmentIndexRangeContainingLine(int lineNumber, out int firstSeg } } - // Determining the lastSegment is a simple walk as the _segmentLineIndexes was populated + // Determining the lastSegment is a simple walk as the _segmentLineNumbers was populated // accounting for split "\r\n". for (lastSegmentIndex = binarySearchSegmentIndex; lastSegmentIndex < _compositeText.Segments.Length - 1; lastSegmentIndex++) { - if (_segmentLineIndexes[lastSegmentIndex + 1] != lineNumber) + if (_segmentLineNumbers[lastSegmentIndex + 1] != lineNumber) { break; } From 75741f292d5d27954723a465bdea2fe44bdf8c31 Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Tue, 13 Aug 2024 14:51:41 -0700 Subject: [PATCH 05/15] reordered and reformatted some items in conditional to hopefully make it more clear --- src/Compilers/Core/Portable/Text/CompositeText.cs | 7 ++++--- src/Compilers/Core/Portable/Text/SubText.cs | 6 ++++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/Compilers/Core/Portable/Text/CompositeText.cs b/src/Compilers/Core/Portable/Text/CompositeText.cs index fab65ec4db12e..c59276ebdf13b 100644 --- a/src/Compilers/Core/Portable/Text/CompositeText.cs +++ b/src/Compilers/Core/Portable/Text/CompositeText.cs @@ -429,9 +429,9 @@ public override int IndexOf(int position) _compositeText.GetIndexAndOffset(position, out var segmentIndex, out var segmentOffset); var segment = _compositeText.Segments[segmentIndex]; - var lineIndexWithinSegment = segment.Lines.IndexOf(segmentOffset); + var lineNumberWithinSegment = segment.Lines.IndexOf(segmentOffset); - return _segmentLineNumbers[segmentIndex] + lineIndexWithinSegment; + return _segmentLineNumbers[segmentIndex] + lineNumberWithinSegment; } public override TextLine this[int lineNumber] @@ -465,6 +465,7 @@ private void GetSegmentIndexRangeContainingLine(int lineNumber, out int firstSeg int idx = _segmentLineNumbers.BinarySearch(lineNumber); var binarySearchSegmentIndex = idx >= 0 ? idx : (~idx - 1); + // Walk backwards starting at binarySearchSegmentIndex to find the earliest segment index that intersects this line number for (firstSegmentIndex = binarySearchSegmentIndex; firstSegmentIndex > 0; firstSegmentIndex--) { if (_segmentLineNumbers[firstSegmentIndex] != lineNumber) @@ -477,7 +478,7 @@ private void GetSegmentIndexRangeContainingLine(int lineNumber, out int firstSeg // 1) it ends in \n or // 2) if ends in \r and the current segment doesn't start with \n var previousSegment = _compositeText.Segments[firstSegmentIndex - 1]; - var previousSegmentLastChar = previousSegment.Length > 0 ? previousSegment[previousSegment.Length - 1] : '\0'; + var previousSegmentLastChar = previousSegment.Length > 0 ? previousSegment[^1] : '\0'; if (previousSegmentLastChar == '\n') { break; diff --git a/src/Compilers/Core/Portable/Text/SubText.cs b/src/Compilers/Core/Portable/Text/SubText.cs index ee51a082d9904..46a716ec00c13 100644 --- a/src/Compilers/Core/Portable/Text/SubText.cs +++ b/src/Compilers/Core/Portable/Text/SubText.cs @@ -118,12 +118,14 @@ public SubTextLineInfo(SubText subText) if (_subText.UnderlyingSpan.Length > 0) { - if (startLine.End == _subText.UnderlyingSpan.Start - 1 && startLine.EndIncludingLineBreak == _subText.UnderlyingSpan.Start + 1) + if (_subText.UnderlyingSpan.Start == startLine.End + 1 && + _subText.UnderlyingSpan.Start == startLine.EndIncludingLineBreak - 1) { _startsWithinSplitCRLF = true; } - if (endLine.End == _subText.UnderlyingSpan.End - 1 && endLine.EndIncludingLineBreak == _subText.UnderlyingSpan.End + 1) + if (_subText.UnderlyingSpan.End == endLine.End + 1 && + _subText.UnderlyingSpan.End == endLine.EndIncludingLineBreak - 1) { _endsWithinSplitCRLF = true; From 03543db6754a76b0db943dc277382858e1b892dc Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Tue, 13 Aug 2024 14:58:26 -0700 Subject: [PATCH 06/15] rename another variable for clarity --- src/Compilers/Core/Portable/Text/SubText.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Compilers/Core/Portable/Text/SubText.cs b/src/Compilers/Core/Portable/Text/SubText.cs index 46a716ec00c13..d094edeae125e 100644 --- a/src/Compilers/Core/Portable/Text/SubText.cs +++ b/src/Compilers/Core/Portable/Text/SubText.cs @@ -136,11 +136,11 @@ public SubTextLineInfo(SubText subText) } } - public override TextLine this[int index] + public override TextLine this[int lineNumber] { get { - if (_endsWithinSplitCRLF && index == _lineCount - 1) + if (_endsWithinSplitCRLF && lineNumber == _lineCount - 1) { // Special case splitting the CRLF at the end as the UnderlyingText doesn't view the position // after between the \r and \n as on a new line whereas this subtext doesn't contain the \n @@ -148,7 +148,7 @@ public override TextLine this[int index] return TextLine.FromSpanUnsafe(_subText, new TextSpan(_subText.UnderlyingSpan.End, 0)); } - var underlyingTextLine = _subText.UnderlyingText.Lines[index + _startLineNumber]; + var underlyingTextLine = _subText.UnderlyingText.Lines[lineNumber + _startLineNumber]; var startInUnderlyingText = Math.Max(underlyingTextLine.Start, _subText.UnderlyingSpan.Start); var endInUnderlyingText = Math.Min(underlyingTextLine.EndIncludingLineBreak, _subText.UnderlyingSpan.End); From 1c0bae9dacbf91d4bc69bfaf0c69287101f0614d Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Tue, 13 Aug 2024 15:02:50 -0700 Subject: [PATCH 07/15] fsn, seal --- .../Text/CompositeTextTests.cs | 113 +++++++++--------- 1 file changed, 54 insertions(+), 59 deletions(-) diff --git a/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs b/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs index 13bf6cf255419..e57b374e77a84 100644 --- a/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs +++ b/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs @@ -4,88 +4,83 @@ using System; using System.Collections.Generic; -using System.Collections.Immutable; -using System.IO; using System.Linq; -using System.Text; -using System.Threading.Tasks; using Microsoft.CodeAnalysis.PooledObjects; using Microsoft.CodeAnalysis.Text; using Xunit; -namespace Microsoft.CodeAnalysis.UnitTests.Text +namespace Microsoft.CodeAnalysis.UnitTests.Text; + +public sealed class CompositeTextTests { - public class CompositeTextTests + [Theory] + [InlineData("abcdefghijkl")] + [InlineData(["\r\r\r\r\r\r\r\r\r\r\r\r"])] + [InlineData(["\n\n\n\n\n\n\n\n\n\n\n\n"])] + [InlineData(["\r\n\r\n\r\n\r\n\r\n\r\n"])] + [InlineData(["\n\r\n\r\n\r\n\r\n\r\n\r"])] + [InlineData(["a\r\nb\r\nc\r\nd\r\n"])] + [InlineData(["\ra\n\rb\n\rc\n\rd\n"])] + [InlineData(["\na\r\nb\r\nc\r\nd\r"])] + [InlineData(["ab\r\ncd\r\nef\r\n"])] + [InlineData(["ab\r\r\ncd\r\r\nef"])] + [InlineData(["ab\n\n\rcd\n\n\ref"])] + public void CompositeTextIndexOfEqualSourceTextIndexOf(string contents) { - [Theory] - [InlineData("abcdefghijkl")] - [InlineData(["\r\r\r\r\r\r\r\r\r\r\r\r"])] - [InlineData(["\n\n\n\n\n\n\n\n\n\n\n\n"])] - [InlineData(["\r\n\r\n\r\n\r\n\r\n\r\n"])] - [InlineData(["\n\r\n\r\n\r\n\r\n\r\n\r"])] - [InlineData(["a\r\nb\r\nc\r\nd\r\n"])] - [InlineData(["\ra\n\rb\n\rc\n\rd\n"])] - [InlineData(["\na\r\nb\r\nc\r\nd\r"])] - [InlineData(["ab\r\ncd\r\nef\r\n"])] - [InlineData(["ab\r\r\ncd\r\r\nef"])] - [InlineData(["ab\n\n\rcd\n\n\ref"])] - public void CompositeTextIndexOfEqualSourceTextIndexOf(string contents) + // Please try to limit the inputs to this method to around 12 chars or less, as much longer than that + // will blow up the number of potential permutations. + foreach (var (sourceText, compositeText) in CreateSourceAndCompositeTexts(contents)) { - // Please try to limit the inputs to this method to around 12 chars or less, as much longer than that - // will blow up the number of potential permutations. - foreach (var (sourceText, compositeText) in CreateSourceAndCompositeTexts(contents)) - { - var sourceLinesText = GetLinesTexts(sourceText.Lines); - var compositeLinesText = GetLinesTexts(compositeText.Lines); + var sourceLinesText = GetLinesTexts(sourceText.Lines); + var compositeLinesText = GetLinesTexts(compositeText.Lines); - Assert.True(sourceLinesText.SequenceEqual(compositeLinesText)); + Assert.True(sourceLinesText.SequenceEqual(compositeLinesText)); - for (var i = 0; i < sourceText.Length; i++) - { - Assert.Equal(sourceText.Lines.IndexOf(i), compositeText.Lines.IndexOf(i)); - } + for (var i = 0; i < sourceText.Length; i++) + { + Assert.Equal(sourceText.Lines.IndexOf(i), compositeText.Lines.IndexOf(i)); } } + } - private IEnumerable GetLinesTexts(TextLineCollection textLines) - { - return textLines.Select(l => l.Text!.ToString(l.SpanIncludingLineBreak)); - } + private IEnumerable GetLinesTexts(TextLineCollection textLines) + { + return textLines.Select(l => l.Text!.ToString(l.SpanIncludingLineBreak)); + } - // Returns all possible permutations of contents into SourceText arrays of length between minSourceTextCount and maxSourceTextCount - private IEnumerable<(SourceText, CompositeText)> CreateSourceAndCompositeTexts(string contents, int minSourceTextCount = 2, int maxSourceTextCount = 4) - { - var sourceText = SourceText.From(contents); + // Returns all possible permutations of contents into SourceText arrays of length between minSourceTextCount and maxSourceTextCount + private IEnumerable<(SourceText, CompositeText)> CreateSourceAndCompositeTexts(string contents, int minSourceTextCount = 2, int maxSourceTextCount = 4) + { + var sourceText = SourceText.From(contents); - for (var sourceTextCount = minSourceTextCount; sourceTextCount <= Math.Min(maxSourceTextCount, contents.Length); sourceTextCount++) + for (var sourceTextCount = minSourceTextCount; sourceTextCount <= Math.Min(maxSourceTextCount, contents.Length); sourceTextCount++) + { + foreach (var sourceTexts in CreateSourceTextPermutations(contents, sourceTextCount)) { - foreach (var sourceTexts in CreateSourceTextPermutations(contents, sourceTextCount)) - { - var sourceTextsBuilder = ArrayBuilder.GetInstance(); - sourceTextsBuilder.AddRange(sourceTexts); + var sourceTextsBuilder = ArrayBuilder.GetInstance(); + sourceTextsBuilder.AddRange(sourceTexts); - var compositeText = (CompositeText)CompositeText.ToSourceText(sourceTextsBuilder, sourceText, adjustSegments: false); - yield return (sourceText, compositeText); - } + var compositeText = (CompositeText)CompositeText.ToSourceText(sourceTextsBuilder, sourceText, adjustSegments: false); + yield return (sourceText, compositeText); } } + } - private static IEnumerable CreateSourceTextPermutations(string contents, int requestedSourceTextCount) + private static IEnumerable CreateSourceTextPermutations(string contents, int requestedSourceTextCount) + { + if (requestedSourceTextCount == 1) { - if (requestedSourceTextCount == 1) - { - yield return [SourceText.From(contents)]; - } - else + yield return [SourceText.From(contents)]; + } + else + { + var maximalSourceTextLength = (contents.Length - requestedSourceTextCount) + 1; + for (int i = 1; i <= maximalSourceTextLength; i++) { - var maximalSourceTextLength = (contents.Length - requestedSourceTextCount) + 1; - for (int i = 1; i <= maximalSourceTextLength; i++) + var sourceText = SourceText.From(contents[..i]); + foreach (var otherSourceTexts in CreateSourceTextPermutations(contents.Substring(i), requestedSourceTextCount - 1)) { - var sourceText = SourceText.From(contents[..i]); - foreach (var otherSourceTexts in CreateSourceTextPermutations(contents.Substring(i), requestedSourceTextCount - 1)) - { - yield return [sourceText, .. otherSourceTexts]; - } + yield return [sourceText, .. otherSourceTexts]; } } } From 100549ae97e5baebd25181099965c4bf10d78cdb Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Tue, 13 Aug 2024 15:12:07 -0700 Subject: [PATCH 08/15] Missed a semi-colon --- src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs | 2 +- src/Compilers/Core/Portable/Text/CompositeText.cs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs b/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs index e57b374e77a84..1567198beb5aa 100644 --- a/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs +++ b/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs @@ -25,7 +25,7 @@ public sealed class CompositeTextTests [InlineData(["ab\r\ncd\r\nef\r\n"])] [InlineData(["ab\r\r\ncd\r\r\nef"])] [InlineData(["ab\n\n\rcd\n\n\ref"])] - public void CompositeTextIndexOfEqualSourceTextIndexOf(string contents) + public void CompositeTextLinesEqualSourceTextLinesPermutations(string contents) { // Please try to limit the inputs to this method to around 12 chars or less, as much longer than that // will blow up the number of potential permutations. diff --git a/src/Compilers/Core/Portable/Text/CompositeText.cs b/src/Compilers/Core/Portable/Text/CompositeText.cs index c59276ebdf13b..af58902edf398 100644 --- a/src/Compilers/Core/Portable/Text/CompositeText.cs +++ b/src/Compilers/Core/Portable/Text/CompositeText.cs @@ -397,7 +397,7 @@ public CompositeLineInfo(CompositeText compositeText) _compositeText = compositeText; _segmentLineNumbers = new int[compositeText.Segments.Length]; - var accumulatedLineCount = 0 + var accumulatedLineCount = 0; for (int i = 0; i < compositeText.Segments.Length; i++) { _segmentLineNumbers[i] = accumulatedLineCount; From 41397b8452366040333d0b0be15f3300007ec1d6 Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Wed, 14 Aug 2024 13:16:15 -0700 Subject: [PATCH 09/15] Add some comments, move some code around --- .../Text/CompositeTextTests.cs | 4 ++-- .../Core/Portable/Text/ChangedText.cs | 4 +--- .../Core/Portable/Text/CompositeText.cs | 24 ++++++++++--------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs b/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs index 1567198beb5aa..1bf74861d7d0f 100644 --- a/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs +++ b/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs @@ -43,13 +43,13 @@ public void CompositeTextLinesEqualSourceTextLinesPermutations(string contents) } } - private IEnumerable GetLinesTexts(TextLineCollection textLines) + private static IEnumerable GetLinesTexts(TextLineCollection textLines) { return textLines.Select(l => l.Text!.ToString(l.SpanIncludingLineBreak)); } // Returns all possible permutations of contents into SourceText arrays of length between minSourceTextCount and maxSourceTextCount - private IEnumerable<(SourceText, CompositeText)> CreateSourceAndCompositeTexts(string contents, int minSourceTextCount = 2, int maxSourceTextCount = 4) + private static IEnumerable<(SourceText, CompositeText)> CreateSourceAndCompositeTexts(string contents, int minSourceTextCount = 2, int maxSourceTextCount = 4) { var sourceText = SourceText.From(contents); diff --git a/src/Compilers/Core/Portable/Text/ChangedText.cs b/src/Compilers/Core/Portable/Text/ChangedText.cs index 3f33bf0e5a0ab..acd1e57c4ea04 100644 --- a/src/Compilers/Core/Portable/Text/ChangedText.cs +++ b/src/Compilers/Core/Portable/Text/ChangedText.cs @@ -263,9 +263,7 @@ private static ImmutableArray Merge(IReadOnlyList _newText.Lines; internal static class TestAccessor { diff --git a/src/Compilers/Core/Portable/Text/CompositeText.cs b/src/Compilers/Core/Portable/Text/CompositeText.cs index af58902edf398..95e7242013d30 100644 --- a/src/Compilers/Core/Portable/Text/CompositeText.cs +++ b/src/Compilers/Core/Portable/Text/CompositeText.cs @@ -6,8 +6,6 @@ using System.Collections.Generic; using System.Collections.Immutable; using System.Diagnostics; -using System.IO; -using System.Linq; using System.Text; using Microsoft.CodeAnalysis.PooledObjects; using Roslyn.Utilities; @@ -45,9 +43,7 @@ private CompositeText(ImmutableArray segments, Encoding? encoding, S } protected override TextLineCollection GetLinesCore() - { - return new CompositeLineInfo(this); - } + => new CompositeLineInfo(this); public override Encoding? Encoding { @@ -386,21 +382,25 @@ private sealed class CompositeLineInfo : TextLineCollection { private readonly CompositeText _compositeText; - // The starting line number for the correspondingly indexed SourceTexts in _compositeText.Segments - private readonly int[] _segmentLineNumbers; + /// + /// The starting line number for the correspondingly indexed SourceTexts in _compositeText.Segments + /// + /// + /// This will be of the same length as _compositeText.Segments + /// + private readonly ImmutableArray _segmentLineNumbers; // The total number of lines in our _compositeText private readonly int _lineCount; public CompositeLineInfo(CompositeText compositeText) { - _compositeText = compositeText; - _segmentLineNumbers = new int[compositeText.Segments.Length]; - + var segmentLineNumbers = ArrayBuilder.GetInstance(); var accumulatedLineCount = 0; + for (int i = 0; i < compositeText.Segments.Length; i++) { - _segmentLineNumbers[i] = accumulatedLineCount; + segmentLineNumbers.Add(accumulatedLineCount); var segment = compositeText.Segments[i]; accumulatedLineCount += (segment.Lines.Count - 1); @@ -419,6 +419,8 @@ public CompositeLineInfo(CompositeText compositeText) } } + _compositeText = compositeText; + _segmentLineNumbers = segmentLineNumbers.ToImmutableAndFree(); _lineCount = accumulatedLineCount + 1; } From 7feb45105c68970e396e2f00da094865aae46531 Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Wed, 14 Aug 2024 16:51:49 -0700 Subject: [PATCH 10/15] Lots of good cleanup per Cyrus's requests --- .../Core/Portable/Text/CompositeText.cs | 101 +++++++++++++----- src/Compilers/Core/Portable/Text/SubText.cs | 71 +++++++++--- 2 files changed, 131 insertions(+), 41 deletions(-) diff --git a/src/Compilers/Core/Portable/Text/CompositeText.cs b/src/Compilers/Core/Portable/Text/CompositeText.cs index 95e7242013d30..4ce9f2082eeba 100644 --- a/src/Compilers/Core/Portable/Text/CompositeText.cs +++ b/src/Compilers/Core/Portable/Text/CompositeText.cs @@ -6,6 +6,7 @@ using System.Collections.Generic; using System.Collections.Immutable; using System.Diagnostics; +using System.Linq; using System.Text; using Microsoft.CodeAnalysis.PooledObjects; using Roslyn.Utilities; @@ -43,7 +44,7 @@ private CompositeText(ImmutableArray segments, Encoding? encoding, S } protected override TextLineCollection GetLinesCore() - => new CompositeLineInfo(this); + => new CompositeTextLineInfo(this); public override Encoding? Encoding { @@ -183,6 +184,12 @@ internal static SourceText ToSourceText(ArrayBuilder segments, Sourc ReduceSegmentCountIfNecessary(segments); } + // If there are multiple segments, ensure they all have non-zero length. This simplifies the CompositeTextLineInfo logic. + if (segments.Count > 1) + { + segments.RemoveWhere(static (s, _, _) => s.Length == 0, default(VoidResult)); + } + if (segments.Count == 0) { return SourceText.From(string.Empty, original.Encoding, original.ChecksumAlgorithm); @@ -378,12 +385,14 @@ private static void TrimInaccessibleText(ArrayBuilder segments) /// /// Delegates to SourceTexts within the CompositeText to determine line information. /// - private sealed class CompositeLineInfo : TextLineCollection + private sealed class CompositeTextLineInfo : TextLineCollection { private readonly CompositeText _compositeText; /// - /// The starting line number for the correspondingly indexed SourceTexts in _compositeText.Segments + /// The starting line number for the correspondingly indexed SourceTexts in _compositeText.Segments. + /// Multiple consecutive entries could indicate the same line number if the corresponding + /// segments don't contain newline characters. /// /// /// This will be of the same length as _compositeText.Segments @@ -393,26 +402,31 @@ private sealed class CompositeLineInfo : TextLineCollection // The total number of lines in our _compositeText private readonly int _lineCount; - public CompositeLineInfo(CompositeText compositeText) + public CompositeTextLineInfo(CompositeText compositeText) { var segmentLineNumbers = ArrayBuilder.GetInstance(); var accumulatedLineCount = 0; + Debug.Assert(compositeText.Segments.Length > 0); for (int i = 0; i < compositeText.Segments.Length; i++) { segmentLineNumbers.Add(accumulatedLineCount); var segment = compositeText.Segments[i]; + + // Account for this segments lines in our accumulated lines. Subtract one as each segment + // views it's line count as one greater than the number of line breaks it contains. accumulatedLineCount += (segment.Lines.Count - 1); // If "\r\n" is split amongst adjacent segments, reduce the line count by one as both segments // would count their corresponding CR or LF as a newline. - if (segment.Length > 0 && - segment[^1] == '\r' && + Debug.Assert(segment.Length > 0); + if (segment[^1] == '\r' && i < compositeText.Segments.Length - 1) { var nextSegment = compositeText.Segments[i + 1]; - if (nextSegment.Length > 0 && nextSegment[0] == '\n') + Debug.Assert(nextSegment.Length > 0); + if (nextSegment[0] == '\n') { accumulatedLineCount -= 1; } @@ -421,13 +435,24 @@ public CompositeLineInfo(CompositeText compositeText) _compositeText = compositeText; _segmentLineNumbers = segmentLineNumbers.ToImmutableAndFree(); + + // Add one to the accumulatedLineCount for our stored line count (so that we maintain the + // invariant that a text's line count is one greater than the number of newlines it contains) _lineCount = accumulatedLineCount + 1; } public override int Count => _lineCount; + /// + /// Determines the line number of a position in this CompositeText + /// public override int IndexOf(int position) { + if (position < 0 || position > _compositeText.Length) + { + throw new ArgumentOutOfRangeException(nameof(position)); + } + _compositeText.GetIndexAndOffset(position, out var segmentIndex, out var segmentOffset); var segment = _compositeText.Segments[segmentIndex]; @@ -440,37 +465,63 @@ public override TextLine this[int lineNumber] { get { - // Determine the indices for segments that contribute to our view of this line's contents - GetSegmentIndexRangeContainingLine(lineNumber, out var firstSegmentIndex, out var lastSegmentIndex); + if (lineNumber < 0 || lineNumber >= _lineCount) + { + throw new ArgumentOutOfRangeException(nameof(lineNumber)); + } + + // Determine the indices for segments that contribute to our view of the requested line's contents + GetSegmentIndexRangeContainingLine(lineNumber, out var firstSegmentIndexInclusive, out var lastSegmentIndexInclusive); - var firstSegmentFirstLineNumber = _segmentLineNumbers[firstSegmentIndex]; - var firstSegment = _compositeText.Segments[firstSegmentIndex]; - var firstSegmentOffset = _compositeText._segmentOffsets[firstSegmentIndex]; + var firstSegmentFirstLineNumber = _segmentLineNumbers[firstSegmentIndexInclusive]; + var firstSegment = _compositeText.Segments[firstSegmentIndexInclusive]; + var firstSegmentOffset = _compositeText._segmentOffsets[firstSegmentIndexInclusive]; var firstSegmentTextLine = firstSegment.Lines[lineNumber - firstSegmentFirstLineNumber]; var lineLength = firstSegmentTextLine.SpanIncludingLineBreak.Length; - // walk forward through remaining segments contributing to this line, and add their - // view of this line. - for (var nextSegmentIndex = firstSegmentIndex + 1; nextSegmentIndex <= lastSegmentIndex; nextSegmentIndex++) + // walk forward through segments between firstSegmentIndexInclusive and lastSegmentIndexInclusive, and add their + // view of the length of this line. + for (var nextSegmentIndex = firstSegmentIndexInclusive + 1; nextSegmentIndex < lastSegmentIndexInclusive; nextSegmentIndex++) { var nextSegment = _compositeText.Segments[nextSegmentIndex]; + + // Segments between firstSegmentIndexInclusive and lastSegmentIndexInclusive should have either exactly one line or + // exactly two lines and the second line being empty. + Debug.Assert((nextSegment.Lines.Count == 1) || + (nextSegment.Lines.Count == 2 && nextSegment.Lines[1].SpanIncludingLineBreak.IsEmpty)); + lineLength += nextSegment.Lines[0].SpanIncludingLineBreak.Length; } - return TextLine.FromSpanUnsafe(_compositeText, new TextSpan(firstSegmentOffset + firstSegmentTextLine.Start, lineLength)); + if (firstSegmentIndexInclusive != lastSegmentIndexInclusive) + { + var lastSegment = _compositeText.Segments[lastSegmentIndexInclusive]; + + // lastSegment should have at least one line. + Debug.Assert(lastSegment.Lines.Count >= 1); + + lineLength += lastSegment.Lines[0].SpanIncludingLineBreak.Length; + } + + var resultLine = TextLine.FromSpanUnsafe(_compositeText, new TextSpan(firstSegmentOffset + firstSegmentTextLine.Start, lineLength)); + + // Assert resultLine only has line breaks in the appropriate locations + Debug.Assert(resultLine.ToString().All(static c => !TextUtilities.IsAnyLineBreakCharacter(c))); + + return resultLine; } } - private void GetSegmentIndexRangeContainingLine(int lineNumber, out int firstSegmentIndex, out int lastSegmentIndex) + private void GetSegmentIndexRangeContainingLine(int lineNumber, out int firstSegmentIndexInclusive, out int lastSegmentIndexInclusive) { int idx = _segmentLineNumbers.BinarySearch(lineNumber); var binarySearchSegmentIndex = idx >= 0 ? idx : (~idx - 1); // Walk backwards starting at binarySearchSegmentIndex to find the earliest segment index that intersects this line number - for (firstSegmentIndex = binarySearchSegmentIndex; firstSegmentIndex > 0; firstSegmentIndex--) + for (firstSegmentIndexInclusive = binarySearchSegmentIndex; firstSegmentIndexInclusive > 0; firstSegmentIndexInclusive--) { - if (_segmentLineNumbers[firstSegmentIndex] != lineNumber) + if (_segmentLineNumbers[firstSegmentIndexInclusive] != lineNumber) { // This segment doesn't start at the requested line, no need to continue to earlier segments. break; @@ -479,16 +530,16 @@ private void GetSegmentIndexRangeContainingLine(int lineNumber, out int firstSeg // No need to continue to the previous segment if either: // 1) it ends in \n or // 2) if ends in \r and the current segment doesn't start with \n - var previousSegment = _compositeText.Segments[firstSegmentIndex - 1]; - var previousSegmentLastChar = previousSegment.Length > 0 ? previousSegment[^1] : '\0'; + var previousSegment = _compositeText.Segments[firstSegmentIndexInclusive - 1]; + var previousSegmentLastChar = previousSegment[^1]; if (previousSegmentLastChar == '\n') { break; } else if (previousSegmentLastChar == '\r') { - var currentSegment = _compositeText.Segments[firstSegmentIndex]; - if (currentSegment.Length == 0 || currentSegment[0] != '\n') + var currentSegment = _compositeText.Segments[firstSegmentIndexInclusive]; + if (currentSegment[0] != '\n') { break; } @@ -497,9 +548,9 @@ private void GetSegmentIndexRangeContainingLine(int lineNumber, out int firstSeg // Determining the lastSegment is a simple walk as the _segmentLineNumbers was populated // accounting for split "\r\n". - for (lastSegmentIndex = binarySearchSegmentIndex; lastSegmentIndex < _compositeText.Segments.Length - 1; lastSegmentIndex++) + for (lastSegmentIndexInclusive = binarySearchSegmentIndex; lastSegmentIndexInclusive < _compositeText.Segments.Length - 1; lastSegmentIndexInclusive++) { - if (_segmentLineNumbers[lastSegmentIndex + 1] != lineNumber) + if (_segmentLineNumbers[lastSegmentIndexInclusive + 1] != lineNumber) { break; } diff --git a/src/Compilers/Core/Portable/Text/SubText.cs b/src/Compilers/Core/Portable/Text/SubText.cs index d094edeae125e..aa6c951de808c 100644 --- a/src/Compilers/Core/Portable/Text/SubText.cs +++ b/src/Compilers/Core/Portable/Text/SubText.cs @@ -3,6 +3,8 @@ // See the LICENSE file in the project root for more information. using System; +using System.Diagnostics; +using System.Linq; using System.Text; namespace Microsoft.CodeAnalysis.Text @@ -64,9 +66,7 @@ public override char this[int position] } protected override TextLineCollection GetLinesCore() - { - return new SubTextLineInfo(this); - } + => new SubTextLineInfo(this); public override string ToString(TextSpan span) { @@ -101,7 +101,7 @@ private TextSpan GetCompositeSpan(int start, int length) private sealed class SubTextLineInfo : TextLineCollection { private readonly SubText _subText; - private readonly int _startLineNumber; + private readonly int _startLineNumberInUnderlyingText; private readonly int _lineCount; private readonly bool _startsWithinSplitCRLF; private readonly bool _endsWithinSplitCRLF; @@ -110,23 +110,27 @@ public SubTextLineInfo(SubText subText) { _subText = subText; - var startLine = _subText.UnderlyingText.Lines.GetLineFromPosition(_subText.UnderlyingSpan.Start); - var endLine = _subText.UnderlyingText.Lines.GetLineFromPosition(_subText.UnderlyingSpan.End); + var startLineInUnderlyingText = _subText.UnderlyingText.Lines.GetLineFromPosition(_subText.UnderlyingSpan.Start); + var endLineInUnderlyingText = _subText.UnderlyingText.Lines.GetLineFromPosition(_subText.UnderlyingSpan.End); - _startLineNumber = startLine.LineNumber; - _lineCount = (endLine.LineNumber - _startLineNumber) + 1; + _startLineNumberInUnderlyingText = startLineInUnderlyingText.LineNumber; + _lineCount = (endLineInUnderlyingText.LineNumber - _startLineNumberInUnderlyingText) + 1; if (_subText.UnderlyingSpan.Length > 0) { - if (_subText.UnderlyingSpan.Start == startLine.End + 1 && - _subText.UnderlyingSpan.Start == startLine.EndIncludingLineBreak - 1) + var underlyingSpanStart = _subText.UnderlyingSpan.Start; + if (underlyingSpanStart == startLineInUnderlyingText.End + 1 && + underlyingSpanStart == startLineInUnderlyingText.EndIncludingLineBreak - 1) { + Debug.Assert(_subText.UnderlyingText[underlyingSpanStart - 1] == '\r' && _subText.UnderlyingText[underlyingSpanStart] == '\n'); _startsWithinSplitCRLF = true; } - if (_subText.UnderlyingSpan.End == endLine.End + 1 && - _subText.UnderlyingSpan.End == endLine.EndIncludingLineBreak - 1) + var underlyingSpanEnd = _subText.UnderlyingSpan.End; + if (underlyingSpanEnd == endLineInUnderlyingText.End + 1 && + underlyingSpanEnd == endLineInUnderlyingText.EndIncludingLineBreak - 1) { + Debug.Assert(_subText.UnderlyingText[underlyingSpanEnd - 1] == '\r' && _subText.UnderlyingText[underlyingSpanEnd] == '\n'); _endsWithinSplitCRLF = true; // If this subtext ends in the middle of a CR/LF, then this object should view that CR as a separate line @@ -140,6 +144,11 @@ public override TextLine this[int lineNumber] { get { + if (lineNumber < 0 || lineNumber >= _lineCount) + { + throw new ArgumentOutOfRangeException(nameof(lineNumber)); + } + if (_endsWithinSplitCRLF && lineNumber == _lineCount - 1) { // Special case splitting the CRLF at the end as the UnderlyingText doesn't view the position @@ -148,22 +157,52 @@ public override TextLine this[int lineNumber] return TextLine.FromSpanUnsafe(_subText, new TextSpan(_subText.UnderlyingSpan.End, 0)); } - var underlyingTextLine = _subText.UnderlyingText.Lines[lineNumber + _startLineNumber]; - + var underlyingTextLine = _subText.UnderlyingText.Lines[lineNumber + _startLineNumberInUnderlyingText]; + + // Consider input "a\r\nb" where ST1 contains "\a\r" and ST2 contains "\n\b", and requested lineNumber + // per this table: + // ---------------------------------------------------------------------------------------------------------------- + // | SubText | lineNumber | underlyingTextLine | _subText | underlyingTextLine | _subText | + // | | | .Start | .UnderlyingSpan | .EndIncludingLineBreak | .UnderlyingSpan | + // | | | | .Start | | .End | + // |--------------------------------------------------------------------------------------------------------------- + // | ST1 | 0 | 0 | 0 | 3 | 2 | + // | ST2 | 0 | 0 | 2 | 3 | 4 | + // | ST2 | 1 | 3 | 2 | 4 | 4 | + // ---------------------------------------------------------------------------------------------------------------- var startInUnderlyingText = Math.Max(underlyingTextLine.Start, _subText.UnderlyingSpan.Start); var endInUnderlyingText = Math.Min(underlyingTextLine.EndIncludingLineBreak, _subText.UnderlyingSpan.End); var startInSubText = startInUnderlyingText - _subText.UnderlyingSpan.Start; var length = endInUnderlyingText - startInUnderlyingText; - return TextLine.FromSpanUnsafe(_subText, new TextSpan(startInSubText, length)); + var resultLine = TextLine.FromSpanUnsafe(_subText, new TextSpan(startInSubText, length)); + +#if DEBUG + // Assert resultLine only has line breaks in the appropriate locations + var shouldContainLineBreak = (lineNumber != _lineCount - 1); + var resultContainsLineBreak = resultLine.EndIncludingLineBreak > resultLine.End; + + Debug.Assert(shouldContainLineBreak == resultContainsLineBreak); + Debug.Assert(resultLine.ToString().All(static c => !TextUtilities.IsAnyLineBreakCharacter(c))); +#endif + + return resultLine; } } public override int Count => _lineCount; + /// + /// Determines the line number of a position in this SubText + /// public override int IndexOf(int position) { + if (position < 0 || position > _subText.UnderlyingSpan.Length) + { + throw new ArgumentOutOfRangeException(nameof(position)); + } + var underlyingPosition = position + _subText.UnderlyingSpan.Start; var underlyingLineNumber = _subText.UnderlyingText.Lines.IndexOf(underlyingPosition); @@ -173,7 +212,7 @@ public override int IndexOf(int position) underlyingLineNumber += 1; } - return underlyingLineNumber - _startLineNumber; + return underlyingLineNumber - _startLineNumberInUnderlyingText; } } } From a40219871f35e3a4f790243cdc3f46a77a3d1a5d Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Thu, 15 Aug 2024 12:22:09 -0700 Subject: [PATCH 11/15] a couple more suggestions by Cyrus --- .../Text/CompositeTextTests.cs | 2 ++ .../Core/Portable/Text/CompositeText.cs | 27 ++++++++++--------- src/Compilers/Core/Portable/Text/SubText.cs | 10 ++++--- 3 files changed, 23 insertions(+), 16 deletions(-) diff --git a/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs b/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs index 1bf74861d7d0f..5be719081f2be 100644 --- a/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs +++ b/src/Compilers/Core/CodeAnalysisTest/Text/CompositeTextTests.cs @@ -25,6 +25,8 @@ public sealed class CompositeTextTests [InlineData(["ab\r\ncd\r\nef\r\n"])] [InlineData(["ab\r\r\ncd\r\r\nef"])] [InlineData(["ab\n\n\rcd\n\n\ref"])] + [InlineData(["ab\u0085cdef\u2028ijkl\u2029op"])] + [InlineData(["\u0085\u2028\u2029\u0085\u2028\u2029\u0085\u2028\u2029\u0085\u2028\u2029"])] public void CompositeTextLinesEqualSourceTextLinesPermutations(string contents) { // Please try to limit the inputs to this method to around 12 chars or less, as much longer than that diff --git a/src/Compilers/Core/Portable/Text/CompositeText.cs b/src/Compilers/Core/Portable/Text/CompositeText.cs index 4ce9f2082eeba..4a63a3ed19260 100644 --- a/src/Compilers/Core/Portable/Text/CompositeText.cs +++ b/src/Compilers/Core/Portable/Text/CompositeText.cs @@ -7,6 +7,7 @@ using System.Collections.Immutable; using System.Diagnostics; using System.Linq; +using System.Runtime.InteropServices; using System.Text; using Microsoft.CodeAnalysis.PooledObjects; using Roslyn.Utilities; @@ -404,13 +405,13 @@ private sealed class CompositeTextLineInfo : TextLineCollection public CompositeTextLineInfo(CompositeText compositeText) { - var segmentLineNumbers = ArrayBuilder.GetInstance(); + var segmentLineNumbers = new int[compositeText.Segments.Length]; var accumulatedLineCount = 0; Debug.Assert(compositeText.Segments.Length > 0); for (int i = 0; i < compositeText.Segments.Length; i++) { - segmentLineNumbers.Add(accumulatedLineCount); + segmentLineNumbers[i] = accumulatedLineCount; var segment = compositeText.Segments[i]; @@ -434,7 +435,7 @@ public CompositeTextLineInfo(CompositeText compositeText) } _compositeText = compositeText; - _segmentLineNumbers = segmentLineNumbers.ToImmutableAndFree(); + _segmentLineNumbers = ImmutableCollectionsMarshal.AsImmutableArray(segmentLineNumbers); // Add one to the accumulatedLineCount for our stored line count (so that we maintain the // invariant that a text's line count is one greater than the number of newlines it contains) @@ -472,6 +473,7 @@ public override TextLine this[int lineNumber] // Determine the indices for segments that contribute to our view of the requested line's contents GetSegmentIndexRangeContainingLine(lineNumber, out var firstSegmentIndexInclusive, out var lastSegmentIndexInclusive); + Debug.Assert(firstSegmentIndexInclusive <= lastSegmentIndexInclusive); var firstSegmentFirstLineNumber = _segmentLineNumbers[firstSegmentIndexInclusive]; var firstSegment = _compositeText.Segments[firstSegmentIndexInclusive]; @@ -481,7 +483,7 @@ public override TextLine this[int lineNumber] var lineLength = firstSegmentTextLine.SpanIncludingLineBreak.Length; // walk forward through segments between firstSegmentIndexInclusive and lastSegmentIndexInclusive, and add their - // view of the length of this line. + // view of the length of this line. This loop handles all segments between firstSegmentIndexInclusive and lastSegmentIndexInclusive. for (var nextSegmentIndex = firstSegmentIndexInclusive + 1; nextSegmentIndex < lastSegmentIndexInclusive; nextSegmentIndex++) { var nextSegment = _compositeText.Segments[nextSegmentIndex]; @@ -515,7 +517,7 @@ public override TextLine this[int lineNumber] private void GetSegmentIndexRangeContainingLine(int lineNumber, out int firstSegmentIndexInclusive, out int lastSegmentIndexInclusive) { - int idx = _segmentLineNumbers.BinarySearch(lineNumber); + var idx = _segmentLineNumbers.BinarySearch(lineNumber); var binarySearchSegmentIndex = idx >= 0 ? idx : (~idx - 1); // Walk backwards starting at binarySearchSegmentIndex to find the earliest segment index that intersects this line number @@ -528,16 +530,17 @@ private void GetSegmentIndexRangeContainingLine(int lineNumber, out int firstSeg } // No need to continue to the previous segment if either: - // 1) it ends in \n or - // 2) if ends in \r and the current segment doesn't start with \n + // 1) it ends in newline character that isn't \r or + // 2) it ends in \r and the current segment doesn't start with \n var previousSegment = _compositeText.Segments[firstSegmentIndexInclusive - 1]; var previousSegmentLastChar = previousSegment[^1]; - if (previousSegmentLastChar == '\n') - { - break; - } - else if (previousSegmentLastChar == '\r') + if (TextUtilities.IsAnyLineBreakCharacter(previousSegmentLastChar)) { + if (previousSegmentLastChar != '\r') + { + break; + } + var currentSegment = _compositeText.Segments[firstSegmentIndexInclusive]; if (currentSegment[0] != '\n') { diff --git a/src/Compilers/Core/Portable/Text/SubText.cs b/src/Compilers/Core/Portable/Text/SubText.cs index aa6c951de808c..c4848c92346b1 100644 --- a/src/Compilers/Core/Portable/Text/SubText.cs +++ b/src/Compilers/Core/Portable/Text/SubText.cs @@ -178,14 +178,16 @@ public override TextLine this[int lineNumber] var resultLine = TextLine.FromSpanUnsafe(_subText, new TextSpan(startInSubText, length)); -#if DEBUG - // Assert resultLine only has line breaks in the appropriate locations var shouldContainLineBreak = (lineNumber != _lineCount - 1); var resultContainsLineBreak = resultLine.EndIncludingLineBreak > resultLine.End; - Debug.Assert(shouldContainLineBreak == resultContainsLineBreak); + if (shouldContainLineBreak != resultContainsLineBreak) + { + throw new InvalidOperationException(); + } + + // Assert resultLine only has line breaks in the appropriate locations Debug.Assert(resultLine.ToString().All(static c => !TextUtilities.IsAnyLineBreakCharacter(c))); -#endif return resultLine; } From 0522300d9dddc094a4d058049b49f50ab404d500 Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Thu, 15 Aug 2024 12:25:39 -0700 Subject: [PATCH 12/15] Missed one requested assert --- src/Compilers/Core/Portable/Text/CompositeText.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/Compilers/Core/Portable/Text/CompositeText.cs b/src/Compilers/Core/Portable/Text/CompositeText.cs index 4a63a3ed19260..2771c867dcdd9 100644 --- a/src/Compilers/Core/Portable/Text/CompositeText.cs +++ b/src/Compilers/Core/Portable/Text/CompositeText.cs @@ -29,6 +29,7 @@ private CompositeText(ImmutableArray segments, Encoding? encoding, S : base(checksumAlgorithm: checksumAlgorithm) { Debug.Assert(!segments.IsDefaultOrEmpty); + Debug.Assert(segments.Length > 0); _segments = segments; _encoding = encoding; @@ -40,6 +41,7 @@ private CompositeText(ImmutableArray segments, Encoding? encoding, S for (int i = 0; i < _segmentOffsets.Length; i++) { _segmentOffsets[i] = offset; + Debug.Assert(_segments[i].Length > 0); offset += _segments[i].Length; } } From 2dae6b9e1e1a9435293f51d4eb1c1ac4503ddd3a Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Fri, 16 Aug 2024 07:21:07 -0700 Subject: [PATCH 13/15] Simplification in CompositeText's handling of split \r\n. Instead of handling in two different locations, just handle it in a single location up front. --- .../Core/Portable/Text/CompositeText.cs | 70 ++++++++++--------- 1 file changed, 38 insertions(+), 32 deletions(-) diff --git a/src/Compilers/Core/Portable/Text/CompositeText.cs b/src/Compilers/Core/Portable/Text/CompositeText.cs index 2771c867dcdd9..a2c511bcd3a80 100644 --- a/src/Compilers/Core/Portable/Text/CompositeText.cs +++ b/src/Compilers/Core/Portable/Text/CompositeText.cs @@ -187,11 +187,7 @@ internal static SourceText ToSourceText(ArrayBuilder segments, Sourc ReduceSegmentCountIfNecessary(segments); } - // If there are multiple segments, ensure they all have non-zero length. This simplifies the CompositeTextLineInfo logic. - if (segments.Count > 1) - { - segments.RemoveWhere(static (s, _, _) => s.Length == 0, default(VoidResult)); - } + RemoveSplitLineBreaksAndEmptySegments(segments); if (segments.Count == 0) { @@ -207,6 +203,38 @@ internal static SourceText ToSourceText(ArrayBuilder segments, Sourc } } + private static void RemoveSplitLineBreaksAndEmptySegments(ArrayBuilder segments) + { + if (segments.Count > 1) + { + // Remove empty segments before checking for split line breaks + segments.RemoveWhere(static (s, _, _) => s.Length == 0, default(VoidResult)); + + var splitLineBreakFound = false; + for (int i = 1; i < segments.Count; i++) + { + var prevSegment = segments[i - 1]; + var curSegment = segments[i]; + if (prevSegment.Length > 0 && prevSegment[^1] == '\r' && curSegment[0] == '\n') + { + splitLineBreakFound = true; + + segments[i - 1] = prevSegment.GetSubText(new TextSpan(0, prevSegment.Length - 1)); + segments.Insert(i, SourceText.From("\r\n")); + segments[i + 1] = curSegment.GetSubText(new TextSpan(1, curSegment.Length - 1)); + i++; + } + } + + if (splitLineBreakFound) + { + // If a split line break was present, ensure there aren't any empty lines again + // due to the sourcetexts created from the GetSubText calls. + segments.RemoveWhere(static (s, _, _) => s.Length == 0, default(VoidResult)); + } + } + } + // both of these numbers are currently arbitrary. internal const int TARGET_SEGMENT_COUNT_AFTER_REDUCTION = 32; internal const int MAXIMUM_SEGMENT_COUNT_BEFORE_REDUCTION = 64; @@ -421,19 +449,10 @@ public CompositeTextLineInfo(CompositeText compositeText) // views it's line count as one greater than the number of line breaks it contains. accumulatedLineCount += (segment.Lines.Count - 1); - // If "\r\n" is split amongst adjacent segments, reduce the line count by one as both segments - // would count their corresponding CR or LF as a newline. Debug.Assert(segment.Length > 0); - if (segment[^1] == '\r' && - i < compositeText.Segments.Length - 1) - { - var nextSegment = compositeText.Segments[i + 1]; - Debug.Assert(nextSegment.Length > 0); - if (nextSegment[0] == '\n') - { - accumulatedLineCount -= 1; - } - } + + // RemoveSplitLineBreaksAndEmptySegments ensured no split line breaks + Debug.Assert(i == compositeText.Segments.Length - 1 || segment[^1] != '\r' || compositeText.Segments[i + 1][0] != '\n'); } _compositeText = compositeText; @@ -531,28 +550,15 @@ private void GetSegmentIndexRangeContainingLine(int lineNumber, out int firstSeg break; } - // No need to continue to the previous segment if either: - // 1) it ends in newline character that isn't \r or - // 2) it ends in \r and the current segment doesn't start with \n + // No need to include the previous segment if it ends in a newline character var previousSegment = _compositeText.Segments[firstSegmentIndexInclusive - 1]; var previousSegmentLastChar = previousSegment[^1]; if (TextUtilities.IsAnyLineBreakCharacter(previousSegmentLastChar)) { - if (previousSegmentLastChar != '\r') - { - break; - } - - var currentSegment = _compositeText.Segments[firstSegmentIndexInclusive]; - if (currentSegment[0] != '\n') - { - break; - } + break; } } - // Determining the lastSegment is a simple walk as the _segmentLineNumbers was populated - // accounting for split "\r\n". for (lastSegmentIndexInclusive = binarySearchSegmentIndex; lastSegmentIndexInclusive < _compositeText.Segments.Length - 1; lastSegmentIndexInclusive++) { if (_segmentLineNumbers[lastSegmentIndexInclusive + 1] != lineNumber) From 97b40388206ceb60551303668a8681cb5f43bb76 Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Mon, 19 Aug 2024 11:34:52 -0700 Subject: [PATCH 14/15] Comment --- src/Compilers/Core/Portable/Text/CompositeText.cs | 4 ++-- src/Compilers/Core/Portable/Text/SubText.cs | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/Compilers/Core/Portable/Text/CompositeText.cs b/src/Compilers/Core/Portable/Text/CompositeText.cs index a2c511bcd3a80..b4fdc92beec36 100644 --- a/src/Compilers/Core/Portable/Text/CompositeText.cs +++ b/src/Compilers/Core/Portable/Text/CompositeText.cs @@ -423,7 +423,7 @@ private sealed class CompositeTextLineInfo : TextLineCollection /// /// The starting line number for the correspondingly indexed SourceTexts in _compositeText.Segments. /// Multiple consecutive entries could indicate the same line number if the corresponding - /// segments don't contain newline characters. + /// segments don't contain newline characters. /// /// /// This will be of the same length as _compositeText.Segments @@ -446,7 +446,7 @@ public CompositeTextLineInfo(CompositeText compositeText) var segment = compositeText.Segments[i]; // Account for this segments lines in our accumulated lines. Subtract one as each segment - // views it's line count as one greater than the number of line breaks it contains. + // views its line count as one greater than the number of line breaks it contains. accumulatedLineCount += (segment.Lines.Count - 1); Debug.Assert(segment.Length > 0); diff --git a/src/Compilers/Core/Portable/Text/SubText.cs b/src/Compilers/Core/Portable/Text/SubText.cs index c4848c92346b1..3725ec1223aee 100644 --- a/src/Compilers/Core/Portable/Text/SubText.cs +++ b/src/Compilers/Core/Portable/Text/SubText.cs @@ -170,12 +170,17 @@ public override TextLine this[int lineNumber] // | ST2 | 0 | 0 | 2 | 3 | 4 | // | ST2 | 1 | 3 | 2 | 4 | 4 | // ---------------------------------------------------------------------------------------------------------------- + + // These two variables represent this subtext's view on the start/end of the requested line, + // but in the coordinate space of _subText.UnderlyingText. var startInUnderlyingText = Math.Max(underlyingTextLine.Start, _subText.UnderlyingSpan.Start); var endInUnderlyingText = Math.Min(underlyingTextLine.EndIncludingLineBreak, _subText.UnderlyingSpan.End); + // This variable represent this subtext's view on start of the requested line, + // in it's coordinate space var startInSubText = startInUnderlyingText - _subText.UnderlyingSpan.Start; - var length = endInUnderlyingText - startInUnderlyingText; + var length = endInUnderlyingText - startInUnderlyingText; var resultLine = TextLine.FromSpanUnsafe(_subText, new TextSpan(startInSubText, length)); var shouldContainLineBreak = (lineNumber != _lineCount - 1); From 30f47eeda4d27e94bcc407647d76333beafc5f0a Mon Sep 17 00:00:00 2001 From: Todd Grunke Date: Wed, 21 Aug 2024 11:05:18 -0700 Subject: [PATCH 15/15] Update a comment and remove a condition that was preventing setting some data in the empty SubText.UnderlyingSpan case. --- src/Compilers/Core/Portable/Text/SubText.cs | 35 ++++++++++----------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/src/Compilers/Core/Portable/Text/SubText.cs b/src/Compilers/Core/Portable/Text/SubText.cs index 3725ec1223aee..e0935e8cebc0d 100644 --- a/src/Compilers/Core/Portable/Text/SubText.cs +++ b/src/Compilers/Core/Portable/Text/SubText.cs @@ -96,7 +96,7 @@ private TextSpan GetCompositeSpan(int start, int length) } /// - /// Delegates to the SubText's UnderlyingText to determine line information. + /// Delegates to the SubText's to determine line information. /// private sealed class SubTextLineInfo : TextLineCollection { @@ -116,27 +116,24 @@ public SubTextLineInfo(SubText subText) _startLineNumberInUnderlyingText = startLineInUnderlyingText.LineNumber; _lineCount = (endLineInUnderlyingText.LineNumber - _startLineNumberInUnderlyingText) + 1; - if (_subText.UnderlyingSpan.Length > 0) + var underlyingSpanStart = _subText.UnderlyingSpan.Start; + if (underlyingSpanStart == startLineInUnderlyingText.End + 1 && + underlyingSpanStart == startLineInUnderlyingText.EndIncludingLineBreak - 1) { - var underlyingSpanStart = _subText.UnderlyingSpan.Start; - if (underlyingSpanStart == startLineInUnderlyingText.End + 1 && - underlyingSpanStart == startLineInUnderlyingText.EndIncludingLineBreak - 1) - { - Debug.Assert(_subText.UnderlyingText[underlyingSpanStart - 1] == '\r' && _subText.UnderlyingText[underlyingSpanStart] == '\n'); - _startsWithinSplitCRLF = true; - } + Debug.Assert(_subText.UnderlyingText[underlyingSpanStart - 1] == '\r' && _subText.UnderlyingText[underlyingSpanStart] == '\n'); + _startsWithinSplitCRLF = true; + } - var underlyingSpanEnd = _subText.UnderlyingSpan.End; - if (underlyingSpanEnd == endLineInUnderlyingText.End + 1 && - underlyingSpanEnd == endLineInUnderlyingText.EndIncludingLineBreak - 1) - { - Debug.Assert(_subText.UnderlyingText[underlyingSpanEnd - 1] == '\r' && _subText.UnderlyingText[underlyingSpanEnd] == '\n'); - _endsWithinSplitCRLF = true; + var underlyingSpanEnd = _subText.UnderlyingSpan.End; + if (underlyingSpanEnd == endLineInUnderlyingText.End + 1 && + underlyingSpanEnd == endLineInUnderlyingText.EndIncludingLineBreak - 1) + { + Debug.Assert(_subText.UnderlyingText[underlyingSpanEnd - 1] == '\r' && _subText.UnderlyingText[underlyingSpanEnd] == '\n'); + _endsWithinSplitCRLF = true; - // If this subtext ends in the middle of a CR/LF, then this object should view that CR as a separate line - // whereas the UnderlyingText would not. - _lineCount += 1; - } + // If this subtext ends in the middle of a CR/LF, then this object should view that CR as a separate line + // whereas the UnderlyingText would not. + _lineCount += 1; } }