From 608e268d991d153a0a6e0e0c1a2cde01b7a5da36 Mon Sep 17 00:00:00 2001 From: Paul Irwin Date: Sat, 16 Nov 2024 09:46:57 -0700 Subject: [PATCH 1/4] Implement ISpanAppendable in CharBlockArray, ICharTermAttribute, and CharTermAttributeImpl --- .../Taxonomy/WriterCache/CharBlockArray.cs | 140 ++++++------------ .../TokenAttributes/CharTermAttribute.cs | 21 +-- .../TokenAttributes/CharTermAttributeImpl.cs | 43 +++--- 3 files changed, 81 insertions(+), 123 deletions(-) diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs index f64c7eab3d..ff06a371e5 100644 --- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs +++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs @@ -35,10 +35,11 @@ namespace Lucene.Net.Facet.Taxonomy.WriterCache /// @lucene.experimental /// // LUCENENET NOTE: The serialization features here are strictly for testing purposes, - // therefore it doesn't make any difference what type of serialization is used. - // To make things simpler, we are using BinaryReader and BinaryWriter since + // therefore it doesn't make any difference what type of serialization is used. + // To make things simpler, we are using BinaryReader and BinaryWriter since // BinaryFormatter is not implemented in .NET Standard 1.x. - internal class CharBlockArray : IAppendable, ICharSequence + internal class CharBlockArray : IAppendable, ICharSequence, + ISpanAppendable /* LUCENENET specific */ { private const long serialVersionUID = 1L; @@ -65,8 +66,6 @@ public object Clone() return clone; } - - // LUCENENET specific public void Serialize(Stream writer) { @@ -192,28 +191,8 @@ public virtual CharBlockArray Append(char[]? value) return this; // No-op } - int remain = value.Length; - int offset = 0; - while (remain > 0) - { - if (this.current.length == this.blockSize) - { - AddBlock(); - } - int toCopy = remain; - int remainingInBlock = this.blockSize - this.current.length; - if (remainingInBlock < toCopy) - { - toCopy = remainingInBlock; - } - Arrays.Copy(value, offset, this.current.chars, this.current.length, toCopy); - offset += toCopy; - remain -= toCopy; - this.current.length += toCopy; - } - - this.length += value.Length; - return this; + // LUCENENET specific - use ReadOnlySpan version + return Append(value.AsSpan()); } public virtual CharBlockArray Append(char[]? value, int startIndex, int length) @@ -235,29 +214,8 @@ public virtual CharBlockArray Append(char[]? value, int startIndex, int length) if (startIndex > value.Length - length) throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(length)} <= {nameof(Length)}."); - - int offset = startIndex; - int remain = length; - while (remain > 0) - { - if (this.current.length == this.blockSize) - { - AddBlock(); - } - int toCopy = remain; - int remainingInBlock = this.blockSize - this.current.length; - if (remainingInBlock < toCopy) - { - toCopy = remainingInBlock; - } - Arrays.Copy(value, offset, this.current.chars, this.current.length, toCopy); - offset += toCopy; - remain -= toCopy; - this.current.length += toCopy; - } - - this.length += length; - return this; + // LUCENENET specific - use ReadOnlySpan version + return Append(value.AsSpan(startIndex, length)); } public virtual CharBlockArray Append(string? value) @@ -267,28 +225,8 @@ public virtual CharBlockArray Append(string? value) return this; // No-op } - int remain = value.Length; - int offset = 0; - while (remain > 0) - { - if (this.current.length == this.blockSize) - { - AddBlock(); - } - int toCopy = remain; - int remainingInBlock = this.blockSize - this.current.length; - if (remainingInBlock < toCopy) - { - toCopy = remainingInBlock; - } - value.CopyTo(offset, this.current.chars, this.current.length, toCopy); - offset += toCopy; - remain -= toCopy; - this.current.length += toCopy; - } - - this.length += value.Length; - return this; + // LUCENENET specific - use ReadOnlySpan version + return Append(value.AsSpan()); } public virtual CharBlockArray Append(string? value, int startIndex, int length) @@ -310,29 +248,8 @@ public virtual CharBlockArray Append(string? value, int startIndex, int length) if (startIndex > value.Length - length) throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(length)} <= {nameof(Length)}."); - - int offset = startIndex; - int remain = length; - while (remain > 0) - { - if (this.current.length == this.blockSize) - { - AddBlock(); - } - int toCopy = remain; - int remainingInBlock = this.blockSize - this.current.length; - if (remainingInBlock < toCopy) - { - toCopy = remainingInBlock; - } - value.CopyTo(offset, this.current.chars, this.current.length, toCopy); - offset += toCopy; - remain -= toCopy; - this.current.length += toCopy; - } - - this.length += length; - return this; + // LUCENENET specific - use ReadOnlySpan version + return Append(value.AsSpan(startIndex, length)); } public virtual CharBlockArray Append(StringBuilder? value) @@ -409,6 +326,32 @@ public virtual CharBlockArray Append(StringBuilder? value, int startIndex, int l return this; } + public virtual CharBlockArray Append(ReadOnlySpan value) + { + int offset = 0; + int remain = value.Length; + while (remain > 0) + { + if (this.current.length == this.blockSize) + { + AddBlock(); + } + int toCopy = remain; + int remainingInBlock = this.blockSize - this.current.length; + if (remainingInBlock < toCopy) + { + toCopy = remainingInBlock; + } + value.Slice(offset, toCopy).CopyTo(this.current.chars.AsSpan(this.current.length)); + offset += toCopy; + remain -= toCopy; + this.current.length += toCopy; + } + + this.length += value.Length; + return this; + } + #nullable restore #region IAppendable Members @@ -431,6 +374,11 @@ public virtual CharBlockArray Append(StringBuilder? value, int startIndex, int l IAppendable IAppendable.Append(ICharSequence value, int startIndex, int count) => Append(value, startIndex, count); + #endregion + + #region ISpanAppendable Members + + ISpanAppendable ISpanAppendable.Append(ReadOnlySpan value) => Append(value); #endregion @@ -612,4 +560,4 @@ internal bool Equals(int startIndex, int length, ReadOnlySpan other) return true; } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs index e15b1517d8..f52620e398 100644 --- a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs +++ b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs @@ -26,7 +26,8 @@ namespace Lucene.Net.Analysis.TokenAttributes /// /// The term text of a . /// - public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable + public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable, + ISpanAppendable /* LUCENENET specific */ { /// /// Copies the contents of buffer, starting at offset for @@ -43,7 +44,7 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable /// you can then directly alter. If the array is too /// small for your token, use /// to increase it. After - /// altering the buffer be sure to call + /// altering the buffer be sure to call /// to record the number of valid /// characters that were placed into the termBuffer. /// @@ -76,15 +77,15 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable /// the termBuffer array. Use this to truncate the termBuffer /// or to synchronize with external manipulation of the termBuffer. /// Note: to grow the size of the array, - /// use first. - /// NOTE: This is exactly the same operation as calling the setter, the primary + /// use first. + /// NOTE: This is exactly the same operation as calling the setter, the primary /// difference is that this method returns a reference to the current object so it can be chained. /// /// obj.SetLength(30).Append("hey you"); /// /// /// the truncated length - ICharTermAttribute SetLength(int length); + ICharTermAttribute SetLength(int length); /// /// Sets the length of the termBuffer to zero. @@ -197,8 +198,8 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable /// /// The sequence of characters to append. /// - /// LUCENENET specific method, added because the .NET data type - /// doesn't implement . + /// LUCENENET specific method, added because the .NET data type + /// doesn't implement . /// new ICharTermAttribute Append(string value); @@ -228,8 +229,8 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable /// + is greater than the length of . /// /// - /// LUCENENET specific method, added because the .NET data type - /// doesn't implement . + /// LUCENENET specific method, added because the .NET data type + /// doesn't implement . /// new ICharTermAttribute Append(string value, int startIndex, int count); // LUCENENET TODO: API - change to startIndex/length to match .NET @@ -270,7 +271,7 @@ public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable /// + is greater than the length of . /// /// - /// LUCENENET specific method, added because the .NET data type + /// LUCENENET specific method, added because the .NET data type /// doesn't implement . /// new ICharTermAttribute Append(StringBuilder value, int startIndex, int count); diff --git a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs index bd04b96d0a..3440977eb4 100644 --- a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs +++ b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs @@ -34,7 +34,8 @@ namespace Lucene.Net.Analysis.TokenAttributes /// /// Default implementation of . - public class CharTermAttribute : Attribute, ICharTermAttribute, ITermToBytesRefAttribute, IAppendable // LUCENENET specific: Not implementing ICloneable per Microsoft's recommendation + public class CharTermAttribute : Attribute, ICharTermAttribute, ITermToBytesRefAttribute, IAppendable, // LUCENENET specific: Not implementing ICloneable per Microsoft's recommendation + ISpanAppendable /* LUCENENET specific */ { private const int MIN_BUFFER_SIZE = 10; @@ -85,7 +86,7 @@ public char[] ResizeBuffer(int newSize) { // Not big enough; create a new array with slight // over allocation and preserve content - + // LUCENENET: Resize rather than copy Array.Resize(ref termBuffer, ArrayUtil.Oversize(newSize, RamUsageEstimator.NUM_BYTES_CHAR)); } @@ -196,7 +197,6 @@ public ICharSequence Subsequence(int startIndex, int length) // *** Appendable interface *** - public CharTermAttribute Append(string value, int startIndex, int charCount) { // LUCENENET: Changed semantics to be the same as the StringBuilder in .NET @@ -216,10 +216,8 @@ public CharTermAttribute Append(string value, int startIndex, int charCount) if (startIndex > value.Length - charCount) throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(charCount)} <= {nameof(Length)}."); - value.CopyTo(startIndex, InternalResizeBuffer(termLength + charCount), termLength, charCount); - Length += charCount; - - return this; + // LUCENENET specific - use ReadOnlySpan version for better performance + return Append(value.AsSpan(startIndex, charCount)); } public CharTermAttribute Append(char value) @@ -234,11 +232,8 @@ public CharTermAttribute Append(char[] value) //return AppendNull(); return this; // No-op - int len = value.Length; - value.CopyTo(InternalResizeBuffer(termLength + len), termLength); - Length += len; - - return this; + // LUCENENET specific - use ReadOnlySpan version for better performance + return Append(value.AsSpan()); } public CharTermAttribute Append(char[] value, int startIndex, int charCount) @@ -260,10 +255,8 @@ public CharTermAttribute Append(char[] value, int startIndex, int charCount) if (startIndex > value.Length - charCount) throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(charCount)} <= {nameof(Length)}."); - Arrays.Copy(value, startIndex, InternalResizeBuffer(termLength + charCount), termLength, charCount); - Length += charCount; - - return this; + // LUCENENET specific - use ReadOnlySpan version for better performance + return Append(value.AsSpan(startIndex, charCount)); } public CharTermAttribute Append(string value) @@ -358,6 +351,17 @@ public CharTermAttribute Append(ICharSequence value, int startIndex, int charCou return this; } + public CharTermAttribute Append(ReadOnlySpan value) + { + if (value.Length == 0) + return this; + + value.CopyTo(InternalResizeBuffer(termLength + value.Length).AsSpan(termLength)); + Length += value.Length; + + return this; + } + private char[] InternalResizeBuffer(int length) { if (termBuffer.Length < length) @@ -524,7 +528,12 @@ public override void CopyTo(IAttribute target) // LUCENENET specific - intention IAppendable IAppendable.Append(ICharSequence value, int startIndex, int count) => Append(value, startIndex, count); + #endregion + + #region ISpanAppendable Members + + ISpanAppendable ISpanAppendable.Append(ReadOnlySpan value) => Append(value); #endregion } -} \ No newline at end of file +} From 50b670297cce12f346f2c1064bf403dfb4f59ebc Mon Sep 17 00:00:00 2001 From: Paul Irwin Date: Sat, 16 Nov 2024 17:39:04 -0700 Subject: [PATCH 2/4] Add tests for ReadOnlySpan overloads of CharBlockArray.Append and CharTermAttributeImpl.Append --- .../Taxonomy/WriterCache/TestCharBlockArray.cs | 18 ++++++++++++++---- .../TestCharTermAttributeImpl.cs | 8 ++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/src/Lucene.Net.Tests.Facet/Taxonomy/WriterCache/TestCharBlockArray.cs b/src/Lucene.Net.Tests.Facet/Taxonomy/WriterCache/TestCharBlockArray.cs index 8b2c667714..b30010a64c 100644 --- a/src/Lucene.Net.Tests.Facet/Taxonomy/WriterCache/TestCharBlockArray.cs +++ b/src/Lucene.Net.Tests.Facet/Taxonomy/WriterCache/TestCharBlockArray.cs @@ -44,9 +44,9 @@ public virtual void TestArray() // CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() // .onUnmappableCharacter(CodingErrorAction.REPLACE) // .onMalformedInput(CodingErrorAction.REPLACE); - // - // Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage, - // new EncoderReplacementFallback("?"), + // + // Encoding decoder = Encoding.GetEncoding(Encoding.UTF8.CodePage, + // new EncoderReplacementFallback("?"), // new DecoderReplacementFallback("?")); for (int i = 0; i < n; i++) @@ -247,6 +247,11 @@ public virtual void TestAppendableInterface() expected = t.ToString(); t.Append((char[])null); // No-op Assert.AreEqual(expected, t.ToString()); + + // LUCENENET specific - test ReadOnlySpan overload + t = new CharBlockArray(); + t.Append("12345678".AsSpan()); + Assert.AreEqual("12345678", t.ToString()); } // LUCENENET: Borrowed this test from TestCharTermAttributeImpl @@ -285,6 +290,11 @@ public virtual void TestAppendableInterfaceWithLongSequences() const string longTestString = "012345678901234567890123456789"; t.Append(new CharSequenceAnonymousClass(longTestString)); Assert.AreEqual("4567890123456" + longTestString, t.ToString()); + + // LUCENENET specific - test ReadOnlySpan overload + t = new CharBlockArray(); + t.Append("01234567890123456789012345678901234567890123456789".AsSpan()); + Assert.AreEqual("01234567890123456789012345678901234567890123456789", t.ToString()); } private sealed class CharSequenceAnonymousClass : ICharSequence @@ -319,4 +329,4 @@ public override string ToString() } } } -} \ No newline at end of file +} diff --git a/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs b/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs index c7fe4d28d1..bcd8345e12 100644 --- a/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs +++ b/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs @@ -292,6 +292,10 @@ public virtual void TestAppendableInterface() t.Append((char[])null); // No-op Assert.AreEqual("4teste", t.ToString()); + + // LUCENENET specific - test ReadOnlySpan overload + t.SetEmpty().Append("12345678".AsSpan()); + Assert.AreEqual("12345678", t.ToString()); } [Test] @@ -326,6 +330,10 @@ public virtual void TestAppendableInterfaceWithLongSequences() const string longTestString = "012345678901234567890123456789"; t.Append(new CharSequenceAnonymousClass(longTestString)); Assert.AreEqual("4567890123456" + longTestString, t.ToString()); + + // LUCENENET specific - test ReadOnlySpan overload + t.SetEmpty().Append("01234567890123456789012345678901234567890123456789".AsSpan()); + Assert.AreEqual("01234567890123456789012345678901234567890123456789", t.ToString()); } private sealed class CharSequenceAnonymousClass : ICharSequence From 1a8b7837d4e72bae1b0c7e9c73de5d9b564c0723 Mon Sep 17 00:00:00 2001 From: Paul Irwin Date: Tue, 19 Nov 2024 13:39:12 -0700 Subject: [PATCH 3/4] PR feedback --- .../Taxonomy/WriterCache/CharBlockArray.cs | 96 +++++++++++++++++-- .../TestCharTermAttributeImpl.cs | 26 ++++- .../TokenAttributes/CharTermAttribute.cs | 3 +- .../TokenAttributes/CharTermAttributeImpl.cs | 19 ++-- 4 files changed, 123 insertions(+), 21 deletions(-) diff --git a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs index ff06a371e5..7a253a0bd7 100644 --- a/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs +++ b/src/Lucene.Net.Facet/Taxonomy/WriterCache/CharBlockArray.cs @@ -191,8 +191,28 @@ public virtual CharBlockArray Append(char[]? value) return this; // No-op } - // LUCENENET specific - use ReadOnlySpan version - return Append(value.AsSpan()); + int remain = value.Length; + int offset = 0; + while (remain > 0) + { + if (this.current.length == this.blockSize) + { + AddBlock(); + } + int toCopy = remain; + int remainingInBlock = this.blockSize - this.current.length; + if (remainingInBlock < toCopy) + { + toCopy = remainingInBlock; + } + Arrays.Copy(value, offset, this.current.chars, this.current.length, toCopy); + offset += toCopy; + remain -= toCopy; + this.current.length += toCopy; + } + + this.length += value.Length; + return this; } public virtual CharBlockArray Append(char[]? value, int startIndex, int length) @@ -214,8 +234,28 @@ public virtual CharBlockArray Append(char[]? value, int startIndex, int length) if (startIndex > value.Length - length) throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(length)} <= {nameof(Length)}."); - // LUCENENET specific - use ReadOnlySpan version - return Append(value.AsSpan(startIndex, length)); + int offset = startIndex; + int remain = length; + while (remain > 0) + { + if (this.current.length == this.blockSize) + { + AddBlock(); + } + int toCopy = remain; + int remainingInBlock = this.blockSize - this.current.length; + if (remainingInBlock < toCopy) + { + toCopy = remainingInBlock; + } + Arrays.Copy(value, offset, this.current.chars, this.current.length, toCopy); + offset += toCopy; + remain -= toCopy; + this.current.length += toCopy; + } + + this.length += length; + return this; } public virtual CharBlockArray Append(string? value) @@ -225,8 +265,28 @@ public virtual CharBlockArray Append(string? value) return this; // No-op } - // LUCENENET specific - use ReadOnlySpan version - return Append(value.AsSpan()); + int remain = value.Length; + int offset = 0; + while (remain > 0) + { + if (this.current.length == this.blockSize) + { + AddBlock(); + } + int toCopy = remain; + int remainingInBlock = this.blockSize - this.current.length; + if (remainingInBlock < toCopy) + { + toCopy = remainingInBlock; + } + value.CopyTo(offset, this.current.chars, this.current.length, toCopy); + offset += toCopy; + remain -= toCopy; + this.current.length += toCopy; + } + + this.length += value.Length; + return this; } public virtual CharBlockArray Append(string? value, int startIndex, int length) @@ -248,8 +308,28 @@ public virtual CharBlockArray Append(string? value, int startIndex, int length) if (startIndex > value.Length - length) throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(length)} <= {nameof(Length)}."); - // LUCENENET specific - use ReadOnlySpan version - return Append(value.AsSpan(startIndex, length)); + int offset = startIndex; + int remain = length; + while (remain > 0) + { + if (this.current.length == this.blockSize) + { + AddBlock(); + } + int toCopy = remain; + int remainingInBlock = this.blockSize - this.current.length; + if (remainingInBlock < toCopy) + { + toCopy = remainingInBlock; + } + value.CopyTo(offset, this.current.chars, this.current.length, toCopy); + offset += toCopy; + remain -= toCopy; + this.current.length += toCopy; + } + + this.length += length; + return this; } public virtual CharBlockArray Append(StringBuilder? value) diff --git a/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs b/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs index bcd8345e12..2df8210f4b 100644 --- a/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs +++ b/src/Lucene.Net.Tests/Analysis/TokenAttributes/TestCharTermAttributeImpl.cs @@ -1,5 +1,6 @@ using J2N.IO; using J2N.Text; +using Lucene.Net.Attributes; using NUnit.Framework; using System; using System.Collections.Generic; @@ -292,10 +293,6 @@ public virtual void TestAppendableInterface() t.Append((char[])null); // No-op Assert.AreEqual("4teste", t.ToString()); - - // LUCENENET specific - test ReadOnlySpan overload - t.SetEmpty().Append("12345678".AsSpan()); - Assert.AreEqual("12345678", t.ToString()); } [Test] @@ -330,10 +327,29 @@ public virtual void TestAppendableInterfaceWithLongSequences() const string longTestString = "012345678901234567890123456789"; t.Append(new CharSequenceAnonymousClass(longTestString)); Assert.AreEqual("4567890123456" + longTestString, t.ToString()); + } + + [Test] + [LuceneNetSpecific] + public virtual void TestSpanAppendableInterface() + { + CharTermAttribute t = new CharTermAttribute(); + + // Test with a span + t.Append("12345678".AsSpan()); + Assert.AreEqual("12345678", t.ToString()); - // LUCENENET specific - test ReadOnlySpan overload + // test with a span slice + t.Append("0123456789".AsSpan(3, 5 - 3)); + Assert.AreEqual("1234567834", t.ToString()); + + // test with a long span t.SetEmpty().Append("01234567890123456789012345678901234567890123456789".AsSpan()); Assert.AreEqual("01234567890123456789012345678901234567890123456789", t.ToString()); + + // test with a long span slice + t.Append("01234567890123456789012345678901234567890123456789".AsSpan(3, 50 - 3)); + Assert.AreEqual("0123456789012345678901234567890123456789012345678934567890123456789012345678901234567890123456789", t.ToString()); } private sealed class CharSequenceAnonymousClass : ICharSequence diff --git a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs index f52620e398..fa3cede774 100644 --- a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs +++ b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttribute.cs @@ -26,8 +26,7 @@ namespace Lucene.Net.Analysis.TokenAttributes /// /// The term text of a . /// - public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable, - ISpanAppendable /* LUCENENET specific */ + public interface ICharTermAttribute : IAttribute, ICharSequence, IAppendable { /// /// Copies the contents of buffer, starting at offset for diff --git a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs index 3440977eb4..8ab9aca194 100644 --- a/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs +++ b/src/Lucene.Net/Analysis/TokenAttributes/CharTermAttributeImpl.cs @@ -216,8 +216,10 @@ public CharTermAttribute Append(string value, int startIndex, int charCount) if (startIndex > value.Length - charCount) throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(charCount)} <= {nameof(Length)}."); - // LUCENENET specific - use ReadOnlySpan version for better performance - return Append(value.AsSpan(startIndex, charCount)); + value.CopyTo(startIndex, InternalResizeBuffer(termLength + charCount), termLength, charCount); + Length += charCount; + + return this; } public CharTermAttribute Append(char value) @@ -232,8 +234,11 @@ public CharTermAttribute Append(char[] value) //return AppendNull(); return this; // No-op - // LUCENENET specific - use ReadOnlySpan version for better performance - return Append(value.AsSpan()); + int len = value.Length; + value.CopyTo(InternalResizeBuffer(termLength + len), termLength); + Length += len; + + return this; } public CharTermAttribute Append(char[] value, int startIndex, int charCount) @@ -255,8 +260,10 @@ public CharTermAttribute Append(char[] value, int startIndex, int charCount) if (startIndex > value.Length - charCount) throw new ArgumentOutOfRangeException(nameof(startIndex), $"Index and length must refer to a location within the string. For example {nameof(startIndex)} + {nameof(charCount)} <= {nameof(Length)}."); - // LUCENENET specific - use ReadOnlySpan version for better performance - return Append(value.AsSpan(startIndex, charCount)); + Arrays.Copy(value, startIndex, InternalResizeBuffer(termLength + charCount), termLength, charCount); + Length += charCount; + + return this; } public CharTermAttribute Append(string value) From 25ad8d436c96190b257dc892bbbd773016a95f96 Mon Sep 17 00:00:00 2001 From: Paul Irwin Date: Tue, 19 Nov 2024 13:42:59 -0700 Subject: [PATCH 4/4] Split out span appendable test for CharBlockArray --- .../WriterCache/TestCharBlockArray.cs | 26 ++++++++++++++----- 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/src/Lucene.Net.Tests.Facet/Taxonomy/WriterCache/TestCharBlockArray.cs b/src/Lucene.Net.Tests.Facet/Taxonomy/WriterCache/TestCharBlockArray.cs index b30010a64c..295b01afce 100644 --- a/src/Lucene.Net.Tests.Facet/Taxonomy/WriterCache/TestCharBlockArray.cs +++ b/src/Lucene.Net.Tests.Facet/Taxonomy/WriterCache/TestCharBlockArray.cs @@ -247,11 +247,6 @@ public virtual void TestAppendableInterface() expected = t.ToString(); t.Append((char[])null); // No-op Assert.AreEqual(expected, t.ToString()); - - // LUCENENET specific - test ReadOnlySpan overload - t = new CharBlockArray(); - t.Append("12345678".AsSpan()); - Assert.AreEqual("12345678", t.ToString()); } // LUCENENET: Borrowed this test from TestCharTermAttributeImpl @@ -290,11 +285,30 @@ public virtual void TestAppendableInterfaceWithLongSequences() const string longTestString = "012345678901234567890123456789"; t.Append(new CharSequenceAnonymousClass(longTestString)); Assert.AreEqual("4567890123456" + longTestString, t.ToString()); + } - // LUCENENET specific - test ReadOnlySpan overload + [Test] + [LuceneNetSpecific] + public virtual void TestSpanAppendableInterface() + { + CharBlockArray t = new CharBlockArray(); + + // Test with a span + t.Append("12345678".AsSpan()); + Assert.AreEqual("12345678", t.ToString()); + + // test with a span slice + t.Append("0123456789".AsSpan(3, 5 - 3)); + Assert.AreEqual("1234567834", t.ToString()); + + // test with a long span t = new CharBlockArray(); t.Append("01234567890123456789012345678901234567890123456789".AsSpan()); Assert.AreEqual("01234567890123456789012345678901234567890123456789", t.ToString()); + + // test with a long span slice + t.Append("01234567890123456789012345678901234567890123456789".AsSpan(3, 50 - 3)); + Assert.AreEqual("0123456789012345678901234567890123456789012345678934567890123456789012345678901234567890123456789", t.ToString()); } private sealed class CharSequenceAnonymousClass : ICharSequence