diff --git a/src/buffer/out/Row.cpp b/src/buffer/out/Row.cpp
index 7729cdcdbd4..4722dc0dfc4 100644
--- a/src/buffer/out/Row.cpp
+++ b/src/buffer/out/Row.cpp
@@ -92,47 +92,35 @@ CharToColumnMapper::CharToColumnMapper(const wchar_t* chars, const uint16_t* cha
// If given a position (`offset`) inside the ROW's text, this function will return the corresponding column.
// This function in particular returns the glyph's first column.
-til::CoordType CharToColumnMapper::GetLeadingColumnAt(ptrdiff_t offset) noexcept
+til::CoordType CharToColumnMapper::GetLeadingColumnAt(ptrdiff_t targetOffset) noexcept
{
- offset = clamp(offset, 0, _lastCharOffset);
+ targetOffset = clamp(targetOffset, 0, _lastCharOffset);
+
+ // This code needs to fulfill two conditions on top of the obvious (a forward/backward search):
+ // A: We never want to stop on a column that is marked with CharOffsetsTrailer (= "GetLeadingColumn").
+ // B: With these parameters we always want to stop at currentOffset=4:
+ // _charOffsets={4, 6}
+ // currentOffset=4 *OR* 6
+ // targetOffset=5
+ // This is because we're being asked for a "LeadingColumn", while the caller gave us the offset of a
+ // trailing surrogate pair or similar. Returning the column of the leading half is the correct choice.
auto col = _currentColumn;
- const auto currentOffset = _charOffsets[col] & CharOffsetsMask;
+ auto currentOffset = _charOffsets[col];
- // Goal: Move the _currentColumn cursor to a cell which contains the given target offset.
- // Depending on where the target offset is we have to either search forward or backward.
- if (offset < currentOffset)
+ // A plain forward-search until we find our targetOffset.
+ // This loop may iterate too far and thus violate our example in condition B, however...
+ while (targetOffset > (currentOffset & CharOffsetsMask))
{
- // Backward search.
- // Goal: Find the first preceding column where the offset is <= the target offset. This results in the first
- // cell that contains our target offset, even if that offset is in the middle of a long grapheme.
- //
- // We abuse the fact that the trailing half of wide glyphs is marked with CharOffsetsTrailer to our advantage.
- // Since they're >0x8000, the `offset < _charOffsets[col]` check will always be true and ensure we iterate over them.
- //
- // Since _charOffsets cannot contain negative values and because offset has been
- // clamped to be positive we naturally exit when reaching the first column.
- for (; offset < _charOffsets[col - 1]; --col)
- {
- }
+ currentOffset = _charOffsets[++col];
}
- else if (offset > currentOffset)
+ // This backward-search is not just a counter-part to the above, but simultaneously also handles conditions A and B.
+ // It abuses the fact that columns marked with CharOffsetsTrailer are >0x8000 and targetOffset is always <0x8000.
+ // This means we skip all "trailer" columns when iterating backwards, and only stop on a non-trailer (= condition A).
+ // Condition B is fixed simply because we iterate backwards after the forward-search (in that exact order).
+ while (targetOffset < currentOffset)
{
- // Forward search.
- // Goal: Find the first subsequent column where the offset is > the target offset.
- // We stop 1 column before that however so that the next loop works correctly.
- // It's the inverse of the loop above.
- //
- // Since offset has been clamped to be at most 1 less than the maximum
- // _charOffsets value the loop naturally exits before hitting the end.
- for (; offset >= (_charOffsets[col + 1] & CharOffsetsMask); ++col)
- {
- }
- // Now that we found the cell that definitely includes this char offset,
- // we have to iterate back to the cell's starting column.
- for (; WI_IsFlagSet(_charOffsets[col], CharOffsetsTrailer); --col)
- {
- }
+ currentOffset = _charOffsets[--col];
}
_currentColumn = col;
diff --git a/src/buffer/out/Row.hpp b/src/buffer/out/Row.hpp
index af8088c3ccd..197343df6d8 100644
--- a/src/buffer/out/Row.hpp
+++ b/src/buffer/out/Row.hpp
@@ -71,7 +71,7 @@ struct CharToColumnMapper
{
CharToColumnMapper(const wchar_t* chars, const uint16_t* charOffsets, ptrdiff_t lastCharOffset, til::CoordType currentColumn) noexcept;
- til::CoordType GetLeadingColumnAt(ptrdiff_t offset) noexcept;
+ til::CoordType GetLeadingColumnAt(ptrdiff_t targetOffset) noexcept;
til::CoordType GetTrailingColumnAt(ptrdiff_t offset) noexcept;
til::CoordType GetLeadingColumnAt(const wchar_t* str) noexcept;
til::CoordType GetTrailingColumnAt(const wchar_t* str) noexcept;
diff --git a/src/buffer/out/ut_textbuffer/TextBuffer.Unit.Tests.vcxproj b/src/buffer/out/ut_textbuffer/TextBuffer.Unit.Tests.vcxproj
index ad3b48c7cb5..c52d012baec 100644
--- a/src/buffer/out/ut_textbuffer/TextBuffer.Unit.Tests.vcxproj
+++ b/src/buffer/out/ut_textbuffer/TextBuffer.Unit.Tests.vcxproj
@@ -14,6 +14,7 @@
+
Create
@@ -41,4 +42,4 @@
-
\ No newline at end of file
+
diff --git a/src/buffer/out/ut_textbuffer/UTextAdapterTests.cpp b/src/buffer/out/ut_textbuffer/UTextAdapterTests.cpp
new file mode 100644
index 00000000000..ee879f4f677
--- /dev/null
+++ b/src/buffer/out/ut_textbuffer/UTextAdapterTests.cpp
@@ -0,0 +1,63 @@
+// Copyright (c) Microsoft Corporation.
+// Licensed under the MIT license.
+
+#include "precomp.h"
+
+#include "WexTestClass.h"
+#include "../textBuffer.hpp"
+#include "../../renderer/inc/DummyRenderer.hpp"
+
+template<>
+class WEX::TestExecution::VerifyOutputTraits>
+{
+public:
+ static WEX::Common::NoThrowString ToString(const std::vector& vec)
+ {
+ WEX::Common::NoThrowString str;
+ str.Append(L"{ ");
+ for (size_t i = 0; i < vec.size(); ++i)
+ {
+ const auto& s = vec[i];
+ if (i != 0)
+ {
+ str.Append(L", ");
+ }
+ str.AppendFormat(L"{(%d, %d), (%d, %d)}", s.start.x, s.start.y, s.end.x, s.end.y);
+ }
+ str.Append(L" }");
+ return str;
+ }
+};
+
+class UTextAdapterTests
+{
+ TEST_CLASS(UTextAdapterTests);
+
+ TEST_METHOD(Unicode)
+ {
+ DummyRenderer renderer;
+ TextBuffer buffer{ til::size{ 24, 1 }, TextAttribute{}, 0, false, renderer };
+
+ RowWriteState state{
+ .text = L"abc πΆπ·πΈ abc γγ³γ‘γγ",
+ };
+ buffer.Write(0, TextAttribute{}, state);
+ VERIFY_IS_TRUE(state.text.empty());
+
+ static constexpr auto s = [](til::CoordType beg, til::CoordType end) -> til::point_span {
+ return { { beg, 0 }, { end, 0 } };
+ };
+
+ auto expected = std::vector{ s(0, 2), s(8, 10) };
+ auto actual = buffer.SearchText(L"abc", false);
+ VERIFY_ARE_EQUAL(expected, actual);
+
+ expected = std::vector{ s(5, 5) };
+ actual = buffer.SearchText(L"π·", false);
+ VERIFY_ARE_EQUAL(expected, actual);
+
+ expected = std::vector{ s(12, 15) };
+ actual = buffer.SearchText(L"γγ³", false);
+ VERIFY_ARE_EQUAL(expected, actual);
+ }
+};
diff --git a/src/buffer/out/ut_textbuffer/sources b/src/buffer/out/ut_textbuffer/sources
index 570467fce63..842246aaee6 100644
--- a/src/buffer/out/ut_textbuffer/sources
+++ b/src/buffer/out/ut_textbuffer/sources
@@ -17,6 +17,7 @@ SOURCES = \
ReflowTests.cpp \
TextColorTests.cpp \
TextAttributeTests.cpp \
+ UTextAdapterTests.cpp \
DefaultResource.rc \
TARGETLIBS = \
diff --git a/src/inc/til/point.h b/src/inc/til/point.h
index 332b6fd8e0b..4da8d5c6ddb 100644
--- a/src/inc/til/point.h
+++ b/src/inc/til/point.h
@@ -276,6 +276,18 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
{
til::point start;
til::point end;
+
+ constexpr bool operator==(const point_span& rhs) const noexcept
+ {
+ // `__builtin_memcmp` isn't an official standard, but it's the
+ // only way at the time of writing to get a constexpr `memcmp`.
+ return __builtin_memcmp(this, &rhs, sizeof(rhs)) == 0;
+ }
+
+ constexpr bool operator!=(const point_span& rhs) const noexcept
+ {
+ return __builtin_memcmp(this, &rhs, sizeof(rhs)) != 0;
+ }
};
}