Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add an efficient text stream write function #14821

Merged
merged 18 commits into from
Mar 24, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
375 changes: 311 additions & 64 deletions src/buffer/out/Row.cpp

Large diffs are not rendered by default.

67 changes: 64 additions & 3 deletions src/buffer/out/Row.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ Revision History:

#pragma once

#include <span>

#include <til/rle.h>

#include "LineRendition.hpp"
Expand All @@ -37,6 +35,34 @@ enum class DelimiterClass
RegularChar
};

struct RowTextIterator
{
RowTextIterator(std::span<const wchar_t> chars, std::span<const uint16_t> charOffsets, uint16_t offset) noexcept;

bool operator==(const RowTextIterator& other) const noexcept;
RowTextIterator& operator++() noexcept;
const RowTextIterator& operator*() const noexcept;

std::wstring_view Text() const noexcept;
til::CoordType Cols() const noexcept;
DbcsAttribute DbcsAttr() const noexcept;

private:
uint16_t _uncheckedCharOffset(size_t col) const noexcept;
bool _uncheckedIsTrailer(size_t col) const noexcept;

// To simplify the detection of wide glyphs, we don't just store the simple character offset as described
// for _charOffsets. Instead we use the most significant bit to indicate whether any column is the
// trailing half of a wide glyph. This simplifies many implementation details via _uncheckedIsTrailer.
static constexpr uint16_t CharOffsetsTrailer = 0x8000;
static constexpr uint16_t CharOffsetsMask = 0x7fff;

std::span<const wchar_t> _chars;
std::span<const uint16_t> _charOffsets;
uint16_t _beg;
uint16_t _end;
};

class ROW final
{
public:
Expand All @@ -57,17 +83,25 @@ class ROW final
bool WasDoubleBytePadded() const noexcept;
void SetLineRendition(const LineRendition lineRendition) noexcept;
LineRendition GetLineRendition() const noexcept;
RowTextIterator Begin() const noexcept;
RowTextIterator End() const noexcept;

void Reset(const TextAttribute& attr);
void Resize(wchar_t* charsBuffer, uint16_t* charOffsetsBuffer, uint16_t rowWidth, const TextAttribute& fillAttribute);
void TransferAttributes(const til::small_rle<TextAttribute, uint16_t, 1>& attr, til::CoordType newWidth);

til::CoordType NavigateToPrevious(til::CoordType column) const noexcept;
til::CoordType NavigateToNext(til::CoordType column) const noexcept;

void ClearCell(til::CoordType column);
OutputCellIterator WriteCells(OutputCellIterator it, til::CoordType columnBegin, std::optional<bool> wrap = std::nullopt, std::optional<til::CoordType> limitRight = std::nullopt);
bool SetAttrToEnd(til::CoordType columnBegin, TextAttribute attr);
void ReplaceAttributes(til::CoordType beginIndex, til::CoordType endIndex, const TextAttribute& newAttr);
void ReplaceCharacters(til::CoordType columnBegin, til::CoordType width, const std::wstring_view& chars);
til::CoordType Write(til::CoordType columnBegin, til::CoordType columnLimit, std::wstring_view& chars);
til::CoordType CopyRangeFrom(til::CoordType columnBegin, til::CoordType columnLimit, const ROW& other, til::CoordType& otherBegin, til::CoordType otherLimit);

til::small_rle<TextAttribute, uint16_t, 1>& Attributes() noexcept;
const til::small_rle<TextAttribute, uint16_t, 1>& Attributes() const noexcept;
TextAttribute GetAttrByColumn(til::CoordType column) const;
std::vector<uint16_t> GetHyperlinks() const;
Expand All @@ -89,6 +123,30 @@ class ROW final
#endif

private:
// WriteHelper exists because other forms of abstracting this functionality away (like templates with lambdas)
// where only very poorly optimized by MSVC as it failed to inline the templates.
struct WriteHelper
{
explicit WriteHelper(ROW& row, til::CoordType columnBegin, til::CoordType columnLimit, const std::wstring_view& chars) noexcept;
bool IsValid() const noexcept;
void ReplaceCharacters(til::CoordType width) noexcept;
void Write() noexcept;
void CopyRangeFrom(const std::span<const uint16_t>& charOffsets) noexcept;
void Finish();

ROW& row;
const std::wstring_view& chars;
uint16_t colBeg;
uint16_t colLimit;
uint16_t chExtBeg;
uint16_t colExtBeg;
uint16_t leadingSpaces;
uint16_t chBeg;
uint16_t colEnd;
uint16_t colExtEnd;
size_t charsConsumed;
};

// To simplify the detection of wide glyphs, we don't just store the simple character offset as described
// for _charOffsets. Instead we use the most significant bit to indicate whether any column is the
// trailing half of a wide glyph. This simplifies many implementation details via _uncheckedIsTrailer.
Expand All @@ -102,13 +160,16 @@ class ROW final
template<typename T>
constexpr uint16_t _clampedColumnInclusive(T v) const noexcept;

uint16_t _adjustBackward(uint16_t column) const noexcept;
uint16_t _adjustForward(uint16_t column) const noexcept;

wchar_t _uncheckedChar(size_t off) const noexcept;
uint16_t _charSize() const noexcept;
uint16_t _uncheckedCharOffset(size_t col) const noexcept;
bool _uncheckedIsTrailer(size_t col) const noexcept;

void _init() noexcept;
void _resizeChars(uint16_t colExtEnd, uint16_t chExtBeg, uint16_t chExtEnd, size_t chExtEndNew);
void _resizeChars(uint16_t colExtEnd, uint16_t chExtBeg, uint16_t chExtEndOld, size_t chExtEndNew);

// These fields are a bit "wasteful", but it makes all this a bit more robust against
// programming errors during initial development (which is when this comment was written).
Expand Down
19 changes: 19 additions & 0 deletions src/buffer/out/textBuffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,25 @@ bool TextBuffer::_PrepareForDoubleByteSequence(const DbcsAttribute dbcsAttribute
return fSuccess;
}

void TextBuffer::ConsumeGrapheme(std::wstring_view& chars) noexcept
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

kinda weird that this is a static on textbuffer when it just does a til::utf16_pop, but presumably this does more in the future?

Copy link
Member Author

@lhecker lhecker Mar 24, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the future I'll replace this function with some ICU code that actually advances the string by an entire grapheme cluster. So, if you have something like å ("a˚") it'll advance by 2 code points and not just 1 like it does now.

It's possible to just put the ICU code into til and use it throughout the code directly, just like we do it right now with the UTF-16 helpers. But from now on, I'd like to avoid doing that, even if it means writing such static methods, because I'd like to keep everything string handling related as close and tight as possible in the future. I think OutputCellIterator had the same intention originally and was well meant, but it suffers from being a leaky abstraction. Lots of code is now built around an implicit assumption that OutputCellIterator will always advance by exactly 1 or 2 columns. Using the til UTF-16 helpers directly elsewhere in our code would have a similar effect in my opinion, because it would equally leak (and potentially incorrectly leak) any assumptions about how TextBuffer handles incoming text under normal circumstances.

{
// This function is supposed to mirror the behavior of ROW::Write, when it reads characters off of `chars`.
// (I know that a UTF-16 code point is not a grapheme, but that's what we're working towards.)
chars = til::utf16_pop(chars);
}

til::CoordType TextBuffer::Write(til::CoordType row, til::CoordType columnBegin, til::CoordType columnLimit, bool wrapAtEOL, const TextAttribute& attributes, std::wstring_view& chars)
{
auto& r = GetRowByOffset(row);

const auto columnEnd = r.Write(columnBegin, columnLimit, chars);
r.ReplaceAttributes(columnBegin, columnEnd, attributes);
r.SetWrapForced(wrapAtEOL && columnEnd >= r.size());

TriggerRedraw(Viewport::FromExclusive({ columnBegin, row, columnEnd, row + 1 }));
return columnEnd;
}

// Routine Description:
// - Writes cells to the output buffer. Writes at the cursor.
// Arguments:
Expand Down
3 changes: 3 additions & 0 deletions src/buffer/out/textBuffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ class TextBuffer final
TextBufferTextIterator GetTextDataAt(const til::point at, const Microsoft::Console::Types::Viewport limit) const;

// Text insertion functions
static void ConsumeGrapheme(std::wstring_view& chars) noexcept;
til::CoordType Write(til::CoordType row, til::CoordType columnBegin, til::CoordType columnLimit, bool wrapAtEOL, const TextAttribute& attributes, std::wstring_view& chars);

OutputCellIterator Write(const OutputCellIterator givenIt);

OutputCellIterator Write(const OutputCellIterator givenIt,
Expand Down
10 changes: 10 additions & 0 deletions src/host/ut_host/ScreenBufferTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5248,6 +5248,16 @@ void ScreenBufferTests::SetAutoWrapMode()
// Content should be clamped to the line width, overwriting the last char.
VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"abf", attributes));
VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition());
// Writing a wide glyph into the last 2 columns and overwriting it with a narrow one.
cursor.SetPosition({ 80 - 3, startLine });
stateMachine.ProcessString(L"a\U0001F604b");
VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"a b", attributes));
VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition());
// Writing a wide glyph into the last column and overwriting it with a narrow one.
cursor.SetPosition({ 80 - 3, startLine });
stateMachine.ProcessString(L"ab\U0001F604c");
VERIFY_IS_TRUE(_ValidateLineContains({ 80 - 3, startLine }, L"abc", attributes));
VERIFY_ARE_EQUAL(til::point(79, startLine), cursor.GetPosition());

Log::Comment(L"When DECAWM is set, output is wrapped again.");
stateMachine.ProcessString(L"\x1b[?7h");
Expand Down
46 changes: 46 additions & 0 deletions src/host/ut_host/TextBufferTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ class TextBufferTests

TEST_METHOD(TestBurrito);
TEST_METHOD(TestOverwriteChars);
TEST_METHOD(TestRowWrite);

TEST_METHOD(TestAppendRTFText);

Expand Down Expand Up @@ -2046,6 +2047,51 @@ void TextBufferTests::TestOverwriteChars()
#undef complex1
}

void TextBufferTests::TestRowWrite()
{
til::size bufferSize{ 10, 3 };
UINT cursorSize = 12;
TextAttribute attr{ 0x7f };
TextBuffer buffer{ bufferSize, attr, cursorSize, false, _renderer };
auto& row = buffer.GetRowByOffset(0);
std::wstring_view str;
til::CoordType pos = 0;

#define complex L"\U0001F41B"

// Not enough space -> early exit
str = complex;
pos = row.Write(2, 2, str);
VERIFY_ARE_EQUAL(2, pos);
VERIFY_ARE_EQUAL(complex, str);
VERIFY_ARE_EQUAL(L" ", row.GetText());

// Writing with the exact right amount of space
str = complex;
pos = row.Write(2, 4, str);
VERIFY_ARE_EQUAL(4, pos);
VERIFY_ARE_EQUAL(L"", str);
VERIFY_ARE_EQUAL(L" " complex L" ", row.GetText());

// Overwrite a wide character, but with not enough space left
str = complex complex;
pos = row.Write(0, 3, str);
// It's not quite clear what Write() should return in that case,
// so this simply asserts on what it happens to return right now.
VERIFY_ARE_EQUAL(4, pos);
VERIFY_ARE_EQUAL(complex, str);
VERIFY_ARE_EQUAL(complex L" ", row.GetText());

// Various text, too much to fit into the row
str = L"a" complex L"b" complex L"c" complex L"foo";
pos = row.Write(1, til::CoordTypeMax, str);
VERIFY_ARE_EQUAL(10, pos);
VERIFY_ARE_EQUAL(L"foo", str);
VERIFY_ARE_EQUAL(L" a" complex L"b" complex L"c" complex, row.GetText());

#undef complex
}

void TextBufferTests::TestAppendRTFText()
{
{
Expand Down
43 changes: 7 additions & 36 deletions src/inc/test/CommonState.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -256,49 +256,20 @@ class CommonState
std::unique_ptr<TextBuffer> m_backupTextBufferInfo;
std::unique_ptr<INPUT_READ_HANDLE_DATA> m_readHandle;

struct TestString
{
std::wstring_view string;
bool wide = false;
};

static void applyTestString(ROW* pRow, const auto& testStrings)
{
uint16_t x = 0;
for (const auto& t : testStrings)
{
if (t.wide)
{
pRow->ReplaceCharacters(x, 2, t.string);
x += 2;
}
else
{
for (const auto& ch : t.string)
{
pRow->ReplaceCharacters(x, 1, { &ch, 1 });
x += 1;
}
}
}
}

void FillRow(ROW* pRow, bool wrapForced)
{
// fill a row
// 9 characters, 6 spaces. 15 total
// か = \x304b
// き = \x304d

static constexpr std::array testStrings{
TestString{ L"AB" },
TestString{ L"\x304b", true },
TestString{ L"C" },
TestString{ L"\x304d", true },
TestString{ L"DE " },
};

applyTestString(pRow, testStrings);
uint16_t column = 0;
for (const auto& ch : std::wstring_view{ L"AB\u304bC\u304dDE " })
{
const uint16_t width = ch >= 0x80 ? 2 : 1;
pRow->ReplaceCharacters(column, width, { &ch, 1 });
column += width;
}
Comment on lines +266 to +272
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I forgot why I made this change... But it's much shorter now, so there's that at least.


// A = bright red on dark gray
// This string starts at index 0
Expand Down
2 changes: 2 additions & 0 deletions src/inc/til/point.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
namespace til // Terminal Implementation Library. Also: "Today I Learned"
{
using CoordType = int32_t;
inline constexpr CoordType CoordTypeMin = INT32_MIN;
inline constexpr CoordType CoordTypeMax = INT32_MAX;

namespace details
{
Expand Down
9 changes: 9 additions & 0 deletions src/inc/til/rle.h
Original file line number Diff line number Diff line change
Expand Up @@ -479,6 +479,15 @@ namespace til // Terminal Implementation Library. Also: "Today I Learned"
_replace_unchecked(start_index, end_index, replacements);
}

// Replace the range [start_index, end_index) with replacements.
// If end_index is larger than size() it's set to size().
// start_index must be smaller or equal to end_index.
void replace(size_type start_index, size_type end_index, const basic_rle& replacements)
{
_check_indices(start_index, end_index);
_replace_unchecked(start_index, end_index, replacements._runs);
}

// Replaces every instance of old_value in this vector with new_value.
void replace_values(const value_type& old_value, const value_type& new_value)
{
Expand Down
24 changes: 24 additions & 0 deletions src/inc/til/unicode.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,30 @@ namespace til
return { ptr, len };
}

// Removes the first code point off of `wstr` and returns the rest.
constexpr std::wstring_view utf16_pop(std::wstring_view wstr) noexcept
{
auto it = wstr.begin();
const auto end = wstr.end();

if (it != end)
{
const auto wch = *it;
++it;

if (is_surrogate(wch))
{
const auto wch2 = it != end ? *it : wchar_t{};
if (is_leading_surrogate(wch) && is_trailing_surrogate(wch2))
{
++it;
}
}
}

return { it, end };
}

// Splits a UTF16 string into codepoints, yielding `wstring_view`s of UTF16 text. Use it as:
// for (const auto& str : til::utf16_iterator{ input }) { ... }
struct utf16_iterator
Expand Down
Loading