Skip to content

Commit

Permalink
Vectorize TextColor::GetColor (#10779)
Browse files Browse the repository at this point in the history
I was watching a video about vectorized instructions and I wanted to
try out some new things, as I had never written AVX code before.
This commit is the result of this tiny Thursday morning detour into
AVX land. It improves performance of `TextColor::GetColor` by about 3x.

## Validation Steps Performed

* Default colors are still properly shifted +8 ✔️
  • Loading branch information
lhecker authored Aug 2, 2021
1 parent 34a6b19 commit fc64ff3
Show file tree
Hide file tree
Showing 13 changed files with 154 additions and 129 deletions.
6 changes: 6 additions & 0 deletions .github/actions/spelling/expect/expect.txt
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,7 @@ autoscrolling
Autowrap
AVerify
AVI
AVX
awch
azuredevopspodcast
azzle
Expand Down Expand Up @@ -190,6 +191,7 @@ CARETBLINKINGENABLED
CARRIAGERETURN
cascadia
cassert
castsi
catid
cazamor
CBash
Expand Down Expand Up @@ -270,6 +272,7 @@ Cmdlet
cmdline
CMOUSEBUTTONS
cmp
cmpeq
cmt
cmyk
CNL
Expand Down Expand Up @@ -1261,6 +1264,7 @@ lnkd
lnkfile
LNM
LOADONCALL
loadu
LOBYTE
localappdata
localhost
Expand Down Expand Up @@ -1421,6 +1425,7 @@ MOUSEFIRST
MOUSEHWHEEL
MOUSEMOVE
mousewheel
movemask
MOVESTART
msb
msbuild
Expand Down Expand Up @@ -2531,6 +2536,7 @@ vcvarsall
vcxitems
vcxproj
vec
vectorized
VERCTRL
versioning
VERTBAR
Expand Down
2 changes: 1 addition & 1 deletion src/buffer/out/TextAttribute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ bool TextAttribute::IsLegacy() const noexcept
// - blinkingIsFaint: true if blinking should be interpreted as faint.
// Return Value:
// - the foreground and background colors that should be displayed.
std::pair<COLORREF, COLORREF> TextAttribute::CalculateRgbColors(const gsl::span<const COLORREF> colorTable,
std::pair<COLORREF, COLORREF> TextAttribute::CalculateRgbColors(const std::array<COLORREF, 256>& colorTable,
const COLORREF defaultFgColor,
const COLORREF defaultBgColor,
const bool reverseScreenMode,
Expand Down
2 changes: 1 addition & 1 deletion src/buffer/out/TextAttribute.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class TextAttribute final
static TextAttribute StripErroneousVT16VersionsOfLegacyDefaults(const TextAttribute& attribute) noexcept;
WORD GetLegacyAttributes() const noexcept;

std::pair<COLORREF, COLORREF> CalculateRgbColors(const gsl::span<const COLORREF> colorTable,
std::pair<COLORREF, COLORREF> CalculateRgbColors(const std::array<COLORREF, 256>& colorTable,
const COLORREF defaultFgColor,
const COLORREF defaultBgColor,
const bool reverseScreenMode = false,
Expand Down
66 changes: 60 additions & 6 deletions src/buffer/out/TextColor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ constexpr std::array<BYTE, 256> Index256ToIndex16 = {

// clang-format on

// We should only need 4B for TextColor. Any more than that is just waste.
static_assert(sizeof(TextColor) == 4);

bool TextColor::CanBeBrightened() const noexcept
{
return IsIndex16() || IsDefault();
Expand Down Expand Up @@ -138,15 +141,12 @@ void TextColor::SetDefault() noexcept
// - brighten: if true, we'll brighten a dark color table index.
// Return Value:
// - a COLORREF containing the real value of this TextColor.
COLORREF TextColor::GetColor(gsl::span<const COLORREF> colorTable,
const COLORREF defaultColor,
bool brighten) const noexcept
COLORREF TextColor::GetColor(const std::array<COLORREF, 256>& colorTable, const COLORREF defaultColor, bool brighten) const noexcept
{
if (IsDefault())
{
if (brighten)
{
FAIL_FAST_IF(colorTable.size() < 16);
// See MSFT:20266024 for context on this fix.
// Additionally todo MSFT:20271956 to fix this better for 19H2+
// If we're a default color, check to see if the defaultColor exists
Expand All @@ -156,13 +156,67 @@ COLORREF TextColor::GetColor(gsl::span<const COLORREF> colorTable,
// (Settings::_DefaultForeground==INVALID_COLOR, and the index
// from _wFillAttribute is being used instead.)
// If we find a match, return instead the bright version of this color

static_assert(sizeof(COLORREF) * 8 == 32, "The vectorized code broke. If you can't fix COLORREF, just remove the vectorized code.");

#pragma warning(push)
#pragma warning(disable : 26481) // Don't use pointer arithmetic. Use span instead (bounds.1).
#pragma warning(disable : 26490) // Don't use reinterpret_cast (type.1).
#ifdef __AVX2__
// I wrote this vectorized code one day, because the sun was shining so nicely.
// There's no other reason for this to exist here, except for being pretty.
// This code implements the exact same for loop you can find below, but is ~3x faster.
//
// A brief explanation for people unfamiliar with vectorized instructions:
// Vectorized instructions, like "SSE" or "AVX", allow you to run
// common operations like additions, multiplications, comparisons,
// or bitwise operations concurrently on multiple values at once.
//
// We want to find the given defaultColor in the first 8 values of colorTable.
// Coincidentally a COLORREF is a DWORD and 8 of them are exactly 256 bits.
// -- The size of a single AVX register.
//
// Thus, the code works like this:
// 1. Load all 8 DWORDs at once into one register
// 2. Set the same defaultColor 8 times in another register
// 3. Compare all 8 values at once
// The result is either 0xffffffff or 0x00000000.
// 4. Extract the most significant bit of each DWORD
// Assuming that no duplicate colors exist in colorTable,
// the result will be something like 0b00100000.
// 5. Use BitScanForward (bsf) to find the index of the most significant 1 bit.
const auto haystack = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(colorTable.data())); // 1.
const auto needle = _mm256_set1_epi32(__builtin_bit_cast(int, defaultColor)); // 2.
const auto result = _mm256_cmpeq_epi32(haystack, needle); // 3.
const auto mask = _mm256_movemask_ps(_mm256_castsi256_ps(result)); // 4.
unsigned long index;
return _BitScanForward(&index, mask) ? til::at(colorTable, static_cast<size_t>(index) + 8) : defaultColor; // 5.
#elif _M_AMD64
// If you look closely this SSE2 algorithm is the exact same as the AVX one.
// The two differences are that we need to:
// * do everything twice, because SSE is limited to 128 bits and not 256.
// * use _mm_packs_epi32 to merge two 128 bits vectors into one in step 3.5.
// _mm_packs_epi32 takes two SSE registers and truncates all 8 DWORDs into 8 WORDs,
// the latter of which fits into a single register (which is then used in the identical step 4).
const auto haystack1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(colorTable.data() + 0));
const auto haystack2 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(colorTable.data() + 4));
const auto needle = _mm_set1_epi32(__builtin_bit_cast(int, defaultColor));
const auto result1 = _mm_cmpeq_epi32(haystack1, needle);
const auto result2 = _mm_cmpeq_epi32(haystack2, needle);
const auto result = _mm_packs_epi32(result1, result2); // 3.5
const auto mask = _mm_movemask_ps(_mm_castsi128_ps(result));
unsigned long index;
return _BitScanForward(&index, mask) ? til::at(colorTable, static_cast<size_t>(index) + 8) : defaultColor;
#else
for (size_t i = 0; i < 8; i++)
{
if (til::at(colorTable, i) == defaultColor)
{
return til::at(colorTable, i + 8);
}
}
#endif
#pragma warning(pop)
}

return defaultColor;
Expand Down Expand Up @@ -199,7 +253,7 @@ BYTE TextColor::GetLegacyIndex(const BYTE defaultIndex) const noexcept
}
else if (IsIndex256())
{
return Index256ToIndex16.at(GetIndex());
return til::at(Index256ToIndex16, GetIndex());
}
else
{
Expand All @@ -208,7 +262,7 @@ BYTE TextColor::GetLegacyIndex(const BYTE defaultIndex) const noexcept
const BYTE compressedRgb = (_red & 0b11100000) +
((_green >> 3) & 0b00011100) +
((_blue >> 6) & 0b00000011);
return CompressedRgbToIndex16.at(compressedRgb);
return til::at(CompressedRgbToIndex16, compressedRgb);
}
}

Expand Down
7 changes: 1 addition & 6 deletions src/buffer/out/TextColor.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,7 @@ struct TextColor
void SetIndex(const BYTE index, const bool isIndex256) noexcept;
void SetDefault() noexcept;

COLORREF GetColor(gsl::span<const COLORREF> colorTable,
const COLORREF defaultColor,
const bool brighten = false) const noexcept;

COLORREF GetColor(const std::array<COLORREF, 256>& colorTable, const COLORREF defaultColor, bool brighten = false) const noexcept;
BYTE GetLegacyIndex(const BYTE defaultIndex) const noexcept;

constexpr BYTE GetIndex() const noexcept
Expand Down Expand Up @@ -157,5 +154,3 @@ namespace WEX
}
}
#endif

static_assert(sizeof(TextColor) <= 4 * sizeof(BYTE), "We should only need 4B for an entire TextColor. Any more than that is just waste");
2 changes: 1 addition & 1 deletion src/buffer/out/lib/bufferout.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
<ClInclude Include="..\Row.hpp" />
<ClInclude Include="..\search.h" />
<ClInclude Include="..\TextColor.h" />
<ClInclude Include="..\TextAttribute.h" />
<ClInclude Include="..\TextAttribute.hpp" />
<ClInclude Include="..\textBuffer.hpp" />
<ClInclude Include="..\textBufferCellIterator.hpp" />
<ClInclude Include="..\textBufferTextIterator.hpp" />
Expand Down
71 changes: 31 additions & 40 deletions src/buffer/out/ut_textbuffer/TextAttributeTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,9 @@ class TextAttributeTests
TEST_METHOD(TestReverseDefaultColors);
TEST_METHOD(TestRoundtripDefaultColors);

static const int COLOR_TABLE_SIZE = 16;
COLORREF _colorTable[COLOR_TABLE_SIZE];
std::array<COLORREF, 256> _colorTable;
COLORREF _defaultFg = RGB(1, 2, 3);
COLORREF _defaultBg = RGB(4, 5, 6);
gsl::span<const COLORREF> _GetTableView();
};

bool TextAttributeTests::ClassSetup()
Expand All @@ -51,11 +49,6 @@ bool TextAttributeTests::ClassSetup()
return true;
}

gsl::span<const COLORREF> TextAttributeTests::_GetTableView()
{
return gsl::span<const COLORREF>(&_colorTable[0], COLOR_TABLE_SIZE);
}

void TextAttributeTests::TestRoundtripLegacy()
{
WORD expectedLegacy = FOREGROUND_BLUE | BACKGROUND_RED;
Expand Down Expand Up @@ -133,23 +126,22 @@ void TextAttributeTests::TestTextAttributeColorGetters()
const COLORREF faintRed = RGB(127, 0, 0);
const COLORREF green = RGB(0, 255, 0);
TextAttribute attr(red, green);
auto view = _GetTableView();

// verify that calculated foreground/background are the same as the direct
// values when reverse video is not set
VERIFY_IS_FALSE(attr.IsReverseVideo());

VERIFY_ARE_EQUAL(red, attr.GetForeground().GetColor(view, _defaultFg));
VERIFY_ARE_EQUAL(green, attr.GetBackground().GetColor(view, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(red, green), attr.CalculateRgbColors(view, _defaultFg, _defaultBg));
VERIFY_ARE_EQUAL(red, attr.GetForeground().GetColor(_colorTable, _defaultFg));
VERIFY_ARE_EQUAL(green, attr.GetBackground().GetColor(_colorTable, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(red, green), attr.CalculateRgbColors(_colorTable, _defaultFg, _defaultBg));

// with reverse video set, calculated foreground/background values should be
// switched while getters stay the same
attr.SetReverseVideo(true);

VERIFY_ARE_EQUAL(red, attr.GetForeground().GetColor(view, _defaultFg));
VERIFY_ARE_EQUAL(green, attr.GetBackground().GetColor(view, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(green, red), attr.CalculateRgbColors(view, _defaultFg, _defaultBg));
VERIFY_ARE_EQUAL(red, attr.GetForeground().GetColor(_colorTable, _defaultFg));
VERIFY_ARE_EQUAL(green, attr.GetBackground().GetColor(_colorTable, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(green, red), attr.CalculateRgbColors(_colorTable, _defaultFg, _defaultBg));

// reset the reverse video
attr.SetReverseVideo(false);
Expand All @@ -158,17 +150,17 @@ void TextAttributeTests::TestTextAttributeColorGetters()
// while the background and getters stay the same
attr.SetFaint(true);

VERIFY_ARE_EQUAL(red, attr.GetForeground().GetColor(view, _defaultFg));
VERIFY_ARE_EQUAL(green, attr.GetBackground().GetColor(view, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(faintRed, green), attr.CalculateRgbColors(view, _defaultFg, _defaultBg));
VERIFY_ARE_EQUAL(red, attr.GetForeground().GetColor(_colorTable, _defaultFg));
VERIFY_ARE_EQUAL(green, attr.GetBackground().GetColor(_colorTable, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(faintRed, green), attr.CalculateRgbColors(_colorTable, _defaultFg, _defaultBg));

// with reverse video set, calculated foreground/background values should be
// switched, and the background fainter, while getters stay the same
attr.SetReverseVideo(true);

VERIFY_ARE_EQUAL(red, attr.GetForeground().GetColor(view, _defaultFg));
VERIFY_ARE_EQUAL(green, attr.GetBackground().GetColor(view, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(green, faintRed), attr.CalculateRgbColors(view, _defaultFg, _defaultBg));
VERIFY_ARE_EQUAL(red, attr.GetForeground().GetColor(_colorTable, _defaultFg));
VERIFY_ARE_EQUAL(green, attr.GetBackground().GetColor(_colorTable, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(green, faintRed), attr.CalculateRgbColors(_colorTable, _defaultFg, _defaultBg));

// reset the reverse video and faint attributes
attr.SetReverseVideo(false);
Expand All @@ -178,58 +170,57 @@ void TextAttributeTests::TestTextAttributeColorGetters()
// background, while getters stay the same
attr.SetInvisible(true);

VERIFY_ARE_EQUAL(red, attr.GetForeground().GetColor(view, _defaultFg));
VERIFY_ARE_EQUAL(green, attr.GetBackground().GetColor(view, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(green, green), attr.CalculateRgbColors(view, _defaultFg, _defaultBg));
VERIFY_ARE_EQUAL(red, attr.GetForeground().GetColor(_colorTable, _defaultFg));
VERIFY_ARE_EQUAL(green, attr.GetBackground().GetColor(_colorTable, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(green, green), attr.CalculateRgbColors(_colorTable, _defaultFg, _defaultBg));

// with reverse video set, the calculated background value should match
// the foreground, while getters stay the same
attr.SetReverseVideo(true);

VERIFY_ARE_EQUAL(red, attr.GetForeground().GetColor(view, _defaultFg));
VERIFY_ARE_EQUAL(green, attr.GetBackground().GetColor(view, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(red, red), attr.CalculateRgbColors(view, _defaultFg, _defaultBg));
VERIFY_ARE_EQUAL(red, attr.GetForeground().GetColor(_colorTable, _defaultFg));
VERIFY_ARE_EQUAL(green, attr.GetBackground().GetColor(_colorTable, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(red, red), attr.CalculateRgbColors(_colorTable, _defaultFg, _defaultBg));
}

void TextAttributeTests::TestReverseDefaultColors()
{
const COLORREF red = RGB(255, 0, 0);
const COLORREF green = RGB(0, 255, 0);
TextAttribute attr{};
auto view = _GetTableView();

// verify that calculated foreground/background are the same as the direct
// values when reverse video is not set
VERIFY_IS_FALSE(attr.IsReverseVideo());

VERIFY_ARE_EQUAL(_defaultFg, attr.GetForeground().GetColor(view, _defaultFg));
VERIFY_ARE_EQUAL(_defaultBg, attr.GetBackground().GetColor(view, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(_defaultFg, _defaultBg), attr.CalculateRgbColors(view, _defaultFg, _defaultBg));
VERIFY_ARE_EQUAL(_defaultFg, attr.GetForeground().GetColor(_colorTable, _defaultFg));
VERIFY_ARE_EQUAL(_defaultBg, attr.GetBackground().GetColor(_colorTable, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(_defaultFg, _defaultBg), attr.CalculateRgbColors(_colorTable, _defaultFg, _defaultBg));

// with reverse video set, calculated foreground/background values should be
// switched while getters stay the same
attr.SetReverseVideo(true);
VERIFY_IS_TRUE(attr.IsReverseVideo());

VERIFY_ARE_EQUAL(_defaultFg, attr.GetForeground().GetColor(view, _defaultFg));
VERIFY_ARE_EQUAL(_defaultBg, attr.GetBackground().GetColor(view, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(_defaultBg, _defaultFg), attr.CalculateRgbColors(view, _defaultFg, _defaultBg));
VERIFY_ARE_EQUAL(_defaultFg, attr.GetForeground().GetColor(_colorTable, _defaultFg));
VERIFY_ARE_EQUAL(_defaultBg, attr.GetBackground().GetColor(_colorTable, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(_defaultBg, _defaultFg), attr.CalculateRgbColors(_colorTable, _defaultFg, _defaultBg));

attr.SetForeground(red);
VERIFY_IS_TRUE(attr.IsReverseVideo());

VERIFY_ARE_EQUAL(red, attr.GetForeground().GetColor(view, _defaultFg));
VERIFY_ARE_EQUAL(_defaultBg, attr.GetBackground().GetColor(view, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(_defaultBg, red), attr.CalculateRgbColors(view, _defaultFg, _defaultBg));
VERIFY_ARE_EQUAL(red, attr.GetForeground().GetColor(_colorTable, _defaultFg));
VERIFY_ARE_EQUAL(_defaultBg, attr.GetBackground().GetColor(_colorTable, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(_defaultBg, red), attr.CalculateRgbColors(_colorTable, _defaultFg, _defaultBg));

attr.Invert();
VERIFY_IS_FALSE(attr.IsReverseVideo());
attr.SetDefaultForeground();
attr.SetBackground(green);

VERIFY_ARE_EQUAL(_defaultFg, attr.GetForeground().GetColor(view, _defaultFg));
VERIFY_ARE_EQUAL(green, attr.GetBackground().GetColor(view, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(_defaultFg, green), attr.CalculateRgbColors(view, _defaultFg, _defaultBg));
VERIFY_ARE_EQUAL(_defaultFg, attr.GetForeground().GetColor(_colorTable, _defaultFg));
VERIFY_ARE_EQUAL(green, attr.GetBackground().GetColor(_colorTable, _defaultBg));
VERIFY_ARE_EQUAL(std::make_pair(_defaultFg, green), attr.CalculateRgbColors(_colorTable, _defaultFg, _defaultBg));
}

void TextAttributeTests::TestRoundtripDefaultColors()
Expand Down
Loading

0 comments on commit fc64ff3

Please sign in to comment.