Skip to content

Commit 06b3931

Browse files
authored
Unify UTF-8 handling using til::u8u16 & revise WriteConsoleAImpl (#4422)
Replace `utf8Parser` with `til::u8u16` so that the terminal and conhost use the same conversion algorithms.

This PR addresses item 2 in this list:

1. ✉ Implement `til::u8u16` and `til::u16u8` (done in PR #4093)
2. ✔ **Unify UTF-8 handling using `til::u8u16` (this PR)**
   2.1. ✔ **Update VtInputThread::_HandleRunInput()**
   2.2. ✔ **Update ApiRoutines::WriteConsoleAImpl()**
   2.3. ❌ (optional / ask the core team) Remove Utf8ToWideCharParser from the code base to avoid further use
3. ❌ Enable BOM discarding (follow up)
   3.1. ❌ extend `til::u8u16` and `til::u16u8` with a 3rd parameter to enable discarding the BOM
   3.2. ❌ Make use of the 3rd parameter to discard the BOM in all current function callers, or (optional / ask the core team) make it the default for `til::u8u16` and `til::u16u8`
4. ❌ Find UTF-16 to UTF-8 conversions and examine if they can be unified, too (follow up)

Closes #4086
Closes #3378
1 parent 0d92f71 commit 06b3931

File tree

4 files changed

+86
-148
lines changed

4 files changed

+86
-148
lines changed

src/host/VtInputThread.cpp

Lines changed: 11 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ VtInputThread::VtInputThread(_In_ wil::unique_hfile hPipe,
2828
const bool inheritCursor) :
2929
_hFile{ std::move(hPipe) },
3030
_hThread{},
31-
_utf8Parser{ CP_UTF8 },
31+
_u8State{},
3232
_dwThreadId{ 0 },
3333
_exitRequested{ false },
3434
_exitResult{ S_OK }
@@ -47,15 +47,14 @@ VtInputThread::VtInputThread(_In_ wil::unique_hfile hPipe,
4747
}
4848

4949
// Method Description:
50-
// - Processes a buffer of input characters. The characters should be utf-8
51-
// encoded, and will get converted to wchar_t's to be processed by the
50+
// - Processes a string of input characters. The characters should be UTF-8
51+
// encoded, and will get converted to wstring to be processed by the
5252
// input state machine.
5353
// Arguments:
54-
// - charBuffer - the UTF-8 characters recieved.
55-
// - cch - number of UTF-8 characters in charBuffer
54+
// - u8Str - the UTF-8 string received.
5655
// Return Value:
5756
// - S_OK on success, otherwise an appropriate failure.
58-
[[nodiscard]] HRESULT VtInputThread::_HandleRunInput(_In_reads_(cch) const byte* const charBuffer, const int cch)
57+
[[nodiscard]] HRESULT VtInputThread::_HandleRunInput(const std::string_view u8Str)
5958
{
6059
// Make sure to call the GLOBAL Lock/Unlock, not the gci's lock/unlock.
6160
// Only the global unlock attempts to dispatch ctrl events. If you use the
@@ -67,16 +66,14 @@ VtInputThread::VtInputThread(_In_ wil::unique_hfile hPipe,
6766

6867
try
6968
{
70-
std::unique_ptr<wchar_t[]> pwsSequence;
71-
unsigned int cchConsumed;
72-
unsigned int cchSequence;
73-
auto hr = _utf8Parser.Parse(charBuffer, cch, cchConsumed, pwsSequence, cchSequence);
69+
std::wstring wstr{};
70+
auto hr = til::u8u16(u8Str, wstr, _u8State);
7471
// If we hit a parsing error, eat it. It's bad utf-8, we can't do anything with it.
7572
if (FAILED(hr))
7673
{
7774
return S_FALSE;
7875
}
79-
_pInputStateMachine->ProcessString({ pwsSequence.get(), cchSequence });
76+
_pInputStateMachine->ProcessString(wstr);
8077
}
8178
CATCH_RETURN();
8279

@@ -100,12 +97,12 @@ DWORD WINAPI VtInputThread::StaticVtInputThreadProc(_In_ LPVOID lpParameter)
10097
// failed, throw or log, depending on what the caller wants.
10198
// Arguments:
10299
// - throwOnFail: If true, throw an exception if there was an error processing
103-
// the input recieved. Otherwise, log the error.
100+
// the input received. Otherwise, log the error.
104101
// Return Value:
105102
// - <none>
106103
void VtInputThread::DoReadInput(const bool throwOnFail)
107104
{
108-
byte buffer[256];
105+
char buffer[256];
109106
DWORD dwRead = 0;
110107
bool fSuccess = !!ReadFile(_hFile.get(), buffer, ARRAYSIZE(buffer), &dwRead, nullptr);
111108

@@ -120,7 +117,7 @@ void VtInputThread::DoReadInput(const bool throwOnFail)
120117
return;
121118
}
122119

123-
HRESULT hr = _HandleRunInput(buffer, dwRead);
120+
HRESULT hr = _HandleRunInput({ buffer, gsl::narrow_cast<size_t>(dwRead) });
124121
if (FAILED(hr))
125122
{
126123
if (throwOnFail)

src/host/VtInputThread.hpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ Author(s):
1515
#pragma once
1616

1717
#include "..\terminal\parser\StateMachine.hpp"
18-
#include "utf8ToWideCharParser.hpp"
1918

2019
namespace Microsoft::Console
2120
{
@@ -29,7 +28,7 @@ namespace Microsoft::Console
2928
void DoReadInput(const bool throwOnFail);
3029

3130
private:
32-
[[nodiscard]] HRESULT _HandleRunInput(_In_reads_(cch) const byte* const charBuffer, const int cch);
31+
[[nodiscard]] HRESULT _HandleRunInput(const std::string_view u8Str);
3332
DWORD _InputThread();
3433

3534
wil::unique_hfile _hFile;
@@ -40,6 +39,6 @@ namespace Microsoft::Console
4039
HRESULT _exitResult;
4140

4241
std::unique_ptr<Microsoft::Console::VirtualTerminal::StateMachine> _pInputStateMachine;
43-
Utf8ToWideCharParser _utf8Parser;
42+
til::u8state _u8State;
4443
};
4544
}

0 commit comments

Comments
 (0)