diff --git a/stl/inc/filesystem b/stl/inc/filesystem
index 8b93dd566d4..a2f4bd1dd09 100644
--- a/stl/inc/filesystem
+++ b/stl/inc/filesystem
@@ -102,6 +102,32 @@ namespace filesystem {
         return _Output;
     }
 
+    // More lenient version of _Convert_wide_to_narrow: Instead of failing on non-representable characters,
+    // replace them with a replacement character.
+    template <class _Traits, class _Alloc>
+    _NODISCARD basic_string<char, _Traits, _Alloc> _Convert_wide_to_narrow_replace_chars(
+        const __std_code_page _Code_page, const wstring_view _Input, const _Alloc& _Al) {
+        basic_string<char, _Traits, _Alloc> _Output(_Al);
+
+        if (!_Input.empty()) {
+            if (_Input.size() > static_cast<size_t>(INT_MAX)) {
+                _Throw_system_error(errc::invalid_argument);
+            }
+
+            const int _Len = _Check_convert_result(__std_fs_convert_wide_to_narrow_replace_chars(
+                _Code_page, _Input.data(), static_cast<int>(_Input.size()), nullptr, 0));
+
+            _Output.resize(static_cast<size_t>(_Len));
+
+            const auto _Data_as_char = reinterpret_cast<char*>(_Output.data());
+
+            (void) _Check_convert_result(__std_fs_convert_wide_to_narrow_replace_chars(
+                _Code_page, _Input.data(), static_cast<int>(_Input.size()), _Data_as_char, _Len));
+        }
+
+        return _Output;
+    }
+
     _NODISCARD inline wstring _Convert_utf32_to_wide(const u32string_view _Input) {
         wstring _Output;
 
@@ -1786,8 +1812,12 @@ namespace filesystem {
         static string _Pretty_message(const string_view _Op, const path& _Path1, const path& _Path2 = {}) {
             using namespace _STD string_view_literals; // TRANSITION, VSO-571749
             string _Result;
-            const string _Path1_str = _Path1.string();
-            const string _Path2_str = _Path2.string();
+            // Convert the paths to narrow encoding in a way that gracefully handles non-encodable characters
+            const auto _Code_page = __std_fs_code_page();
+            const string _Path1_str = _Convert_wide_to_narrow_replace_chars<char_traits<char>>(
+                _Code_page, _Path1.native(), allocator<char>{});
+            const string _Path2_str = _Convert_wide_to_narrow_replace_chars<char_traits<char>>(
+                _Code_page, _Path2.native(), allocator<char>{});
             _Result.reserve(_Op.size() + (_Path2_str.empty() ? 4 : 8) + _Path1_str.size() + _Path2_str.size());
             _Result += _Op;
             _Result += R"(: ")"sv; // 3 chars
diff --git a/stl/inc/xfilesystem_abi.h b/stl/inc/xfilesystem_abi.h
index 0d3ce7035a9..2e3fcddde59 100644
--- a/stl/inc/xfilesystem_abi.h
+++ b/stl/inc/xfilesystem_abi.h
@@ -276,6 +276,10 @@ _NODISCARD __std_fs_convert_result __stdcall __std_fs_convert_wide_to_narrow(_In
     _In_reads_(_Input_len) const wchar_t* _Input_str, _In_ int _Input_len,
     _Out_writes_opt_(_Output_len) char* _Output_str, _In_ int _Output_len) noexcept;
 
+_NODISCARD __std_fs_convert_result __stdcall __std_fs_convert_wide_to_narrow_replace_chars(
+    _In_ __std_code_page _Code_page, _In_reads_(_Input_len) const wchar_t* _Input_str, _In_ int _Input_len,
+    _Out_writes_opt_(_Output_len) char* _Output_str, _In_ int _Output_len) noexcept;
+
 _NODISCARD __std_win_error __stdcall __std_fs_get_file_id(
     _Out_ __std_fs_file_id* _Id, _In_z_ const wchar_t* _Path) noexcept;
 
diff --git a/stl/src/filesystem.cpp b/stl/src/filesystem.cpp
index ca312314f04..0a1d3d5a1e8 100644
--- a/stl/src/filesystem.cpp
+++ b/stl/src/filesystem.cpp
@@ -421,6 +421,27 @@ void __stdcall __std_fs_directory_iterator_close(const __std_fs_dir_handle _Hand
     return _Result;
 }
 
+[[nodiscard]] __std_fs_convert_result __stdcall __std_fs_convert_wide_to_narrow_replace_chars(
+    const __std_code_page _Code_page, const wchar_t* const _Input_str, const int _Input_len, char* const _Output_str,
+    const int _Output_len) noexcept {
+    __std_fs_convert_result _Result;
+
+    _Result._Len = WideCharToMultiByte(static_cast<unsigned int>(_Code_page), WC_NO_BEST_FIT_CHARS, _Input_str,
+        _Input_len, _Output_str, _Output_len, nullptr, nullptr);
+
+    _Result._Err = _Result._Len == 0 ? __std_win_error{GetLastError()} : __std_win_error::_Success;
+
+    // Some codepages don't support WC_NO_BEST_FIT_CHARS, fall back to default conversion.
+    if (_Result._Err == __std_win_error{ERROR_INVALID_FLAGS}) {
+        _Result._Len = WideCharToMultiByte(static_cast<unsigned int>(_Code_page), 0, _Input_str, _Input_len,
+            _Output_str, _Output_len, nullptr, nullptr);
+
+        _Result._Err = _Result._Len == 0 ? __std_win_error{GetLastError()} : __std_win_error::_Success;
+    }
+
+    return _Result;
+}
+
 [[nodiscard]] __std_fs_copy_file_result __stdcall __std_fs_copy_file(const wchar_t* const _Source,
     const wchar_t* const _Target, __std_fs_copy_options _Options) noexcept { // copy _Source to _Target
     _Options &= __std_fs_copy_options::_Existing_mask;
diff --git a/tests/std/test.lst b/tests/std/test.lst
index 6358d0afa94..4f9f2c4bc34 100644
--- a/tests/std/test.lst
+++ b/tests/std/test.lst
@@ -160,6 +160,7 @@ tests\GH_000545_include_compare
 tests\GH_000685_condition_variable_any
 tests\GH_000690_overaligned_function
 tests\GH_000890_pow_template
+tests\GH_001010_filesystem_error_encoding
 tests\LWG2597_complex_branch_cut
 tests\LWG3018_shared_ptr_function
 tests\P0024R2_parallel_algorithms_adjacent_difference
diff --git a/tests/std/tests/GH_001010_filesystem_error_encoding/env.lst b/tests/std/tests/GH_001010_filesystem_error_encoding/env.lst
new file mode 100644
index 00000000000..2de7aab2959
--- /dev/null
+++ b/tests/std/tests/GH_001010_filesystem_error_encoding/env.lst
@@ -0,0 +1,4 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+RUNALL_INCLUDE ..\usual_17_matrix.lst
diff --git a/tests/std/tests/GH_001010_filesystem_error_encoding/test.cpp b/tests/std/tests/GH_001010_filesystem_error_encoding/test.cpp
new file mode 100644
index 00000000000..a823c2599a2
--- /dev/null
+++ b/tests/std/tests/GH_001010_filesystem_error_encoding/test.cpp
@@ -0,0 +1,33 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cassert>
+#include <exception>
+#include <filesystem>
+#include <string_view>
+#include <system_error>
+
+using namespace std;
+namespace fs = std::filesystem;
+
+void test_filesystem_error_with_bad_codepage_characters() {
+    fs::path problem_path{L"problematic\xD83Dtestpath"}; // path containing lone high surrogate
+
+    // Make sure the path is not encodable in our current locale, otherwise the test tests nothing
+    bool exception_caught = false;
+    try {
+        (void) problem_path.string();
+    } catch (const exception&) {
+        exception_caught = true;
+    }
+    assert(exception_caught);
+
+    // filesystem_error should handle the non-encodable character gracefully when building its message
+    fs::filesystem_error err{"testexception", problem_path, error_code{}};
+    assert(string_view{err.what()}.find("problematic") != string_view::npos);
+    assert(string_view{err.what()}.find("testpath") != string_view::npos);
+}
+
+int main() {
+    test_filesystem_error_with_bad_codepage_characters();
+}