diff --git a/CMakeLists.txt b/CMakeLists.txt index 5ec7d70b8f..92d6669741 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,19 +6,19 @@ cmake_minimum_required(VERSION 3.31.0) set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY) project(msvc_standard_libraries LANGUAGES CXX) -if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "19.44.35207.1") - message(FATAL_ERROR "The STL must be built with VS 2022 17.14 Preview 7 or later.") +if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "19.44.35209") + message(FATAL_ERROR "The STL must be built with VS 2022 17.14.5 Preview 1 or later.") endif() include(CheckCXXSourceCompiles) check_cxx_source_compiles([=[ #include -static_assert(WDK_NTDDI_VERSION >= NTDDI_WIN10_NI, "Inspecting WDK_NTDDI_VERSION, the Windows SDK version."); +static_assert(WDK_NTDDI_VERSION >= NTDDI_WIN11_GE, "Inspecting WDK_NTDDI_VERSION, the Windows SDK version."); int main() {} ]=] WINDOWS_SDK_VERSION_CHECK) if(NOT WINDOWS_SDK_VERSION_CHECK) - message(FATAL_ERROR "The STL must be built with the Windows 11 SDK (10.0.22621.0) or later. Make sure it's available by selecting it in the Individual Components tab of the VS Installer.") + message(FATAL_ERROR "The STL must be built with the Windows 11 SDK (10.0.26100.3916) or later. Make sure it's available by selecting it in the Individual Components tab of the VS Installer.") endif() if(NOT DEFINED VCLIBS_TARGET_ARCHITECTURE) @@ -96,21 +96,22 @@ set(TOOLSET_LIB "${TOOLSET_ROOT_DIR}/lib/${VCLIBS_X86_OR_X64}") set(STL_ARCHIVE_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/out/lib/${VCLIBS_I386_OR_AMD64}") set(STL_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/out/lib/${VCLIBS_I386_OR_AMD64}") set(STL_RUNTIME_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/out/bin/${VCLIBS_I386_OR_AMD64}") + +# TRANSITION, update internal crt-common.settings.targets, atlmfc.settings.targets, and fe-components.settings.targets +# from NTDDI_WIN10_NI to NTDDI_WIN11_GE when the internal WinSDK is updated. 
add_compile_definitions( _ALLOW_ITERATOR_DEBUG_LEVEL_MISMATCH WIN32_LEAN_AND_MEAN STRICT _CRT_STDIO_ARBITRARY_WIDE_SPECIFIERS - _WIN32_WINNT=0x0A00 NTDDI_VERSION=NTDDI_WIN10_NI) + _WIN32_WINNT=0x0A00 NTDDI_VERSION=NTDDI_WIN11_GE) if(STL_USE_ANALYZE) - # TRANSITION OS-40109504: Windows SDK: incorrect SAL annotations on functions the STL uses - # warning C6553: The annotation for function 'LCMapStringEx' on _Param_(9) - # does not apply to a value type. - # There's a bug in the declaration for LCMapStringEx - it applies _In_opt_ to an LPARAM. - # LPARAM is a LONG_PTR (intptr_t), and it's invalid to apply _In_opt_ to a non-pointer. - # As of the Windows 11 SDK (10.0.22621.0), there are 5 total occurrences of warning C6553 affecting the STL's build. + # TRANSITION, Windows SDK 10.0.26100.3916 emits + # "warning C6553: The annotation for function 'LCMapStringEx' on _Param_(9) does not apply to a value type." + # Reported as OS-40109504 "Windows SDK: incorrect SAL annotations on functions the STL uses". add_compile_options("$<$:/analyze:autolog-;/wd6553>") if(VCLIBS_TARGET_ARCHITECTURE STREQUAL "arm64ec") - # TRANSITION, the Windows SDK emits "warning C28301: No annotations for first declaration of 'meow'" + # TRANSITION, Windows SDK 10.0.26100.3916 emits + # "warning C28301: No annotations for first declaration of 'meow'" # for various intrinsics when building for ARM64EC. add_compile_options("$<$:/wd28301>") endif() diff --git a/README.md b/README.md index 8b08535859..922d91a706 100644 --- a/README.md +++ b/README.md @@ -141,8 +141,8 @@ Just try to follow these rules, so we can spend more time fixing bugs and implem # How To Build With The Visual Studio IDE -1. Install Visual Studio 2022 17.14 Preview 7 or later. - * Select "Windows 11 SDK (10.0.22621.0)" in the VS Installer. +1. Install Visual Studio 2022 17.14.5 Preview 1 or later. + * Select "Windows 11 SDK (10.0.26100.3916)" in the VS Installer. 
* Select "MSVC v143 - VS 2022 C++ ARM64/ARM64EC build tools (Latest)" in the VS Installer if you would like to build the ARM64/ARM64EC target. * Select "MSVC v143 - VS 2022 C++ ARM build tools (Latest)" in the VS Installer @@ -160,8 +160,8 @@ Just try to follow these rules, so we can spend more time fixing bugs and implem # How To Build With A Native Tools Command Prompt -1. Install Visual Studio 2022 17.14 Preview 7 or later. - * Select "Windows 11 SDK (10.0.22621.0)" in the VS Installer. +1. Install Visual Studio 2022 17.14.5 Preview 1 or later. + * Select "Windows 11 SDK (10.0.26100.3916)" in the VS Installer. * Select "MSVC v143 - VS 2022 C++ ARM64/ARM64EC build tools (Latest)" in the VS Installer if you would like to build the ARM64/ARM64EC target. * Select "MSVC v143 - VS 2022 C++ ARM build tools (Latest)" in the VS Installer diff --git a/azure-devops/build-and-test.yml b/azure-devops/build-and-test.yml index a90c7c2f38..11cf23afa9 100644 --- a/azure-devops/build-and-test.yml +++ b/azure-devops/build-and-test.yml @@ -58,8 +58,21 @@ jobs: targetPlatform: ${{ parameters.targetPlatform }} analyzeBuild: ${{ parameters.analyzeBuild }} asanBuild: ${{ parameters.asanBuild }} - buildBenchmarks: ${{ parameters.buildBenchmarks }} testsBuildOnly: ${{ parameters.testsBuildOnly }} + - template: build-benchmarks.yml + parameters: + hostArch: ${{ parameters.hostArch }} + targetArch: ${{ parameters.targetArch }} + targetPlatform: ${{ parameters.targetPlatform }} + buildBenchmarks: ${{ parameters.buildBenchmarks }} + compiler: cl + - template: build-benchmarks.yml + parameters: + hostArch: ${{ parameters.hostArch }} + targetArch: ${{ parameters.targetArch }} + targetPlatform: ${{ parameters.targetPlatform }} + buildBenchmarks: ${{ parameters.buildBenchmarks }} + compiler: clang-cl - template: run-tests.yml parameters: hostArch: ${{ parameters.hostArch }} diff --git a/azure-devops/build-benchmarks.yml b/azure-devops/build-benchmarks.yml new file mode 100644 index 
0000000000..9aef40e89a --- /dev/null +++ b/azure-devops/build-benchmarks.yml @@ -0,0 +1,46 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +parameters: +- name: hostArch + type: string +- name: targetArch + type: string +- name: targetPlatform + type: string +- name: buildBenchmarks + type: boolean +- name: compiler + type: string + +steps: +- script: | + if exist "$(benchmarkBuildOutputLocation)\${{ parameters.compiler }}" ( + rmdir /S /Q "$(benchmarkBuildOutputLocation)\${{ parameters.compiler }}" + ) + call "%ProgramFiles%\Microsoft Visual Studio\2022\Preview\Common7\Tools\VsDevCmd.bat" ^ + -host_arch=${{ parameters.hostArch }} -arch=${{ parameters.targetArch }} -no_logo + cmake -G Ninja ^ + -DCMAKE_CXX_COMPILER=${{ parameters.compiler }} ^ + -DCMAKE_BUILD_TYPE=Release ^ + -DSTL_BINARY_DIR="$(buildOutputLocation)" ^ + -DVCLIBS_TARGET_ARCHITECTURE=${{ parameters.targetPlatform }} ^ + -S $(Build.SourcesDirectory)/benchmarks -B "$(benchmarkBuildOutputLocation)\${{ parameters.compiler }}" + displayName: 'Configure the benchmarks for ${{ parameters.compiler }}' + timeoutInMinutes: 2 + env: { TMP: $(tmpDir), TEMP: $(tmpDir) } + # TRANSITION, we currently don't build the benchmarks with Clang for ARM64 or ARM64EC + condition: > + and(succeeded(), ${{ parameters.buildBenchmarks }}, + not(and(eq('${{ parameters.compiler }}', 'clang-cl'), startsWith('${{ parameters.targetPlatform }}', 'arm64')))) +- script: | + call "%ProgramFiles%\Microsoft Visual Studio\2022\Preview\Common7\Tools\VsDevCmd.bat" ^ + -host_arch=${{ parameters.hostArch }} -arch=${{ parameters.targetArch }} -no_logo + cmake --build "$(benchmarkBuildOutputLocation)\${{ parameters.compiler }}" + displayName: 'Build the benchmarks for ${{ parameters.compiler }}' + timeoutInMinutes: 2 + env: { TMP: $(tmpDir), TEMP: $(tmpDir) } + # TRANSITION, we currently don't build the benchmarks with Clang for ARM64 or ARM64EC + condition: > + and(succeeded(), ${{ 
parameters.buildBenchmarks }}, + not(and(eq('${{ parameters.compiler }}', 'clang-cl'), startsWith('${{ parameters.targetPlatform }}', 'arm64')))) diff --git a/azure-devops/cmake-configure-build.yml b/azure-devops/cmake-configure-build.yml index 39d2716984..c8db703e61 100644 --- a/azure-devops/cmake-configure-build.yml +++ b/azure-devops/cmake-configure-build.yml @@ -12,8 +12,6 @@ parameters: type: boolean - name: asanBuild type: boolean -- name: buildBenchmarks - type: boolean - name: testsBuildOnly type: boolean - name: litFlags @@ -50,27 +48,3 @@ steps: displayName: 'Build the STL' timeoutInMinutes: 5 env: { TMP: $(tmpDir), TEMP: $(tmpDir) } -- script: | - if exist "$(benchmarkBuildOutputLocation)" ( - rmdir /S /Q "$(benchmarkBuildOutputLocation)" - ) - call "%ProgramFiles%\Microsoft Visual Studio\2022\Preview\Common7\Tools\VsDevCmd.bat" ^ - -host_arch=${{ parameters.hostArch }} -arch=${{ parameters.targetArch }} -no_logo - cmake -G Ninja ^ - -DCMAKE_CXX_COMPILER=cl ^ - -DCMAKE_BUILD_TYPE=Release ^ - -DSTL_BINARY_DIR="$(buildOutputLocation)" ^ - -DVCLIBS_TARGET_ARCHITECTURE=${{ parameters.targetPlatform }} ^ - -S $(Build.SourcesDirectory)/benchmarks -B "$(benchmarkBuildOutputLocation)" - displayName: 'Configure the benchmarks' - timeoutInMinutes: 2 - env: { TMP: $(tmpDir), TEMP: $(tmpDir) } - condition: and(succeeded(), ${{ parameters.buildBenchmarks }}) -- script: | - call "%ProgramFiles%\Microsoft Visual Studio\2022\Preview\Common7\Tools\VsDevCmd.bat" ^ - -host_arch=${{ parameters.hostArch }} -arch=${{ parameters.targetArch }} -no_logo - cmake --build "$(benchmarkBuildOutputLocation)" - displayName: 'Build the benchmarks' - timeoutInMinutes: 2 - env: { TMP: $(tmpDir), TEMP: $(tmpDir) } - condition: and(succeeded(), ${{ parameters.buildBenchmarks }}) diff --git a/azure-devops/config.yml b/azure-devops/config.yml index b0833824a7..8ec1c3934c 100644 --- a/azure-devops/config.yml +++ b/azure-devops/config.yml @@ -5,7 +5,7 @@ variables: - name: poolName - value: 
'StlBuild-2025-05-16T0735-Pool' + value: 'StlBuild-2025-06-10T1243-Pool' readonly: true - name: poolDemands value: 'EnableSpotVM -equals false' diff --git a/azure-devops/provision-image.ps1 b/azure-devops/provision-image.ps1 index 901299799b..e89d6492a1 100644 --- a/azure-devops/provision-image.ps1 +++ b/azure-devops/provision-image.ps1 @@ -29,7 +29,7 @@ $VisualStudioWorkloads = @( 'Microsoft.VisualStudio.Component.VC.Tools.ARM64', 'Microsoft.VisualStudio.Component.VC.Tools.ARM64EC', 'Microsoft.VisualStudio.Component.VC.Tools.x86.x64', - 'Microsoft.VisualStudio.Component.Windows11SDK.22621' + 'Microsoft.VisualStudio.Component.Windows11SDK.26100' ) $VisualStudioUrl = 'https://aka.ms/vs/17/pre/vs_enterprise.exe' @@ -43,7 +43,7 @@ foreach ($workload in $VisualStudioWorkloads) { $PowerShellUrl = 'https://github.com/PowerShell/PowerShell/releases/download/v7.5.1/PowerShell-7.5.1-win-x64.msi' $PowerShellArgs = @('/quiet', '/norestart') -$PythonUrl = 'https://www.python.org/ftp/python/3.13.3/python-3.13.3-amd64.exe' +$PythonUrl = 'https://www.python.org/ftp/python/3.13.4/python-3.13.4-amd64.exe' $PythonArgs = @('/quiet', 'InstallAllUsers=1', 'PrependPath=1', 'CompileAll=1', 'Include_doc=0') $CudaUrl = 'https://developer.download.nvidia.com/compute/cuda/12.4.0/local_installers/cuda_12.4.0_551.61_windows.exe' diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 1f8c650534..e096c0fa27 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -52,21 +52,21 @@ stages: numShards: 1 skipTesting: true - - stage: Early_Build_ARM - dependsOn: [] - displayName: 'Early Build ARM' - pool: - name: ${{ variables.poolName }} - demands: ${{ variables.poolDemands }} - jobs: - - template: azure-devops/build-and-test.yml - parameters: - hostArch: x64 - targetArch: arm - targetPlatform: arm - analyzeBuild: true - numShards: 1 - skipTesting: true +# - stage: Early_Build_ARM +# dependsOn: [] +# displayName: 'Early Build ARM' +# pool: +# name: ${{ variables.poolName }} +# demands: 
${{ variables.poolDemands }} +# jobs: +# - template: azure-devops/build-and-test.yml +# parameters: +# hostArch: x64 +# targetArch: arm +# targetPlatform: arm +# analyzeBuild: true +# numShards: 1 +# skipTesting: true - stage: Early_Build_ARM64 dependsOn: [] @@ -98,6 +98,7 @@ stages: targetArch: arm64 targetPlatform: arm64ec analyzeBuild: true + buildBenchmarks: true numShards: 1 skipTesting: true @@ -106,7 +107,7 @@ stages: - Code_Format - Early_Build_x64 - Early_Build_x86 - - Early_Build_ARM +# - Early_Build_ARM - Early_Build_ARM64 - Early_Build_ARM64EC displayName: 'Build and Test x64' @@ -133,9 +134,23 @@ stages: targetArch: x86 targetPlatform: x86 - - stage: Build_And_Test_ARM +# - stage: Build_And_Test_ARM +# dependsOn: Build_And_Test_x64 +# displayName: 'Build and Test ARM' +# pool: +# name: ${{ variables.poolName }} +# demands: ${{ variables.poolDemands }} +# jobs: +# - template: azure-devops/build-and-test.yml +# parameters: +# hostArch: x64 +# targetArch: arm +# targetPlatform: arm +# testsBuildOnly: true + + - stage: Build_And_Test_ARM64 dependsOn: Build_And_Test_x64 - displayName: 'Build and Test ARM' + displayName: 'Build and Test ARM64' pool: name: ${{ variables.poolName }} demands: ${{ variables.poolDemands }} @@ -143,13 +158,13 @@ stages: - template: azure-devops/build-and-test.yml parameters: hostArch: x64 - targetArch: arm - targetPlatform: arm + targetArch: arm64 + targetPlatform: arm64 testsBuildOnly: true - - stage: Build_And_Test_ARM64 + - stage: Build_And_Test_ARM64EC dependsOn: Build_And_Test_x64 - displayName: 'Build and Test ARM64' + displayName: 'Build and Test ARM64EC' pool: name: ${{ variables.poolName }} demands: ${{ variables.poolDemands }} @@ -158,5 +173,5 @@ stages: parameters: hostArch: x64 targetArch: arm64 - targetPlatform: arm64 + targetPlatform: arm64ec testsBuildOnly: true diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 338f9c6a23..f54fdae227 100644 --- a/benchmarks/CMakeLists.txt +++ 
b/benchmarks/CMakeLists.txt @@ -27,6 +27,11 @@ if(DEFINED STL_BINARY_DIR) set(VCLIBS_I386_OR_AMD64 "arm") elseif(VCLIBS_TARGET_ARCHITECTURE STREQUAL "arm64") set(VCLIBS_I386_OR_AMD64 "arm64") + elseif(VCLIBS_TARGET_ARCHITECTURE STREQUAL "arm64ec") + set(VCLIBS_I386_OR_AMD64 "arm64ec") + add_compile_options($<$:/arm64EC>) + add_link_options("/machine:arm64ec") + set(CMAKE_STATIC_LINKER_FLAGS "/machine:arm64ec") else() message(FATAL_ERROR "Could not determine target architecture: VCLIBS_TARGET_ARCHITECTURE: ${VCLIBS_TARGET_ARCHITECTURE}") endif() @@ -52,7 +57,8 @@ set(CMAKE_MSVC_RUNTIME_LIBRARY "${STL_BENCHMARK_MSVC_RUNTIME_LIBRARY}") set(CMAKE_BUILD_TYPE Release) # /utf-8 affects . -add_compile_options("$<$:/Zi;/nologo;/diagnostics:caret;/W4;/WX;/w14265;/w15038;/w15262;/utf-8;/Zc:preprocessor>") +add_compile_options("$<$:/Zi;/nologo;/diagnostics:caret;/W4;/WX;/w14265;/w15038;/w15262;/utf-8>") +add_compile_options("$<$:/Zc:preprocessor>") # TRANSITION, LLVM-48220 clang-cl: ignore /Zc:preprocessor add_link_options("/DEBUG") @@ -106,9 +112,11 @@ add_benchmark(fill src/fill.cpp) add_benchmark(find_and_count src/find_and_count.cpp) add_benchmark(find_first_of src/find_first_of.cpp) add_benchmark(has_single_bit src/has_single_bit.cpp) +add_benchmark(includes src/includes.cpp) add_benchmark(iota src/iota.cpp) add_benchmark(is_sorted_until src/is_sorted_until.cpp) add_benchmark(locale_classic src/locale_classic.cpp) +add_benchmark(locate_zone src/locate_zone.cpp) add_benchmark(minmax_element src/minmax_element.cpp) add_benchmark(mismatch src/mismatch.cpp) add_benchmark(move_only_function src/move_only_function.cpp) diff --git a/benchmarks/google-benchmark b/benchmarks/google-benchmark index 299e592895..eddb024138 160000 --- a/benchmarks/google-benchmark +++ b/benchmarks/google-benchmark @@ -1 +1 @@ -Subproject commit 299e5928955cc62af9968370293b916f5130916f +Subproject commit eddb0241389718a23a42db6af5f0164b6e0139af diff --git a/benchmarks/src/bitset_from_string.cpp 
b/benchmarks/src/bitset_from_string.cpp index cf0c48b52d..26703ecd80 100644 --- a/benchmarks/src/bitset_from_string.cpp +++ b/benchmarks/src/bitset_from_string.cpp @@ -42,7 +42,7 @@ const auto random_digits = random_digits_init(); template void bitset_from_string(benchmark::State& state) { - const auto& digit_array = random_digits; + auto digit_array = random_digits; for (auto _ : state) { benchmark::DoNotOptimize(digit_array); const auto arr_data = digit_array.data(); diff --git a/benchmarks/src/bitset_to_string.cpp b/benchmarks/src/bitset_to_string.cpp index 0dcc1031a7..43a54c2b1d 100644 --- a/benchmarks/src/bitset_to_string.cpp +++ b/benchmarks/src/bitset_to_string.cpp @@ -30,10 +30,11 @@ void BM_bitset_to_string(benchmark::State& state) { static_assert(N <= 64); for (auto _ : state) { - for (const auto& bits : random_bits<>) { + // make a copy, so that it can be potentially modified by DoNotOptimize + for (auto bits : random_bits<>) { benchmark::DoNotOptimize(bits); bitset bs{bits}; - benchmark::DoNotOptimize(bs.to_string()); + benchmark::DoNotOptimize(bs.template to_string()); } } } @@ -43,10 +44,10 @@ void BM_bitset_to_string_large_single(benchmark::State& state) { static_assert(N % 64 == 0 && N >= 64); const auto& bitset_data = random_bits; - const auto large_bitset = bit_cast>(bitset_data); + auto large_bitset = bit_cast>(bitset_data); for (auto _ : state) { benchmark::DoNotOptimize(large_bitset); - benchmark::DoNotOptimize(large_bitset.to_string()); + benchmark::DoNotOptimize(large_bitset.template to_string()); } } diff --git a/benchmarks/src/filesystem.cpp b/benchmarks/src/filesystem.cpp index 33c8e69e87..120aa107ca 100644 --- a/benchmarks/src/filesystem.cpp +++ b/benchmarks/src/filesystem.cpp @@ -6,12 +6,12 @@ #include void symlink_status(benchmark::State& state) { - const auto path = std::filesystem::temp_directory_path(); + auto path = std::filesystem::temp_directory_path(); for (auto _ : state) { std::error_code ec; benchmark::DoNotOptimize(path); - 
const auto status = std::filesystem::symlink_status(path, ec); + auto status = std::filesystem::symlink_status(path, ec); benchmark::DoNotOptimize(status); benchmark::DoNotOptimize(ec); } diff --git a/benchmarks/src/has_single_bit.cpp b/benchmarks/src/has_single_bit.cpp index 7f2519e0a1..3e8255b534 100644 --- a/benchmarks/src/has_single_bit.cpp +++ b/benchmarks/src/has_single_bit.cpp @@ -11,7 +11,7 @@ using namespace std; template void bm_has_single_bit_if(benchmark::State& state) { - const auto random_v = random_vector(8); + auto random_v = random_vector(8); for (auto _ : state) { benchmark::DoNotOptimize(random_v); unsigned int count_true = 0; @@ -28,7 +28,7 @@ void bm_has_single_bit_if(benchmark::State& state) { template void bm_has_single_bit(benchmark::State& state) { - const auto random_v = random_vector(8); + auto random_v = random_vector(8); for (auto _ : state) { benchmark::DoNotOptimize(random_v); unsigned int r = 0; diff --git a/benchmarks/src/includes.cpp b/benchmarks/src/includes.cpp new file mode 100644 index 0000000000..d349bde168 --- /dev/null +++ b/benchmarks/src/includes.cpp @@ -0,0 +1,124 @@ +// Copyright (c) Microsoft Corporation. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "skewed_allocator.hpp" +#include "utility.hpp" + +using namespace std; + +enum class alg_type { std_fn, rng }; + +enum class needle_spread { dense, dense_random, sparse, sparse_random }; + +template +void bm_includes(benchmark::State& state) { + const auto hay_size = static_cast(state.range(0)); + const auto needle_size = static_cast(state.range(1)); + const auto spread = static_cast(state.range(2)); + const auto expected_match = static_cast(state.range(3)); + + auto hay = random_vector(hay_size); + ranges::sort(hay); + + vector> needle; + switch (spread) { + case needle_spread::dense: + needle.assign(hay.begin() + hay_size / 2 - needle_size / 2, hay.begin() + hay_size / 2 + (needle_size + 1) / 2); + break; + + case needle_spread::dense_random: + { + mt19937 gen{}; + geometric_distribution dis_dis{}; + vector idx(needle_size); + const size_t mid = needle_size / 2; + idx[mid] = hay_size / 2; + + const size_t max_shift = hay_size / needle_size; + + for (size_t i = mid; i != 0; --i) { + idx[i - 1] = idx[i] - min(dis_dis(gen) + 1, max_shift); + } + + for (size_t i = mid; i != needle_size - 1; ++i) { + idx[i + 1] = idx[i] + min(dis_dis(gen) + 1, max_shift); + } + + needle.assign_range(idx | views::transform([&hay](const size_t i) { return hay[i]; })); + } + break; + + case needle_spread::sparse: + needle.resize(needle_size); + for (size_t i = 0; i != needle_size; ++i) { + needle[i] = hay[hay_size * i / needle_size + hay_size / (needle_size * 2)]; + } + break; + + case needle_spread::sparse_random: + needle.resize(needle_size); + ranges::sample(hay, needle.begin(), needle_size, mt19937{}); + break; + } + + if (!expected_match) { + const T v = needle[needle_size / 2]; + const T r = static_cast(static_cast>(v) + 1); + ranges::replace(hay, v, r); + ranges::sort(hay); + } + + for (auto _ : state) { + 
benchmark::DoNotOptimize(hay); + benchmark::DoNotOptimize(needle); + bool found; + if constexpr (Alg == alg_type::rng) { + found = ranges::includes(hay, needle); + } else { + found = includes(hay.begin(), hay.end(), needle.begin(), needle.end()); + } + benchmark::DoNotOptimize(found); + if (found != expected_match) { + cerr << "Unexpected 'includes' result: " << found << '\n'; + abort(); + } + } +} + +void common_args(auto bm) { + for (const auto& spread : + {needle_spread::dense, needle_spread::dense_random, needle_spread::sparse, needle_spread::sparse_random}) { + for (const auto& expected_match : {true, false}) { + for (const auto& needle_size : {3, 22, 105, 1504, 2750}) { + bm->Args({3000, needle_size, static_cast>(spread), expected_match}); + } + + for (const auto& needle_size : {3, 22, 105, 290}) { + bm->Args({300, needle_size, static_cast>(spread), expected_match}); + } + } + } +} + +BENCHMARK(bm_includes)->Apply(common_args); +BENCHMARK(bm_includes)->Apply(common_args); +BENCHMARK(bm_includes)->Apply(common_args); +BENCHMARK(bm_includes)->Apply(common_args); + +BENCHMARK(bm_includes)->Apply(common_args); +BENCHMARK(bm_includes)->Apply(common_args); +BENCHMARK(bm_includes)->Apply(common_args); +BENCHMARK(bm_includes)->Apply(common_args); + +BENCHMARK_MAIN(); diff --git a/benchmarks/src/locale_classic.cpp b/benchmarks/src/locale_classic.cpp index cac18e5a15..aa4a739a27 100644 --- a/benchmarks/src/locale_classic.cpp +++ b/benchmarks/src/locale_classic.cpp @@ -8,7 +8,8 @@ using namespace std; // GH-3048 : Double-checked locking for locale::classic void BM_locale_classic(benchmark::State& state) { for (auto _ : state) { - benchmark::DoNotOptimize(locale::classic()); + auto v = locale::classic(); + benchmark::DoNotOptimize(v); } } BENCHMARK(BM_locale_classic); diff --git a/benchmarks/src/locate_zone.cpp b/benchmarks/src/locate_zone.cpp new file mode 100644 index 0000000000..08eff0983c --- /dev/null +++ b/benchmarks/src/locate_zone.cpp @@ -0,0 +1,19 @@ +// 
Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "benchmark/benchmark.h" +#include + +void locate_zone(benchmark::State& state) { + const auto& db = std::chrono::get_tzdb(); + for (auto _ : state) { + for (const auto& z : db.zones) { + auto res = db.locate_zone(z.name()); + benchmark::DoNotOptimize(res); + } + } +} + +BENCHMARK(locate_zone); + +BENCHMARK_MAIN(); diff --git a/benchmarks/src/move_only_function.cpp b/benchmarks/src/move_only_function.cpp index 9760a602b2..c5f108d25c 100644 --- a/benchmarks/src/move_only_function.cpp +++ b/benchmarks/src/move_only_function.cpp @@ -5,6 +5,10 @@ #include #include +#ifdef __clang__ +#pragma clang diagnostic ignored "-Wunqualified-std-cast-call" +#endif // defined(__clang__) + using namespace std; void mof_none(benchmark::State& state) { diff --git a/benchmarks/src/priority_queue_push_range.cpp b/benchmarks/src/priority_queue_push_range.cpp index c15663c90e..d2e96bf17a 100644 --- a/benchmarks/src/priority_queue_push_range.cpp +++ b/benchmarks/src/priority_queue_push_range.cpp @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include @@ -57,31 +56,20 @@ void BM_push_range(benchmark::State& state) { } } -template -void putln(const benchmark::State&) { - static bool b = [] { - puts(""); - return true; - }(); +void common_args(auto bm) { + bm->RangeMultiplier(100)->Range(1, vec_size)->Arg(vec_size / 2 + 1); } -#define TEST_PUSH_RANGE(T, source) \ - BENCHMARK(BM_push_range) \ - ->Setup(putln<__LINE__>) \ - ->RangeMultiplier(100) \ - ->Range(1, vec_size) \ - ->Arg(vec_size / 2 + 1); +BENCHMARK(BM_push_range)->Apply(common_args); +BENCHMARK(BM_push_range)->Apply(common_args); +BENCHMARK(BM_push_range)->Apply(common_args); +BENCHMARK(BM_push_range)->Apply(common_args); +BENCHMARK(BM_push_range)->Apply(common_args); +BENCHMARK(BM_push_range)->Apply(common_args); -TEST_PUSH_RANGE(uint8_t, vec_u8); -TEST_PUSH_RANGE(uint16_t, vec_u16); 
-TEST_PUSH_RANGE(uint32_t, vec_u32); -TEST_PUSH_RANGE(uint64_t, vec_u64); -TEST_PUSH_RANGE(float, vec_float); -TEST_PUSH_RANGE(double, vec_double); - -TEST_PUSH_RANGE(string_view, vec_str); -TEST_PUSH_RANGE(string, vec_str); -TEST_PUSH_RANGE(wstring_view, vec_wstr); -TEST_PUSH_RANGE(wstring, vec_wstr); +BENCHMARK(BM_push_range)->Apply(common_args); +BENCHMARK(BM_push_range)->Apply(common_args); +BENCHMARK(BM_push_range)->Apply(common_args); +BENCHMARK(BM_push_range)->Apply(common_args); BENCHMARK_MAIN(); diff --git a/benchmarks/src/regex_search.cpp b/benchmarks/src/regex_search.cpp index 36f4cb9d6a..bc6a3d1253 100644 --- a/benchmarks/src/regex_search.cpp +++ b/benchmarks/src/regex_search.cpp @@ -31,8 +31,15 @@ void bm_lorem_search(benchmark::State& state, const char* pattern) { } } +BENCHMARK_CAPTURE(bm_lorem_search, "^bibe", "^bibe")->Arg(2)->Arg(3)->Arg(4); BENCHMARK_CAPTURE(bm_lorem_search, "bibe", "bibe")->Arg(2)->Arg(3)->Arg(4); +BENCHMARK_CAPTURE(bm_lorem_search, "(bibe)", "(bibe)")->Arg(2)->Arg(3)->Arg(4); BENCHMARK_CAPTURE(bm_lorem_search, "(bibe)+", "(bibe)+")->Arg(2)->Arg(3)->Arg(4); BENCHMARK_CAPTURE(bm_lorem_search, "(?:bibe)+", "(?:bibe)+")->Arg(2)->Arg(3)->Arg(4); +BENCHMARK_CAPTURE(bm_lorem_search, R"(\bbibe)", R"(\bbibe)")->Arg(2)->Arg(3)->Arg(4); +BENCHMARK_CAPTURE(bm_lorem_search, R"(\Bibe)", R"(\Bibe)")->Arg(2)->Arg(3)->Arg(4); +BENCHMARK_CAPTURE(bm_lorem_search, R"((?=....)bibe)", R"((?=....)bibe)")->Arg(2)->Arg(3)->Arg(4); +BENCHMARK_CAPTURE(bm_lorem_search, R"((?=bibe)....)", R"((?=bibe)....)")->Arg(2)->Arg(3)->Arg(4); +BENCHMARK_CAPTURE(bm_lorem_search, R"((?!lorem)bibe)", R"((?!lorem)bibe)")->Arg(2)->Arg(3)->Arg(4); BENCHMARK_MAIN(); diff --git a/benchmarks/src/rotate.cpp b/benchmarks/src/rotate.cpp index 9ba7c4b898..c0780aaf6e 100644 --- a/benchmarks/src/rotate.cpp +++ b/benchmarks/src/rotate.cpp @@ -55,4 +55,6 @@ BENCHMARK(bm_rotate)->Apply(common_args); BENCHMARK(bm_rotate)->Apply(common_args); BENCHMARK(bm_rotate)->Apply(common_args); 
+BENCHMARK(bm_rotate)->Args({35000, 520})->Args({35000, 3000}); + BENCHMARK_MAIN(); diff --git a/benchmarks/src/search.cpp b/benchmarks/src/search.cpp index 47583b8214..4f803cec3a 100644 --- a/benchmarks/src/search.cpp +++ b/benchmarks/src/search.cpp @@ -32,7 +32,8 @@ template constexpr std::array fill_pattern_array = make_fill_pattern_array(); template -constexpr std::string_view fill_pattern_view = fill_pattern_array; +constexpr std::string_view fill_pattern_view{ + fill_pattern_array.data(), fill_pattern_array.size()}; struct data_and_pattern { std::string_view data; @@ -60,8 +61,8 @@ void c_strstr(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const not_highly_aligned_string haystack(src_haystack); - const not_highly_aligned_string needle(src_needle); + not_highly_aligned_string haystack(src_haystack); + not_highly_aligned_string needle(src_needle); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -76,8 +77,8 @@ void classic_search(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector> haystack(src_haystack.begin(), src_haystack.end()); - const std::vector> needle(src_needle.begin(), src_needle.end()); + std::vector> haystack(src_haystack.begin(), src_haystack.end()); + std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -92,8 +93,8 @@ void ranges_search(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector> haystack(src_haystack.begin(), src_haystack.end()); - const std::vector> needle(src_needle.begin(), src_needle.end()); + std::vector> haystack(src_haystack.begin(), src_haystack.end()); + 
std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -108,8 +109,8 @@ void search_default_searcher(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector> haystack(src_haystack.begin(), src_haystack.end()); - const std::vector> needle(src_needle.begin(), src_needle.end()); + std::vector> haystack(src_haystack.begin(), src_haystack.end()); + std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -124,8 +125,8 @@ void member_find(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const T haystack(src_haystack.begin(), src_haystack.end()); - const T needle(src_needle.begin(), src_needle.end()); + T haystack(src_haystack.begin(), src_haystack.end()); + T needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -140,8 +141,8 @@ void classic_find_end(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector> haystack(src_haystack.begin(), src_haystack.end()); - const std::vector> needle(src_needle.begin(), src_needle.end()); + std::vector> haystack(src_haystack.begin(), src_haystack.end()); + std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -156,8 +157,8 @@ void ranges_find_end(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const std::vector> haystack(src_haystack.begin(), src_haystack.end()); - const std::vector> 
needle(src_needle.begin(), src_needle.end()); + std::vector> haystack(src_haystack.begin(), src_haystack.end()); + std::vector> needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); @@ -172,8 +173,8 @@ void member_rfind(benchmark::State& state) { const auto& src_haystack = patterns[static_cast(state.range())].data; const auto& src_needle = patterns[static_cast(state.range())].pattern; - const T haystack(src_haystack.begin(), src_haystack.end()); - const T needle(src_needle.begin(), src_needle.end()); + T haystack(src_haystack.begin(), src_haystack.end()); + T needle(src_needle.begin(), src_needle.end()); for (auto _ : state) { benchmark::DoNotOptimize(haystack); diff --git a/benchmarks/src/sv_equal.cpp b/benchmarks/src/sv_equal.cpp index 219b6b927c..67e09846cc 100644 --- a/benchmarks/src/sv_equal.cpp +++ b/benchmarks/src/sv_equal.cpp @@ -31,7 +31,7 @@ constexpr std::array make_svs() { template void sv_equal(benchmark::State& state) { - constexpr auto arr = make_svs(); + auto arr = make_svs(); benchmark::DoNotOptimize(arr); for (auto _ : state) { diff --git a/stl/inc/__msvc_bit_utils.hpp b/stl/inc/__msvc_bit_utils.hpp index 4217cb208a..df0af9064d 100644 --- a/stl/inc/__msvc_bit_utils.hpp +++ b/stl/inc/__msvc_bit_utils.hpp @@ -281,7 +281,8 @@ _NODISCARD int _Checked_x86_x64_countr_zero(const _Ty _Val) noexcept { #define _POPCNT_INTRINSICS_ALWAYS_AVAILABLE 0 #endif // ^^^ intrinsics not always available ^^^ #else // ^^^ intrinsics available / intrinsics unavailable vvv -#define _HAS_POPCNT_INTRINSICS 0 +#define _HAS_POPCNT_INTRINSICS 0 +#define _POPCNT_INTRINSICS_ALWAYS_AVAILABLE 0 #endif // ^^^ intrinsics unavailable ^^^ #if _HAS_POPCNT_INTRINSICS @@ -384,9 +385,7 @@ _CONSTEXPR20 decltype(auto) _Select_popcount_impl(_Fn _Callback) { return _Callback([](_Ty _Val) _STATIC_LAMBDA { return _Popcount_fallback(_Val); }); } -#undef _HAS_POPCNT_INTRINSICS #undef _HAS_TZCNT_BSF_INTRINSICS -#undef 
_POPCNT_INTRINSICS_ALWAYS_AVAILABLE _STD_END diff --git a/stl/inc/__msvc_ranges_tuple_formatter.hpp b/stl/inc/__msvc_ranges_tuple_formatter.hpp index 677997e3ab..176bf1eb76 100644 --- a/stl/inc/__msvc_ranges_tuple_formatter.hpp +++ b/stl/inc/__msvc_ranges_tuple_formatter.hpp @@ -239,19 +239,20 @@ class basic_format_arg { void(__cdecl* _Format)(basic_format_parse_context<_CharType>& _Parse_ctx, _Context& _Format_ctx, const void*); template - explicit handle(_Ty& _Val) noexcept - : _Ptr(_STD addressof(_Val)), _Format([](basic_format_parse_context<_CharType>& _Parse_ctx, - _Context& _Format_ctx, const void* _Ptr) _STATIC_LAMBDA { - using _Td = remove_const_t<_Ty>; - // doesn't drop const-qualifier per an unnumbered LWG issue - using _Tq = conditional_t<_Formattable_with, const _Ty, _Ty>; - _STL_INTERNAL_STATIC_ASSERT(_Formattable_with<_Tq, _Context>); - - typename _Context::template formatter_type<_Td> _Formatter; - _Parse_ctx.advance_to(_Formatter.parse(_Parse_ctx)); - _Format_ctx.advance_to( - _Formatter.format(*const_cast<_Tq*>(static_cast(_Ptr)), _Format_ctx)); - }) {} + static void __cdecl _Handle_format( + basic_format_parse_context<_CharType>& _Parse_ctx, _Context& _Format_ctx, const void* _Ptr) { + using _Td = remove_const_t<_Ty>; + // doesn't drop const-qualifier per an unnumbered LWG issue + using _Tq = conditional_t<_Formattable_with, const _Ty, _Ty>; + _STL_INTERNAL_STATIC_ASSERT(_Formattable_with<_Tq, _Context>); + + typename _Context::template formatter_type<_Td> _Formatter; + _Parse_ctx.advance_to(_Formatter.parse(_Parse_ctx)); + _Format_ctx.advance_to(_Formatter.format(*const_cast<_Tq*>(static_cast(_Ptr)), _Format_ctx)); + } + + template + explicit handle(_Ty& _Val) noexcept : _Ptr(_STD addressof(_Val)), _Format(_Handle_format<_Ty>) {} public: void format(basic_format_parse_context<_CharType>& _Parse_ctx, _Context& _Format_ctx) const { diff --git a/stl/inc/algorithm b/stl/inc/algorithm index f853fbd329..195166ab1f 100644 --- a/stl/inc/algorithm +++ 
b/stl/inc/algorithm @@ -351,11 +351,6 @@ _Ty* _Unique_copy_vectorized(const _Ty* const _First, const _Ty* const _Last, _T _STL_INTERNAL_STATIC_ASSERT(false); // Unexpected size } } - -// Can we activate the vector algorithms for find_first_of? -template -constexpr bool _Vector_alg_in_find_first_of_is_safe = _Equal_memcmp_is_safe<_It1, _It2, _Pr>; - // Can we activate the vector algorithms for replace? template constexpr bool _Vector_alg_in_replace_is_safe = _Vector_alg_in_find_is_safe<_Iter, _Ty1> // can search for the value @@ -376,7 +371,7 @@ constexpr bool _Vector_alg_in_search_n_is_safe = _Vector_alg_in_find_is_safe<_It equal_to<>>; // Can we activate the vector algorithms for unique? template -constexpr bool _Vector_alg_in_unique_is_safe = _Equal_memcmp_is_safe<_Iter, _Iter, _Pr>; +constexpr bool _Vector_alg_in_unique_is_safe = _Vector_alg_in_search_is_safe<_Iter, _Iter, _Pr>; // Can we use this output iterator for remove_copy or unique_copy? template @@ -712,7 +707,7 @@ _NODISCARD _CONSTEXPR20 _FwdIt adjacent_find(const _FwdIt _First, _FwdIt _Last, auto _ULast = _STD _Get_unwrapped(_Last); if (_UFirst != _ULast) { #if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Equal_memcmp_is_safe) { + if constexpr (_Vector_alg_in_search_is_safe) { if (!_STD _Is_constant_evaluated()) { const auto _First_ptr = _STD _To_address(_UFirst); const auto _Result = _STD _Adjacent_find_vectorized(_First_ptr, _STD _To_address(_ULast)); @@ -888,7 +883,7 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch(_InIt1 _First1, const _InI const auto _ULast1 = _STD _Get_unwrapped(_Last1); auto _UFirst2 = _STD _Get_unwrapped_n(_First2, _STD _Idl_distance<_InIt1>(_UFirst1, _ULast1)); #if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Equal_memcmp_is_safe) { + if constexpr (_Vector_alg_in_search_is_safe) { if (!_STD _Is_constant_evaluated()) { constexpr size_t _Elem_size = sizeof(_Iter_value_t<_InIt1>); @@ -952,7 +947,7 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch( const auto _Count = 
static_cast<_Iter_diff_t<_InIt1>>((_STD min)(_Count1, _Count2)); _ULast1 = _UFirst1 + _Count; #if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Equal_memcmp_is_safe) { + if constexpr (_Vector_alg_in_search_is_safe) { if (!_STD _Is_constant_evaluated()) { constexpr size_t _Elem_size = sizeof(_Iter_value_t<_InIt1>); @@ -3796,7 +3791,7 @@ _NODISCARD _CONSTEXPR20 _FwdIt1 find_first_of( } #if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Vector_alg_in_find_first_of_is_safe) { + if constexpr (_Vector_alg_in_search_is_safe) { if (!_STD _Is_constant_evaluated() && _ULast1 - _UFirst1 >= _Threshold_find_first_of) { const auto _First1_ptr = _STD _To_address(_UFirst1); const auto _Result = _STD _Find_first_of_vectorized( @@ -3900,7 +3895,7 @@ namespace ranges { } #if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Vector_alg_in_find_first_of_is_safe<_It1, _It2, _Pr> && sized_sentinel_for<_Se1, _It1> + if constexpr (_Vector_alg_in_search_is_safe<_It1, _It2, _Pr> && sized_sentinel_for<_Se1, _It1> && sized_sentinel_for<_Se2, _It2> && is_same_v<_Pj1, identity> && is_same_v<_Pj2, identity>) { if (!_STD is_constant_evaluated() && _Last1 - _First1 >= _Threshold_find_first_of) { const auto _Count1 = static_cast(_Last1 - _First1); @@ -5616,6 +5611,10 @@ namespace ranges { } // namespace ranges #endif // _HAS_CXX20 +template +struct _Is_trivially_copy_assignable_returning_same_reference + : bool_constant<_Is_trivially_assignable_returning_same_reference_v<_Ty&, const _Ty&>> {}; + _EXPORT_STD template _CONSTEXPR20 _OutIt reverse_copy(_BidIt _First, _BidIt _Last, _OutIt _Dest) { // copy reversing elements in [_First, _Last) @@ -5629,8 +5628,8 @@ _CONSTEXPR20 _OutIt reverse_copy(_BidIt _First, _BidIt _Last, _OutIt _Dest) { using _Elem = remove_reference_t<_Iter_ref_t>>; using _DestElem = remove_reference_t<_Iter_ref_t>; constexpr bool _Allow_vectorization = conjunction_v, _DestElem>, - bool_constant<_Iterators_are_contiguous>, is_trivially_copyable<_Elem>, - negation>>; + 
bool_constant<_Iterators_are_contiguous>, + _Is_trivially_copy_assignable_returning_same_reference<_Elem>, negation>>; constexpr size_t _Nx = sizeof(_Elem); if constexpr (_Allow_vectorization && _Nx <= 8 && (_Nx & (_Nx - 1)) == 0) { @@ -5719,7 +5718,7 @@ namespace ranges { using _Elem = remove_reference_t>; using _DestElem = remove_reference_t>; constexpr bool _Allow_vectorization = conjunction_v, _DestElem>, - is_trivially_copyable<_Elem>, negation>>; + _Is_trivially_copy_assignable_returning_same_reference<_Elem>, negation>>; constexpr size_t _Nx = sizeof(_Elem); if constexpr (_Allow_vectorization && _Nx <= 8 && (_Nx & (_Nx - 1)) == 0) { @@ -10158,17 +10157,31 @@ _NODISCARD _CONSTEXPR20 bool includes(_InIt1 _First1, _InIt1 _Last1, _InIt2 _Fir const auto _ULast2 = _STD _Get_unwrapped(_Last2); _DEBUG_ORDER_SET_UNWRAPPED(_InIt2, _UFirst1, _ULast1, _Pred); _DEBUG_ORDER_SET_UNWRAPPED(_InIt1, _UFirst2, _ULast2, _Pred); - for (; _UFirst1 != _ULast1 && _UFirst2 != _ULast2; ++_UFirst1) { - if (_DEBUG_LT_PRED(_Pred, *_UFirst2, *_UFirst1)) { - return false; - } - if (!_Pred(*_UFirst1, *_UFirst2)) { + if (_UFirst2 == _ULast2) { + return true; + } else if (_UFirst1 == _ULast1) { + return false; + } + + for (;;) { + if (_DEBUG_LT_PRED(_Pred, *_UFirst1, *_UFirst2)) { + ++_UFirst1; + if (_UFirst1 == _ULast1) { + return false; + } + } else if (_Pred(*_UFirst2, *_UFirst1)) { + return false; + } else { + ++_UFirst1; ++_UFirst2; + if (_UFirst2 == _ULast2) { + return true; + } else if (_UFirst1 == _ULast1) { + return false; + } } } - - return _UFirst2 == _ULast2; } _EXPORT_STD template @@ -10246,20 +10259,16 @@ namespace ranges { if (_First1 == _Last1) { return false; } - - continue; - } - - if (_STD invoke(_Pred, _STD invoke(_Proj2, *_First2), _STD invoke(_Proj1, *_First1))) { - return false; - } - - ++_First1; - ++_First2; - if (_First2 == _Last2) { - return true; - } else if (_First1 == _Last1) { + } else if (_STD invoke(_Pred, _STD invoke(_Proj2, *_First2), _STD invoke(_Proj1, 
*_First1))) { return false; + } else { + ++_First1; + ++_First2; + if (_First2 == _Last2) { + return true; + } else if (_First1 == _Last1) { + return false; + } } } } diff --git a/stl/inc/atomic b/stl/inc/atomic index 75e45dde4a..929fcd7c26 100644 --- a/stl/inc/atomic +++ b/stl/inc/atomic @@ -2990,7 +2990,8 @@ public: break; default: // Unrecognized bit pattern - _CSTD abort(); + _STL_REPORT_ERROR("atomic shared_ptr invariant broken"); + break; } } } diff --git a/stl/inc/bit b/stl/inc/bit index 6d82d9bc10..f9be4e925f 100644 --- a/stl/inc/bit +++ b/stl/inc/bit @@ -84,6 +84,12 @@ _NODISCARD constexpr int countl_zero(_Ty _Val) noexcept; _EXPORT_STD template <_Standard_unsigned_integral _Ty> _NODISCARD constexpr bool has_single_bit(const _Ty _Val) noexcept { +#if _POPCNT_INTRINSICS_ALWAYS_AVAILABLE + if (!_STD is_constant_evaluated()) { + return _Unchecked_popcount(_Val) == 1; + } +#endif // ^^^ _POPCNT_INTRINSICS_ALWAYS_AVAILABLE ^^^ + return (_Val ^ (_Val - 1)) > _Val - 1; } diff --git a/stl/inc/chrono b/stl/inc/chrono index d6558177e2..d85567af8f 100644 --- a/stl/inc/chrono +++ b/stl/inc/chrono @@ -2105,8 +2105,14 @@ namespace chrono { template _NODISCARD const _Ty* _Locate_zone_impl(const vector<_Ty>& _Vec, string_view _Name) { - const auto _Result = _STD find_if(_Vec.begin(), _Vec.end(), [&](auto& _Tz) { return _Tz.name() == _Name; }); - return _Result == _Vec.end() ? nullptr : &*_Result; + // N5008 [time.zone.db.tzdb]/1: "Each vector in a tzdb object is sorted to enable fast lookup." 
+ const auto _Result = _STD lower_bound( + _Vec.begin(), _Vec.end(), _Name, [](const auto& _Tz, const auto& _Sv) { return _Tz.name() < _Sv; }); + if (_Result != _Vec.end() && _Result->name() == _Name) { + return &*_Result; + } else { + return nullptr; + } } _EXPORT_STD struct tzdb { diff --git a/stl/inc/format b/stl/inc/format index 2d81e60ce5..0e6af0261b 100644 --- a/stl/inc/format +++ b/stl/inc/format @@ -3540,7 +3540,7 @@ _NODISCARD _FormatContext::iterator _Tuple_formatter_format(const tuple(_Tmp_str); diff --git a/stl/inc/functional b/stl/inc/functional index 21c0e80ccc..6496b8d3df 100644 --- a/stl/inc/functional +++ b/stl/inc/functional @@ -770,7 +770,8 @@ private: auto& _Myax = _Mypair._Get_first(); if constexpr (!is_copy_constructible_v<_Callable>) { // used exclusively for packaged_task (void) _Myax; - _CSTD abort(); // shouldn't be called, see GH-3888 + _STL_REPORT_ERROR("this callable object should not be copied"); + return nullptr; } else if constexpr (_Is_large<_Func_impl>) { _Myalty _Rebound(_Myax); _Alloc_construct_ptr<_Myalty> _Constructor{_Rebound}; @@ -808,7 +809,8 @@ private: } #else // ^^^ _HAS_STATIC_RTTI / !_HAS_STATIC_RTTI vvv [[noreturn]] const type_info& _Target_type() const noexcept override { - _CSTD abort(); // shouldn't be called, see GH-3888 + _STL_REPORT_ERROR("type info is disabled"); + _STL_UNREACHABLE; // no return value available for "continue on error" } #endif // ^^^ !_HAS_STATIC_RTTI ^^^ @@ -859,7 +861,8 @@ public: private: _Mybase* _Copy(void* _Where) const override { if constexpr (!is_copy_constructible_v<_Callable>) { // used exclusively for packaged_task - _CSTD abort(); // shouldn't be called, see GH-3888 + _STL_REPORT_ERROR("this callable object should not be copied"); + return nullptr; } else if constexpr (_Is_large<_Func_impl_no_alloc>) { return _STD _Global_new<_Func_impl_no_alloc>(_Callee); } else { @@ -889,7 +892,8 @@ private: } #else // ^^^ _HAS_STATIC_RTTI / !_HAS_STATIC_RTTI vvv [[noreturn]] const type_info& 
_Target_type() const noexcept override { - _CSTD abort(); // shouldn't be called, see GH-3888 + _STL_REPORT_ERROR("type info is disabled"); + _STL_UNREACHABLE; // no return value available for "continue on error" } #endif // ^^^ !_HAS_STATIC_RTTI ^^^ @@ -1352,8 +1356,8 @@ inline constexpr size_t _Minimum_function_size = 2 * sizeof(void*); template [[noreturn]] _Rx __stdcall _Function_not_callable(const _Move_only_function_data&, _Types&&...) noexcept { - _CSTD abort(); // Unlike std::function, move_only_function doesn't throw bad_function_call - // (N4950 [func.wrap.move.inv]/2) + _STL_REPORT_ERROR("empty move_only_function call (N5008 [func.wrap.move.inv]/2)"); + _STL_UNREACHABLE; // no return value available for "continue on error" } template diff --git a/stl/inc/future b/stl/inc/future index 135fa8fecd..318427ba74 100644 --- a/stl/inc/future +++ b/stl/inc/future @@ -1319,7 +1319,7 @@ template class _Fake_no_copy_callable_adapter { // async() is built on packaged_task internals which incorrectly use // std::function, which requires that things be copyable. We can't fix this in an - // update, so this adapter turns copies into abort(). When VSO-153581 is + // update, so this adapter turns copies into _STL_REPORT_ERROR(). When VSO-153581 is // fixed, remove this adapter. 
private: using _Storaget = tuple...>; @@ -1331,7 +1331,7 @@ public: [[noreturn]] _Fake_no_copy_callable_adapter(const _Fake_no_copy_callable_adapter& _Other) : _Storage(_STD move(_Other._Storage)) { - _CSTD abort(); // shouldn't be called + _STL_REPORT_ERROR("this callable object should not be copied"); // shouldn't be called } _Fake_no_copy_callable_adapter(_Fake_no_copy_callable_adapter&& _Other) = default; diff --git a/stl/inc/memory b/stl/inc/memory index e959623f7b..74d85f4e56 100644 --- a/stl/inc/memory +++ b/stl/inc/memory @@ -1128,11 +1128,11 @@ private: #ifdef _M_CEE_PURE // permanent workaround to avoid mentioning _purecall in msvcurt.lib, ptrustu.lib, or other support libs virtual void _Destroy() noexcept { - _CSTD abort(); + _STL_REPORT_ERROR("_Ref_count_base base class member functions should not be called"); } virtual void _Delete_this() noexcept { - _CSTD abort(); + _STL_REPORT_ERROR("_Ref_count_base base class member functions should not be called"); } #else // ^^^ defined(_M_CEE_PURE) / !defined(_M_CEE_PURE) vvv virtual void _Destroy() noexcept = 0; // destroy managed resource diff --git a/stl/inc/regex b/stl/inc/regex index 46dab2b06d..38d5fdec47 100644 --- a/stl/inc/regex +++ b/stl/inc/regex @@ -33,6 +33,16 @@ _STL_DISABLE_CLANG_WARNINGS #pragma push_macro("new") #undef new +// Controls whether LWG-2503 "multiline option should be added to syntax_option_type" is implemented. +// Defining this to 0 requests Standard behavior: +// * For ECMAScript, matching is non-multiline by default, but regex_constants::multiline can be requested. +// * For POSIX grammars, matching is non-multiline, and regex_constants::multiline is ignored (N5008 [tab:re.synopt]). +// Defining this to 1 requests legacy behavior: +// * For all grammars, matching is multiline, and regex_constants::multiline is redundant. 
+#ifndef _REGEX_LEGACY_MULTILINE_MODE +#define _REGEX_LEGACY_MULTILINE_MODE 0 +#endif + #ifndef _REGEX_MAX_COMPLEXITY_COUNT #define _REGEX_MAX_COMPLEXITY_COUNT 10000000L // set to 0 to disable #endif // !defined(_REGEX_MAX_COMPLEXITY_COUNT) @@ -121,10 +131,11 @@ namespace regex_constants { _Gmask = 0x3F, _Any_posix = basic | extended | grep | egrep | awk, - icase = 0x0100, - nosubs = 0x0200, - optimize = 0x0400, - collate = 0x0800 + icase = 0x0100, + nosubs = 0x0200, + optimize = 0x0400, + collate = 0x0800, + multiline = 0x1000 }; _BITMASK_OPS(_EXPORT_STD, syntax_option_type) @@ -164,7 +175,7 @@ namespace regex_constants { error_complexity, error_stack, _Error_parse, // TRANSITION, was error_parse, keeping behavior - error_syntax + _Error_syntax // TRANSITION, was error_syntax, keeping behavior }; } // namespace regex_constants @@ -228,34 +239,54 @@ struct _Lex_compare_memcmp_classify_pred<_Elem, _Elem, _Std_char_traits_lt<_Elem : _Lex_compare_memcmp_classify_pred_for_std_char_traits_lt<_Elem> {}; template -struct _Cmp_cs { // functor to compare two character values for equality +struct _Cmp_icase { // functor to compare for equality following case-insensitive translation of both characters using _Elem = typename _RxTraits::char_type; - _STATIC_CALL_OPERATOR bool operator()(_Elem _Ex1, _Elem _Ex2) _CONST_CALL_OPERATOR { - return _Ex1 == _Ex2; + + explicit _Cmp_icase(const _RxTraits& _Tr) noexcept : _Traits(_Tr) {} + + bool operator()(_Elem _Ex1, _Elem _Ex2) const { + return _Traits.translate_nocase(_Ex1) == _Traits.translate_nocase(_Ex2); } + + const _RxTraits& _Traits; }; template -struct _Cmp_icase { // functor to compare for case-insensitive equality +struct _Cmp_collate { // functor to compare for equality following collating translation of both characters using _Elem = typename _RxTraits::char_type; - explicit _Cmp_icase(const _RxTraits& _Tr) noexcept : _Traits(_Tr) {} + explicit _Cmp_collate(const _RxTraits& _Tr) noexcept : _Traits(_Tr) {} bool 
operator()(_Elem _Ex1, _Elem _Ex2) const { - return _Traits.translate_nocase(_Ex1) == _Traits.translate_nocase(_Ex2); + return _Traits.translate(_Ex1) == _Traits.translate(_Ex2); } const _RxTraits& _Traits; }; template -struct _Cmp_collate { // functor to compare for locale-specific equality +struct _Cmp_icase_translateleft { + // functor to compare for equality following case-insensitive translation of the left character using _Elem = typename _RxTraits::char_type; - explicit _Cmp_collate(const _RxTraits& _Tr) noexcept : _Traits(_Tr) {} + explicit _Cmp_icase_translateleft(const _RxTraits& _Tr) noexcept : _Traits(_Tr) {} bool operator()(_Elem _Ex1, _Elem _Ex2) const { - return _Traits.translate(_Ex1) == _Traits.translate(_Ex2); + return _Traits.translate_nocase(_Ex1) == _Ex2; + } + + const _RxTraits& _Traits; +}; + +template +struct _Cmp_collate_translateleft { + // functor to compare for equality following collating translation of the left character + using _Elem = typename _RxTraits::char_type; + + explicit _Cmp_collate_translateleft(const _RxTraits& _Tr) noexcept : _Traits(_Tr) {} + + bool operator()(_Elem _Ex1, _Elem _Ex2) const { + return _Traits.translate(_Ex1) == _Ex2; } const _RxTraits& _Traits; @@ -574,7 +605,7 @@ private: "could match the specified character sequence."; case regex_constants::_Error_parse: // TRANSITION, keeping behavior return "regex_error(error_parse)"; - case regex_constants::error_syntax: + case regex_constants::_Error_syntax: return "regex_error(error_syntax)"; default: return "regex_error"; @@ -1574,26 +1605,24 @@ enum class _Rx_char_class_kind : int { // must be aligned with corresponding _No }; template -class _Builder { // provides operations used by _Parser to build the nfa +class _Builder2 { // provides operations used by _Parser2 to build the nfa public: - _Builder(const _RxTraits& _Tr, regex_constants::syntax_option_type); + _Builder2(const _RxTraits& _Tr, regex_constants::syntax_option_type); void _Setlong(); - // _Discard_pattern 
is an ABI zombie name void _Tidy() noexcept; _Node_base* _Getmark() const; - void _Add_nop(); void _Add_bol(); void _Add_eol(); void _Add_wbound(); void _Add_dot(); - void _Add_char2(_Elem _Ch); + void _Add_char(_Elem _Ch); void _Add_class(); void _Add_char_to_class(_Elem _Ch); - void _Add_range3(_Elem, _Elem); + void _Add_range(_Elem, _Elem); void _Add_named_class(typename _RxTraits::char_class_type, _Rx_char_class_kind); - void _Add_equiv2(const _Elem*, const _Elem*); - void _Add_coll2(const _Elem*, const _Elem*); + void _Add_equiv(const _Elem*, const _Elem*); + void _Add_coll(const _Elem*, const _Elem*); _Node_base* _Begin_group(); void _End_group(_Node_base* _Back); _Node_base* _Begin_assert_group(bool); @@ -1602,7 +1631,7 @@ public: void _Add_backreference(unsigned int _Idx); _Node_base* _Begin_if(_Node_base* _Start); void _Else_if(_Node_base*, _Node_base*); - void _Add_rep2(int _Min, int _Max, bool _Greedy); + void _Add_rep(int _Min, int _Max, bool _Greedy); void _Negate(); _Root_node* _End_pattern(); @@ -1614,17 +1643,16 @@ private: void _Add_char_to_bitmap(_Elem _Ch); void _Add_char_to_array(_Elem _Ch); void _Add_elts(_Node_class<_Elem, _RxTraits>*, typename _RxTraits::char_class_type, bool); - void _Char_to_elts2(const _Elem*, const _Elem*, _Sequence<_Elem>**); + void _Char_to_elts(const _Elem*, const _Elem*, _Sequence<_Elem>**); _Root_node* _Root; _Node_base* _Current; regex_constants::syntax_option_type _Flags; const _RxTraits& _Traits; - const int _Bmax; // TRANSITION, ABI: preserved for binary compatibility - const int _Tmax; // TRANSITION, ABI: preserved for binary compatibility public: - _Builder& operator=(const _Builder&) = delete; + _Builder2(const _Builder2&) = delete; + _Builder2& operator=(const _Builder2&) = delete; }; template @@ -1669,6 +1697,15 @@ public: if (_Re->_Flags & _Fl_begin_needs_d) { _Char_class_d = _Lookup_char_class(static_cast<_Elem>('D')); } + +// sanitize multiline mode setting +#if _REGEX_LEGACY_MULTILINE_MODE + _Sflags |= 
regex_constants::multiline; // old matcher applied multiline mode for all grammars +#else // ^^^ _REGEX_LEGACY_MULTILINE_MODE / !_REGEX_LEGACY_MULTILINE_MODE vvv + if (_Sflags & regex_constants::_Any_posix) { // multiline mode is ECMAScript-only + _Sflags &= ~regex_constants::multiline; + } +#endif // ^^^ !_REGEX_LEGACY_MULTILINE_MODE ^^^ } void _Setf(regex_constants::match_flag_type _Mf) { // set specified flags @@ -1695,9 +1732,10 @@ public: } _Tgt_state._Cur = _Begin; - _Tgt_state._Grp_valid.resize(_Ncap); - _Tgt_state._Grps.resize(_Ncap); - _Cap = static_cast(_Matches); + if (_Ncap > 1U) { + _Tgt_state._Grp_valid.resize(_Ncap); + _Tgt_state._Grps.resize(_Ncap); + } _Full = _Full_match; _Max_complexity_count = _REGEX_MAX_COMPLEXITY_COUNT; _Max_stack_count = _REGEX_MAX_STACK_COUNT; @@ -1711,7 +1749,13 @@ public: if (_Matches) { // copy results to _Matches _Matches->_Resize(_Ncap); const auto& _Result = _Longest ? _Res : _Tgt_state; - for (unsigned int _Idx = 0; _Idx < _Ncap; ++_Idx) { // copy submatch _Idx + + auto& _Submatch0 = _Matches->_At(0U); + _Submatch0.matched = true; + _Submatch0.first = _Begin; + _Submatch0.second = _Result._Cur; + + for (unsigned int _Idx = 1U; _Idx < _Ncap; ++_Idx) { // copy submatch _Idx if (_Result._Grp_valid[_Idx]) { // copy successful match _Matches->_At(_Idx).matched = true; _Matches->_At(_Idx).first = _Result._Grps[_Idx]._Begin; @@ -1737,7 +1781,7 @@ public: return true; } - _BidIt _Skip(_BidIt, _BidIt, _Node_base* = nullptr); + _BidIt _Skip(_BidIt, _BidIt, _Node_base* = nullptr, unsigned int _Recursion_depth = 0U); private: _Tgt_state_t<_It> _Tgt_state; @@ -1763,7 +1807,6 @@ private: regex_constants::syntax_option_type _Sflags; regex_constants::match_flag_type _Mflags; bool _Matched = false; - bool _Cap; unsigned int _Ncap; bool _Longest; const _RxTraits& _Traits; @@ -1775,6 +1818,7 @@ private: typename _RxTraits::char_class_type _Char_class_d{}; public: + _Matcher2(const _Matcher2&) = delete; _Matcher2& operator=(const 
_Matcher2&) = delete; }; @@ -1784,19 +1828,19 @@ enum _Prs_ret { // indicate class element type _Prs_set }; +enum class _Lex_mode : unsigned char { _Default, _Character_class }; + template -class _Parser { // parse a regular expression +class _Parser2 { // parse a regular expression public: - using char_class_type = typename _RxTraits::char_class_type; - - _Parser(const _RxTraits& _Tr, _FwdIt _Pfirst, _FwdIt _Plast, regex_constants::syntax_option_type _Fx); + _Parser2(const _RxTraits& _Tr, _FwdIt _Pfirst, _FwdIt _Plast, regex_constants::syntax_option_type _Fx); _Root_node* _Compile(); +private: unsigned int _Mark_count() const noexcept { return _Grp_idx + 1; } -private: // lexing [[noreturn]] void _Error(regex_constants::error_type); @@ -1807,12 +1851,12 @@ private: // parsing int _Do_digits(int _Base, int _Initial, int _Count, regex_constants::error_type _Error_type); - bool _DecimalDigits3(regex_constants::error_type _Error_type, int _Initial = 0); + bool _DecimalDigits(regex_constants::error_type _Error_type, int _Initial = 0); void _HexDigits(int); bool _OctalDigits(); - _Prs_ret _Do_ex_class2(_Meta_type); + _Prs_ret _Do_ex_class(_Meta_type); bool _CharacterClassEscape(bool); - _Prs_ret _ClassEscape3(); + _Prs_ret _ClassEscape(); _Prs_ret _ClassAtom(bool); void _ClassRanges(); void _CharacterClass(); @@ -1832,53 +1876,53 @@ private: void _Calculate_loop_simplicity(_Node_base* _Nx, _Node_base* _Ne, _Node_rep* _Outer_rep); _FwdIt _Pat; - _FwdIt _Begin; _FwdIt _End; unsigned int _Grp_idx = 0; int _Disj_count = 0; vector _Finished_grps; - _Builder<_FwdIt, _Elem, _RxTraits> _Nfa; + _Builder2<_FwdIt, _Elem, _RxTraits> _Nfa; const _RxTraits& _Traits; + unsigned long long _L_flags; regex_constants::syntax_option_type _Flags; int _Val; - _Elem _Char; _Meta_type _Mchar; - unsigned int _L_flags; + _Lex_mode _Mode = _Lex_mode::_Default; + _Elem _Char; }; -enum _Lang_flags { // describe language properties - _L_ext_rep = 0x00000001, // + and ? 
repetitions - _L_alt_pipe = 0x00000002, // uses '|' for alternation - _L_alt_nl = 0x00000004, // uses '\n' for alternation (grep, egrep) - _L_nex_grp = 0x00000008, // has non-escaped capture groups - _L_nex_rep = 0x00000010, // has non-escaped repeats - _L_nc_grp = 0x00000020, // has non-capture groups (?:xxx) - _L_asrt_gen = 0x00000040, // has generalized assertions (?=xxx), (?!xxx) - _L_asrt_wrd = 0x00000080, // has word boundary assertions (\b, \B) - _L_bckr = 0x00000100, // has backreferences (ERE doesn't) - _L_lim_bckr = 0x00000200, // has limited backreferences (BRE \1-\9) - _L_ngr_rep = 0x00000400, // has non-greedy repeats - _L_esc_uni = 0x00000800, // has Unicode escape sequences - _L_esc_hex = 0x00001000, // has hexadecimal escape sequences - _L_esc_oct = 0x00002000, // has octal escape sequences - _L_esc_bsp = 0x00004000, // has backspace escape in character classes - _L_esc_ffnx = 0x00008000, // has extra file escapes (\a and \b) - _L_esc_ffn = 0x00010000, // has limited file escapes (\[fnrtv]) - _L_esc_wsd = 0x00020000, // has w, s, and d character set escapes - _L_esc_ctrl = 0x00040000, // has control escape - _L_no_nl = 0x00080000, // no newline in pattern or matching text - _L_bzr_chr = 0x00100000, // \0 is a valid character constant - _L_grp_esc = 0x00200000, // \ is special character in group - _L_ident_ECMA = 0x00400000, // ECMA identity escape (not identifierpart) - _L_ident_ERE = 0x00800000, // ERE identity escape (.[\*^$, plus {+?}() - _L_ident_awk = 0x01000000, // awk identity escape ( ERE plus "/) - _L_anch_rstr = 0x02000000, // anchor restricted to beginning/end - _L_star_beg = 0x04000000, // star okay at beginning of RE/expr (BRE) - _L_empty_grp = 0x08000000, // empty group allowed (ERE prohibits "()") - _L_paren_bal = 0x10000000, // ')'/'}' special only after '('/'{' - _L_brk_bal = 0x20000000, // ']' special only after '[' (ERE, BRE); TRANSITION, ABI: same value as _L_brk_rstr - _L_brk_rstr = 0x20000000, // ']' not special when first 
character in set - _L_mtch_long = 0x40000000, // find longest match (ERE, BRE) +enum _Lang_flags2 : unsigned long long { // describe language properties + _L_ext_rep = 0x000000001ULL, // + and ? repetitions + _L_alt_pipe = 0x000000002ULL, // uses '|' for alternation + _L_alt_nl = 0x000000004ULL, // newlines outside parentheses/brackets represent alternations (grep, egrep) + _L_nex_grp = 0x000000008ULL, // has non-escaped capture groups + _L_nex_rep = 0x000000010ULL, // has non-escaped repeats + _L_nc_asrt = 0x000000020ULL, // has non-capture groups (?:xxx) and generalized assertions (?=xxx), (?!xxx) + _L_asrt_wrd = 0x000000040ULL, // has word boundary assertions (\b, \B) + _L_bckr = 0x000000080ULL, // has backreferences (ERE doesn't) + _L_lim_bckr = 0x000000100ULL, // has limited backreferences (BRE \1-\9) + _L_non_greedy = 0x000000200ULL, // has non-greedy repeats + _L_esc_uni = 0x000000400ULL, // has Unicode escape sequences + _L_esc_hex = 0x000000800ULL, // has hexadecimal escape sequences + _L_esc_oct = 0x000001000ULL, // has octal escape sequences (awk) + _L_esc_bsp = 0x000002000ULL, // has backspace escape in character classes + _L_esc_ffnx = 0x000004000ULL, // has extra file escapes (\a and \b) (awk) + _L_esc_ffn = 0x000008000ULL, // has limited file escapes (\f \n \r \t \v) + _L_esc_wsd = 0x000010000ULL, // has w, s, and d character set escapes + _L_esc_ctrl = 0x000020000ULL, // has control escape + _L_no_nl = 0x000040000ULL, // newlines within parentheses/brackets are syntax errors (grep, egrep) + _L_bzr_chr = 0x000080000ULL, // \0 is a valid character constant + _L_grp_esc = 0x000100000ULL, // \ is special character in group + _L_ident_ECMA = 0x000200000ULL, // ECMA identity escape (not identifierpart) + _L_ident_ERE = 0x000400000ULL, // ERE identity escape (.[\*^$, plus {+?}() + _L_ident_awk = 0x000800000ULL, // awk identity escape (awk additionally supports " and / characters) + _L_anch_rstr = 0x001000000ULL, // anchor restricted to beginning/end (BRE) 
+ _L_star_beg = 0x002000000ULL, // star okay at beginning of RE/expr (BRE) + _L_empty_grp = 0x004000000ULL, // empty group allowed (ERE prohibits "()") + _L_paren_bal = 0x008000000ULL, // ')'/'}' special only after '('/'{' (ERE) + _L_brk_bal = 0x010000000ULL, // ']' special only after '[' (ERE, BRE) + _L_brk_rstr = 0x020000000ULL, // ']' not special when first character in set (ERE, BRE) + _L_dsh_rstr = 0x040000000ULL, // '-' forbidden at range start in set except when first character (ERE, BRE) + _L_mtch_long = 0x080000000ULL, // find longest match (ERE, BRE) }; class _Regex_base : public _Container_base { // base class for basic_regex to construct and destroy proxy @@ -1916,6 +1960,7 @@ public: static constexpr flag_type awk = regex_constants::awk; static constexpr flag_type grep = regex_constants::grep; static constexpr flag_type egrep = regex_constants::egrep; + static constexpr flag_type multiline = regex_constants::multiline; basic_regex() = default; // construct empty object @@ -2121,7 +2166,7 @@ private: _Visualization.assign(_First, _Last); #endif // _ENHANCED_REGEX_VISUALIZER - _Parser<_InIt, _Elem, _RxTraits> _Prs(_Traits, _First, _Last, _Flags); + _Parser2<_InIt, _Elem, _RxTraits> _Prs(_Traits, _First, _Last, _Flags); _Root_node* _Rx = _Prs._Compile(); _Reset(_Rx); } else { @@ -2837,28 +2882,26 @@ _EXPORT_STD using sregex_token_iterator = regex_token_iterator; template -_Builder<_FwdIt, _Elem, _RxTraits>::_Builder(const _RxTraits& _Tr, regex_constants::syntax_option_type _Fx) - : _Root(new _Root_node), _Current(_Root), _Flags(_Fx), _Traits(_Tr), - _Bmax(static_cast(_Fx & regex_constants::collate ? 0U : _Bmp_max)), - _Tmax(static_cast(_Fx & regex_constants::collate ? 
0U : _ARRAY_THRESHOLD)) {} +_Builder2<_FwdIt, _Elem, _RxTraits>::_Builder2(const _RxTraits& _Tr, regex_constants::syntax_option_type _Fx) + : _Root(new _Root_node), _Current(_Root), _Flags(_Fx), _Traits(_Tr) {} template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Setlong() { // set flag +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Setlong() { // set flag _Root->_Flags |= _Fl_longest; } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Negate() { // set flag +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Negate() { // set flag _Current->_Flags ^= _Fl_negate; } template -_Node_base* _Builder<_FwdIt, _Elem, _RxTraits>::_Getmark() const { +_Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_Getmark() const { return _Current; } template -_Node_base* _Builder<_FwdIt, _Elem, _RxTraits>::_Link_node(_Node_base* _Nx) { // insert _Nx at current location +_Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_Link_node(_Node_base* _Nx) { // insert _Nx at current location _Nx->_Prev = _Current; if (_Current->_Next) { // set back pointer _Nx->_Next = _Current->_Next; @@ -2870,7 +2913,7 @@ _Node_base* _Builder<_FwdIt, _Elem, _RxTraits>::_Link_node(_Node_base* _Nx) { // } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Insert_node(_Node_base* _Insert_before, _Node_base* _To_insert) { +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Insert_node(_Node_base* _Insert_before, _Node_base* _To_insert) { // insert _To_insert into the graph before the node _Insert_before _Insert_before->_Prev->_Next = _To_insert; _To_insert->_Prev = _Insert_before->_Prev; @@ -2879,42 +2922,37 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Insert_node(_Node_base* _Insert_before } template -_Node_base* _Builder<_FwdIt, _Elem, _RxTraits>::_New_node(_Node_type _Kind) { // allocate and link simple node +_Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_New_node(_Node_type _Kind) { // allocate and link simple node return _Link_node(new _Node_base(_Kind)); } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_nop() { // 
add nop node - _New_node(_N_nop); -} - -template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_bol() { // add bol node +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_bol() { // add bol node _New_node(_N_bol); } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_eol() { // add eol node +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_eol() { // add eol node _New_node(_N_eol); } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_wbound() { // add wbound node +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_wbound() { // add wbound node _New_node(_N_wbound); } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_dot() { // add dot node +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_dot() { // add dot node _New_node(_N_dot); } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_str_node() { // add string node +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_str_node() { // add string node _Link_node(new _Node_str<_Elem>); } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_char2(_Elem _Ch) { // append character +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_char(_Elem _Ch) { // append character if (_Current->_Kind != _N_str) { _Add_str_node(); } @@ -2930,16 +2968,12 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_char2(_Elem _Ch) { // append chara } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_class() { // add bracket expression node +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_class() { // add bracket expression node _Link_node(new _Node_class<_Elem, _RxTraits>); } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_char_to_bitmap(_Elem _Ch) { // add character to accelerator table - if (_Flags & regex_constants::icase) { - _Ch = _Traits.translate_nocase(_Ch); - } - +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_char_to_bitmap(_Elem _Ch) { // add character to accelerator table _Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Current); if (!_Node->_Small) { @@ -2950,11 +2984,7 @@ void 
_Builder<_FwdIt, _Elem, _RxTraits>::_Add_char_to_bitmap(_Elem _Ch) { // add } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_char_to_array(_Elem _Ch) { // append character to character array - if (_Flags & regex_constants::icase) { - _Ch = _Traits.translate_nocase(_Ch); - } - +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_char_to_array(_Elem _Ch) { // append character to character array _Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Current); if (!_Node->_Large) { _Node->_Large = new _Buf<_Elem>; @@ -2964,7 +2994,13 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_char_to_array(_Elem _Ch) { // appe } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_char_to_class(_Elem _Ch) { // add character to bracket expression +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_char_to_class(_Elem _Ch) { // add character to bracket expression + if (_Flags & regex_constants::icase) { + _Ch = _Traits.translate_nocase(_Ch); + } else if (_Flags & regex_constants::collate) { + _Ch = _Traits.translate(_Ch); + } + if (static_cast(_Ch) < _Bmp_max) { _Add_char_to_bitmap(_Ch); } else { @@ -2973,7 +3009,7 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_char_to_class(_Elem _Ch) { // add } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_range3(const _Elem _Arg0, const _Elem _Arg1) { +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_range(const _Elem _Arg0, const _Elem _Arg1) { // add character range to set using string_type = typename _RxTraits::string_type; unsigned int _Ex0 = static_cast(_Arg0); @@ -3032,12 +3068,12 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_range3(const _Elem _Arg0, const _E } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_elts( - _Node_class<_Elem, _RxTraits>* _Node, typename _RxTraits::char_class_type _Cl, bool _Negate) { +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_elts( + _Node_class<_Elem, _RxTraits>* _Node, typename _RxTraits::char_class_type _Cl, bool _Negative) { // add characters in named 
class to set for (unsigned int _Ch = 0; _Ch < _Bmp_max; ++_Ch) { // add elements or their inverse bool _Matches = _Traits.isctype(static_cast<_Elem>(_Ch), _Cl); - if (_Matches != _Negate) { // add contents of named class to accelerator table + if (_Matches != _Negative) { // add contents of named class to accelerator table if (!_Node->_Small) { _Node->_Small = new _Bitmap; } @@ -3048,7 +3084,7 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_elts( } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_named_class( +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_named_class( typename _RxTraits::char_class_type _Cl, const _Rx_char_class_kind _Kind) { // add contents of named class to bracket expression using _Char_class_type = typename _RxTraits::char_class_type; @@ -3073,7 +3109,7 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_named_class( } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Char_to_elts2(const _Elem* const _First, const _Elem* const _Last, +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Char_to_elts(const _Elem* const _First, const _Elem* const _Last, _Sequence<_Elem>** _Cur) { // add collation element to element sequence auto _Diff = static_cast(_Last - _First); while (*_Cur && _Diff < (*_Cur)->_Sz) { @@ -3090,7 +3126,7 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Char_to_elts2(const _Elem* const _Firs } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_equiv2(const _Elem* const _First, const _Elem* const _Last) { +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_equiv(const _Elem* const _First, const _Elem* const _Last) { // add elements of equivalence class to bracket expression _Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Current); typename _RxTraits::string_type _Str = _Traits.transform_primary(_First, _Last); @@ -3112,25 +3148,25 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_equiv2(const _Elem* const _First, } if (_Bmp_max < static_cast(_STD _Max_limit<_Elem>())) { // map range 
_Sequence<_Elem>** _Cur = _STD addressof(_Node->_Equiv); - _Char_to_elts2(_First, _Last, _Cur); + _Char_to_elts(_First, _Last, _Cur); } } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_coll2(const _Elem* const _First, const _Elem* const _Last) { +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_coll(const _Elem* const _First, const _Elem* const _Last) { // add collation element to bracket expression _Node_class<_Elem, _RxTraits>* _Node = static_cast<_Node_class<_Elem, _RxTraits>*>(_Current); _Sequence<_Elem>** _Cur = _STD addressof(_Node->_Coll); - _Char_to_elts2(_First, _Last, _Cur); + _Char_to_elts(_First, _Last, _Cur); } template -_Node_base* _Builder<_FwdIt, _Elem, _RxTraits>::_Begin_group() { // add group node +_Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_Begin_group() { // add group node return _New_node(_N_group); } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_End_group(_Node_base* _Back) { // add end of group node +void _Builder2<_FwdIt, _Elem, _RxTraits>::_End_group(_Node_base* _Back) { // add end of group node _Node_type _Elt; if (_Back->_Kind == _N_group) { _Elt = _N_end_group; @@ -3144,7 +3180,7 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_End_group(_Node_base* _Back) { // add } template -_Node_base* _Builder<_FwdIt, _Elem, _RxTraits>::_Begin_assert_group(const bool _Neg) { // add assert node +_Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_Begin_assert_group(const bool _Neg) { // add assert node auto _Node1_unique = _STD make_unique<_Node_assert>(_Neg ? 
_N_neg_assert : _N_assert); _Node_base* _Node2 = new _Node_base(_N_nop); _Node_assert* _Node1 = _Node1_unique.release(); @@ -3156,23 +3192,23 @@ _Node_base* _Builder<_FwdIt, _Elem, _RxTraits>::_Begin_assert_group(const bool _ } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_End_assert_group(_Node_base* _Nx) { // add end of assert node +void _Builder2<_FwdIt, _Elem, _RxTraits>::_End_assert_group(_Node_base* _Nx) { // add end of assert node _End_group(_Nx); _Current = _Nx; } template -_Node_base* _Builder<_FwdIt, _Elem, _RxTraits>::_Begin_capture_group(unsigned int _Idx) { // add capture group node +_Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_Begin_capture_group(unsigned int _Idx) { // add capture group node return _Link_node(new _Node_capture(_Idx)); } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_backreference(unsigned int _Idx) { // add back reference node +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Add_backreference(unsigned int _Idx) { // add back reference node _Link_node(new _Node_back(_Idx)); } template -_Node_base* _Builder<_FwdIt, _Elem, _RxTraits>::_Begin_if(_Node_base* _Start) { // add if node +_Node_base* _Builder2<_FwdIt, _Elem, _RxTraits>::_Begin_if(_Node_base* _Start) { // add if node // append endif node _Node_base* _Res = new _Node_endif; _Link_node(_Res); @@ -3185,7 +3221,7 @@ _Node_base* _Builder<_FwdIt, _Elem, _RxTraits>::_Begin_if(_Node_base* _Start) { } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Else_if(_Node_base* _Start, _Node_base* _End) { // add else node +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Else_if(_Node_base* _Start, _Node_base* _End) { // add else node _Node_if* _Parent = static_cast<_Node_if*>(_Start->_Next); _Node_base* _First = _End->_Next; _End->_Next = nullptr; @@ -3203,12 +3239,12 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Else_if(_Node_base* _Start, _Node_base } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_rep2(int _Min, int _Max, bool _Greedy) { // add repeat node +void 
_Builder2<_FwdIt, _Elem, _RxTraits>::_Add_rep(int _Min, int _Max, bool _Greedy) { // add repeat node if (_Current->_Kind == _N_str && static_cast<_Node_str<_Elem>*>(_Current)->_Data._Size() != 1) { // move final character to new string node _Node_str<_Elem>* _Node = static_cast<_Node_str<_Elem>*>(_Current); _Add_str_node(); - _Add_char2(_Node->_Data._Del()); + _Add_char(_Node->_Data._Del()); } _Node_base* _Pos = _Current; @@ -3256,13 +3292,13 @@ void _Builder<_FwdIt, _Elem, _RxTraits>::_Add_rep2(int _Min, int _Max, bool _Gre } template -_Root_node* _Builder<_FwdIt, _Elem, _RxTraits>::_End_pattern() { // wrap up +_Root_node* _Builder2<_FwdIt, _Elem, _RxTraits>::_End_pattern() { // wrap up _New_node(_N_end); return _Root; } template -void _Builder<_FwdIt, _Elem, _RxTraits>::_Tidy() noexcept { // free memory +void _Builder2<_FwdIt, _Elem, _RxTraits>::_Tidy() noexcept { // free memory _Destroy_node(_Root); _Root = nullptr; } @@ -3499,10 +3535,10 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep_first(_Node_rep* // Determine first capture group in repetition for later capture group reset, if not done so previously. // No capture group reset is performed for POSIX regexes, - // so we prevent any reset by setting the first capture group to the number of capture groups _Ncap. - if (_Psav->_Group_first == 0) { + // so we prevent any reset by setting the first capture group to the size of the capture group vector. 
+ if (_Psav->_Group_first == 0U) { if ((_Sflags & regex_constants::_Any_posix) || !_Find_first_inner_capture_group(_Node->_Next, _Psav)) { - _Psav->_Group_first = _Ncap; + _Psav->_Group_first = static_cast(_Tgt_state._Grp_valid.size()); } } @@ -3573,7 +3609,7 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Find_first_inner_capture _Found_group = true; _Nx = nullptr; } else { - _Inner_loop_state->_Group_first = _Ncap; + _Inner_loop_state->_Group_first = static_cast(_Tgt_state._Grp_valid.size()); _Nx = _Inner_rep->_End_rep; } break; @@ -3610,23 +3646,33 @@ _BidIt1 _Cmp_chrange(_BidIt1 _Begin1, _BidIt1 _End1, _BidIt2 _Begin2, _BidIt2 _E return _Res; } } - return _Begin2 == _End2 ? _Begin1 : _Res; } template -_BidIt1 _Compare(_BidIt1 _Begin1, _BidIt1 _End1, _BidIt2 _Begin2, _BidIt2 _End2, const _RxTraits& _Traits, - regex_constants::syntax_option_type _Sflags) { // compare character ranges - _BidIt1 _Res = _End1; +_BidIt1 _Compare_translate_both(_BidIt1 _Begin1, _BidIt1 _End1, _BidIt2 _Begin2, _BidIt2 _End2, + const _RxTraits& _Traits, regex_constants::syntax_option_type _Sflags) { + // compare character ranges, translating characters in both ranges according to syntax options if (_Sflags & regex_constants::icase) { - _Res = _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_icase<_RxTraits>{_Traits}); + return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_icase<_RxTraits>{_Traits}); } else if (_Sflags & regex_constants::collate) { - _Res = _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_collate<_RxTraits>{_Traits}); + return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_collate<_RxTraits>{_Traits}); } else { - _Res = _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_cs<_RxTraits>{}); + return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, equal_to{}); } +} - return _Res; +template +_BidIt1 _Compare_translate_left(_BidIt1 _Begin1, _BidIt1 _End1, _BidIt2 _Begin2, _BidIt2 _End2, + const _RxTraits& _Traits, 
regex_constants::syntax_option_type _Sflags) { + // compare character ranges, translating characters in the left range according to syntax options + if (_Sflags & regex_constants::icase) { + return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_icase_translateleft<_RxTraits>{_Traits}); + } else if (_Sflags & regex_constants::collate) { + return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, _Cmp_collate_translateleft<_RxTraits>{_Traits}); + } else { + return _STD _Cmp_chrange(_Begin1, _End1, _Begin2, _End2, equal_to{}); + } } template @@ -3781,7 +3827,13 @@ _It _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_class(_Node_base* _Nx, template bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Better_match() { // check for better match under leftmost-longest rule - for (unsigned int _Ix = 0; _Ix < _Ncap; ++_Ix) { // check each capture group + + // a longer match is better than a shorter one + if (_Res._Cur != _Tgt_state._Cur) { + return _STD distance(_Begin, _Res._Cur) < _STD distance(_Begin, _Tgt_state._Cur); + } + + for (unsigned int _Ix = 1U; _Ix < _Ncap; ++_Ix) { // check each capture group // any match (even an empty one) is better than no match at all if (_Res._Grp_valid[_Ix] != _Tgt_state._Grp_valid[_Ix]) { return _Tgt_state._Grp_valid[_Ix]; @@ -3830,6 +3882,11 @@ typename _RxTraits::char_class_type _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Al return _Traits.lookup_classname(_Ptr, _Ptr + 1, (_Sflags & regex_constants::icase) != 0); } +template +bool _Is_ecmascript_line_terminator(_Elem _Ch) { + return _Ch == _Meta_nl || _Ch == _Meta_cr || _Ch == _Meta_ls || _Ch == _Meta_ps; +} + template bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _Nx) { // check for match if (0 < _Max_stack_count && --_Max_stack_count <= 0) { @@ -3849,18 +3906,19 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N case _N_bol: if ((_Mflags & regex_constants::match_prev_avail) || _Tgt_state._Cur != _Begin) { 
// if --_Cur is valid, check for preceding newline - _Failed = *_Prev_iter(_Tgt_state._Cur) != _Meta_nl; + _Failed = !(_Sflags & regex_constants::multiline) + || !_STD _Is_ecmascript_line_terminator(*_STD _Prev_iter(_Tgt_state._Cur)); } else { _Failed = (_Mflags & regex_constants::match_not_bol) != 0; } - break; case _N_eol: if (_Tgt_state._Cur == _End) { _Failed = (_Mflags & regex_constants::match_not_eol) != 0; } else { - _Failed = *_Tgt_state._Cur != _Meta_nl; + _Failed = + !(_Sflags & regex_constants::multiline) || !_STD _Is_ecmascript_line_terminator(*_Tgt_state._Cur); } break; @@ -3878,7 +3936,7 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N if (_Ch == _Elem()) { _Failed = true; } - } else if (_Ch == _Meta_nl || _Ch == _Meta_cr || _Ch == _Meta_ls || _Ch == _Meta_ps) { // ECMAScript + } else if (_STD _Is_ecmascript_line_terminator(_Ch)) { _Failed = true; } @@ -3893,7 +3951,7 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N { // check for string match _Node_str<_Elem>* _Node = static_cast<_Node_str<_Elem>*>(_Nx); _It _Res0; - if ((_Res0 = _Compare(_Tgt_state._Cur, _End, _Node->_Data._Str(), + if ((_Res0 = _STD _Compare_translate_left(_Tgt_state._Cur, _End, _Node->_Data._Str(), _Node->_Data._Str() + _Node->_Data._Size(), _Traits, _Sflags)) != _Tgt_state._Cur) { _Tgt_state._Cur = _Res0; @@ -3939,8 +3997,10 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N case _N_capture: { // record current position - _Node_capture* _Node = static_cast<_Node_capture*>(_Nx); - _Tgt_state._Grps[_Node->_Idx]._Begin = _Tgt_state._Cur; + _Node_capture* _Node = static_cast<_Node_capture*>(_Nx); + if (_Node->_Idx != 0U) { + _Tgt_state._Grps[_Node->_Idx]._Begin = _Tgt_state._Cur; + } break; } @@ -3948,7 +4008,7 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N { // record successful capture _Node_end_group* _Node = 
static_cast<_Node_end_group*>(_Nx); _Node_capture* _Node0 = static_cast<_Node_capture*>(_Node->_Back); - if (_Cap || _Node0->_Idx != 0) { // update capture data + if (_Node0->_Idx != 0U) { // update capture data _Tgt_state._Grp_valid[_Node0->_Idx] = true; _Tgt_state._Grps[_Node0->_Idx]._End = _Tgt_state._Cur; } @@ -3966,7 +4026,8 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N _It _Bx = _Tgt_state._Grps[_Node->_Idx]._Begin; _It _Ex = _Tgt_state._Grps[_Node->_Idx]._End; if (_Bx != _Ex // _Bx == _Ex for zero-length match - && (_Res0 = _Compare(_Tgt_state._Cur, _End, _Bx, _Ex, _Traits, _Sflags)) == _Tgt_state._Cur) { + && (_Res0 = _STD _Compare_translate_both(_Tgt_state._Cur, _End, _Bx, _Ex, _Traits, _Sflags)) + == _Tgt_state._Cur) { _Failed = true; } else { _Tgt_state._Cur = _Res0; @@ -4046,40 +4107,68 @@ bool _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N } template -_BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg, _BidIt _Last, _Node_base* _Node_arg) { +_BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip( + _BidIt _First_arg, _BidIt _Last, _Node_base* _Node_arg, unsigned int _Recursion_depth) { // skip until possible match // assumes --_First_arg is valid - _Node_base* _Nx = _Node_arg ? _Node_arg : _Rep; + static constexpr char _Line_terminators_char[] = {static_cast(_Meta_cr), static_cast(_Meta_nl)}; + static constexpr wchar_t _Line_terminators_wchar_t[] = {static_cast(_Meta_cr), + static_cast(_Meta_nl), static_cast(_Meta_ls), static_cast(_Meta_ps)}; + constexpr unsigned int _Max_recursion_depth = 50U; + _Node_base* _Nx = _Node_arg ? 
_Node_arg : _Rep; while (_First_arg != _Last && _Nx) { // check current node switch (_Nx->_Kind) { // handle current node's type case _N_nop: break; - case _N_bol: - { // check for embedded newline - // return iterator to character just after the newline; for input like "\nabc" - // matching "^abc", _First_arg could be pointing at 'a', so we need to check - // --_First_arg for '\n' - if (*_Prev_iter(_First_arg) != _Meta_nl) { - _First_arg = _STD find(_First_arg, _Last, _Meta_nl); + case _N_bol: // check for beginning anchor + if (_Sflags & regex_constants::multiline) { + // multiline mode: check for embedded line terminator + // return iterator to character just after the newline; for input like "\nabc" + // matching "^abc", _First_arg could be pointing at 'a', so we need to check + // --_First_arg for '\n' + if (!_STD _Is_ecmascript_line_terminator(*_STD _Prev_iter(_First_arg))) { + if constexpr (sizeof(_Elem) == 1) { + _First_arg = _STD find_first_of( + _First_arg, _Last, _Line_terminators_char, _STD end(_Line_terminators_char)); + } else { + _First_arg = _STD find_first_of( + _First_arg, _Last, _Line_terminators_wchar_t, _STD end(_Line_terminators_wchar_t)); + } + if (_First_arg != _Last) { ++_First_arg; } } return _First_arg; + } else { + // non-multiline mode: never matches because --_First_arg is valid + return _Last; } case _N_eol: - return _STD find(_First_arg, _Last, _Meta_nl); + if (_Sflags & regex_constants::multiline) { + // multiline mode: matches at next line terminator or end of input + if constexpr (sizeof(_Elem) == 1) { + return _STD find_first_of( + _First_arg, _Last, _Line_terminators_char, _STD end(_Line_terminators_char)); + } else { + return _STD find_first_of( + _First_arg, _Last, _Line_terminators_wchar_t, _STD end(_Line_terminators_wchar_t)); + } + } else { + return _Last; // non-multiline mode: matches at end of input or not at all + } case _N_str: { // check for string match _Node_str<_Elem>* _Node = static_cast<_Node_str<_Elem>*>(_Nx); 
for (; _First_arg != _Last; ++_First_arg) { // look for starting match _BidIt _Next = _First_arg; - if (_Compare(_First_arg, ++_Next, _Node->_Data._Str(), _Node->_Data._Str() + 1, _Traits, _Sflags) + if (_STD _Compare_translate_left( + _First_arg, ++_Next, _Node->_Data._Str(), _Node->_Data._Str() + 1, _Traits, _Sflags) != _First_arg) { break; } @@ -4120,6 +4209,9 @@ _BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg // GH-5452: If this node has two or more branches, // examining all these branches has quadratic worst-case complexity. // Thus, we only continue if this node has a single branch only. + // TRANSITION, ABI: After GH-5539, the parser no longer generates single-branch _N_if nodes. + // But we have to retain this special handling to avoid performance regression + // when an old parser gets mixed with a new matcher. _Node_if* _Node = static_cast<_Node_if*>(_Nx); if (_Node->_Child) { @@ -4137,17 +4229,54 @@ _BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg break; } + case _N_assert: + { + if (_Recursion_depth >= _Max_recursion_depth) { + return _First_arg; + } + + _Node_assert* _Node = static_cast<_Node_assert*>(_Nx); + _First_arg = _Skip(_First_arg, _Last, _Node->_Child); + _BidIt _Next; + for (;;) { + _Next = _Skip(_First_arg, _Last, _Node->_Next, _Recursion_depth + 1U); + if (_Next == _First_arg) { + return _First_arg; + } + + _First_arg = _Skip(_Next, _Last, _Node->_Child, _Recursion_depth + 1U); + if (_Next == _First_arg) { + return _First_arg; + } + } + } + + case _N_neg_assert: + // we skip the negated assertion body and continue examining the rest of the regex + break; + + case _N_wbound: + { + bool _Negated = (_Nx->_Flags & _Fl_negate) != 0; + bool _Prev_word = _STD _Is_word(*_STD _Prev_iter(_First_arg)); + for (; _First_arg != _Last; ++_First_arg) { + bool _Next_word = _STD _Is_word(*_First_arg); + if (_Negated == (_Next_word == _Prev_word)) { + break; + } + _Prev_word = _Next_word; + } + 
return _First_arg; + } + case _N_begin: + case _N_endif: break; case _N_end: case _N_none: - case _N_wbound: case _N_dot: - case _N_assert: - case _N_neg_assert: case _N_back: - case _N_endif: case _N_end_rep: default: return _First_arg; @@ -4160,19 +4289,19 @@ _BidIt _Matcher2<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Skip(_BidIt _First_arg } template -[[noreturn]] void _Parser<_FwdIt, _Elem, _RxTraits>::_Error(regex_constants::error_type _Code) { // handle error +[[noreturn]] void _Parser2<_FwdIt, _Elem, _RxTraits>::_Error(regex_constants::error_type _Code) { // handle error _Xregex_error(_Code); } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_Is_esc(_FwdIt _Ch0) const { // assumes _Ch0 != _End - return ++_Ch0 != _End +bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Is_esc(_FwdIt _Ch0) const { // assumes _Ch0 != _End + return _Mode == _Lex_mode::_Default && ++_Ch0 != _End && ((!(_L_flags & _L_nex_grp) && (*_Ch0 == _Meta_lpar || *_Ch0 == _Meta_rpar)) || (!(_L_flags & _L_nex_rep) && (*_Ch0 == _Meta_lbr || *_Ch0 == _Meta_rbr))); } template -void _Parser<_FwdIt, _Elem, _RxTraits>::_Trans() { // map character to meta-character +void _Parser2<_FwdIt, _Elem, _RxTraits>::_Trans() { // map character to meta-character static constexpr char _Meta_map[] = {_Meta_lpar, _Meta_rpar, _Meta_dlr, _Meta_caret, _Meta_dot, _Meta_star, _Meta_plus, _Meta_query, _Meta_lsq, _Meta_rsq, _Meta_bar, _Meta_esc, _Meta_dash, _Meta_lbr, _Meta_rbr, _Meta_comma, _Meta_colon, _Meta_equal, _Meta_exc, _Meta_nl, _Meta_cr, _Meta_bsp, 0}; // array of meta chars @@ -4278,7 +4407,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Trans() { // map character to meta-char } template -void _Parser<_FwdIt, _Elem, _RxTraits>::_Next() { // advance to next input character +void _Parser2<_FwdIt, _Elem, _RxTraits>::_Next() { // advance to next input character if (_Pat != _End) { // advance if (*_Pat == _Meta_esc && _Is_esc(_Pat)) { ++_Pat; @@ -4290,7 +4419,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Next() { // advance 
to next input chara } template -void _Parser<_FwdIt, _Elem, _RxTraits>::_Expect(_Meta_type _St, regex_constants::error_type _Code) { +void _Parser2<_FwdIt, _Elem, _RxTraits>::_Expect(_Meta_type _St, regex_constants::error_type _Code) { // check whether current meta-character is _St if (_Mchar != _St) { _Error(_Code); @@ -4300,7 +4429,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Expect(_Meta_type _St, regex_constants: } template -int _Parser<_FwdIt, _Elem, _RxTraits>::_Do_digits( +int _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_digits( int _Base, int _Initial, int _Count, regex_constants::error_type _Error_type) { // translate digits to numeric value int _Chv; _Val = _Initial; @@ -4317,25 +4446,25 @@ int _Parser<_FwdIt, _Elem, _RxTraits>::_Do_digits( } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_DecimalDigits3( +bool _Parser2<_FwdIt, _Elem, _RxTraits>::_DecimalDigits( const regex_constants::error_type _Error_type, const int _Initial /* = 0 */) { // check for decimal value return _Do_digits(10, _Initial, INT_MAX, _Error_type) != INT_MAX; } template -void _Parser<_FwdIt, _Elem, _RxTraits>::_HexDigits(int _Count) { // check for _Count hex digits +void _Parser2<_FwdIt, _Elem, _RxTraits>::_HexDigits(int _Count) { // check for _Count hex digits if (_Do_digits(16, 0, _Count, regex_constants::error_escape) != 0) { _Error(regex_constants::error_escape); } } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_OctalDigits() { // check for up to 3 octal digits +bool _Parser2<_FwdIt, _Elem, _RxTraits>::_OctalDigits() { // check for up to 3 octal digits return _Do_digits(8, 0, 3, regex_constants::error_escape) != 3; } template -_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_Do_ex_class2( +_Prs_ret _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_ex_class( _Meta_type _End_arg) { // handle delimited expressions within bracket expression const regex_constants::error_type _Errtype = _End_arg == _Meta_colon ? 
regex_constants::error_ctype : regex_constants::error_collate; @@ -4391,20 +4520,20 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_Do_ex_class2( } if (_End_arg == _Meta_equal) { // process equivalence - _Nfa._Add_equiv2(_Coll_elem_first, _Coll_elem_last); + _Nfa._Add_equiv(_Coll_elem_first, _Coll_elem_last); return _Prs_set; } else { // process collating element // Character ranges with multi-character bounds cannot be represented in NFA nodes yet (see GH-5391). // Provisionally treat multi-character collating elements as character sets. - _Nfa._Add_coll2(_Coll_elem_first, _Coll_elem_last); + _Nfa._Add_coll(_Coll_elem_first, _Coll_elem_last); return _Prs_set; } } } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterClassEscape(bool _Addit) { // check for character class escape +bool _Parser2<_FwdIt, _Elem, _RxTraits>::_CharacterClassEscape(bool _Addit) { // check for character class escape typename _RxTraits::char_class_type _Cls; _FwdIt _Ch0 = _Pat; if (_Ch0 == _End || (_Cls = _Traits.lookup_classname(_Pat, ++_Ch0, (_Flags & regex_constants::icase) != 0)) == 0) { @@ -4433,7 +4562,7 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterClassEscape(bool _Addit) { // } template -_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape3() { // check for class escape +_Prs_ret _Parser2<_FwdIt, _Elem, _RxTraits>::_ClassEscape() { // check for class escape if ((_L_flags & _L_esc_bsp) && _Char == _Esc_ctrl_b) { // handle backspace escape _Next(); _Val = _Meta_bsp; @@ -4456,16 +4585,16 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassEscape3() { // check for class } template -_Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom(const bool _Initial) { // check for class atom - if (_Mchar == _Meta_esc && (_L_flags & (_L_grp_esc | _L_ident_awk))) { // check for valid escape sequence +_Prs_ret _Parser2<_FwdIt, _Elem, _RxTraits>::_ClassAtom(const bool _Initial) { // check for class atom + if (_Mchar == _Meta_esc && (_L_flags & _L_grp_esc)) { // check for valid 
escape sequence _Next(); - return _ClassEscape3(); + return _ClassEscape(); } else if (_Mchar == _Meta_lsq) { // check for valid delimited expression _Next(); if (_Mchar == _Meta_colon || _Mchar == _Meta_equal || _Mchar == _Meta_dot) { // handle delimited expression _Meta_type _St = _Mchar; _Next(); - return _Do_ex_class2(_St); + return _Do_ex_class(_St); } else { // handle ordinary [ _Val = _Meta_lsq; return _Prs_chr; @@ -4483,7 +4612,7 @@ _Prs_ret _Parser<_FwdIt, _Elem, _RxTraits>::_ClassAtom(const bool _Initial) { // } template -void _Parser<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid class ranges +void _Parser2<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid class ranges _Prs_ret _Ret; bool _Initial = true; @@ -4524,7 +4653,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid clas _Chr2 = _Traits.translate(_Chr2); } - _Nfa._Add_range3(_Chr1, _Chr2); + _Nfa._Add_range(_Chr1, _Chr2); } else if (_Ret == _Prs_chr) { _Nfa._Add_char_to_class(static_cast<_Elem>(_Val)); } @@ -4532,7 +4661,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_ClassRanges() { // check for valid clas } template -void _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterClass() { // add bracket expression +void _Parser2<_FwdIt, _Elem, _RxTraits>::_CharacterClass() { // add bracket expression _Nfa._Add_class(); if (_Mchar == _Meta_caret) { // negate bracket expression _Nfa._Negate(); @@ -4543,7 +4672,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterClass() { // add bracket expre } template -void _Parser<_FwdIt, _Elem, _RxTraits>::_Do_capture_group() { // add capture group +void _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_capture_group() { // add capture group ++_Grp_idx; if (_Grp_idx >= 1000) { // hardcoded limit @@ -4558,40 +4687,40 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Do_capture_group() { // add capture gro } template -void _Parser<_FwdIt, _Elem, _RxTraits>::_Do_noncapture_group() { // add non-capture group +void _Parser2<_FwdIt, 
_Elem, _RxTraits>::_Do_noncapture_group() { // add non-capture group _Node_base* _Pos1 = _Nfa._Begin_group(); _Disjunction(); _Nfa._End_group(_Pos1); } template -void _Parser<_FwdIt, _Elem, _RxTraits>::_Do_assert_group(bool _Neg) { // add assert group +void _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_assert_group(bool _Neg) { // add assert group _Node_base* _Pos1 = _Nfa._Begin_assert_group(_Neg); _Disjunction(); _Nfa._End_assert_group(_Pos1); } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_Wrapped_disjunction() { // add disjunction inside group +bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Wrapped_disjunction() { // add disjunction inside group ++_Disj_count; if (!(_L_flags & _L_empty_grp) && _Mchar == _Meta_rpar) { _Error(regex_constants::error_paren); - } else if ((_L_flags & _L_nc_grp) && _Mchar == _Meta_query) { // check for valid ECMAScript (?x ... ) group + } else if ((_L_flags & _L_nc_asrt) && _Mchar == _Meta_query) { // check for valid ECMAScript (?x ... ) group _Next(); _Meta_type _Ch = _Mchar; _Next(); - if (_Ch == _Meta_colon) { + if (_Ch == _Meta_colon) { // process non-capture group (?:meow) _Do_noncapture_group(); - } else if (_Ch == _Meta_exc) { // process assert group, negating + } else if (_Ch == _Meta_exc) { // process negative lookahead assertion (?!meow) _Do_assert_group(true); --_Disj_count; return false; - } else if (_Ch == _Meta_equal) { // process assert group + } else if (_Ch == _Meta_equal) { // process positive lookahead assertion (?=meow) _Do_assert_group(false); --_Disj_count; return false; } else { - _Error(regex_constants::error_syntax); + _Error(regex_constants::error_badrepeat); } } else if (_Flags & regex_constants::nosubs) { _Do_noncapture_group(); @@ -4604,7 +4733,7 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_Wrapped_disjunction() { // add disjunct } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_IsIdentityEscape(bool _In_character_class) const { +bool _Parser2<_FwdIt, _Elem, _RxTraits>::_IsIdentityEscape(bool 
_In_character_class) const { // check for valid identity escape if (_L_flags & _L_ident_ECMA) { @@ -4654,7 +4783,7 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_IsIdentityEscape(bool _In_character_cla } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_IdentityEscape(bool _In_character_class) { +bool _Parser2<_FwdIt, _Elem, _RxTraits>::_IdentityEscape(bool _In_character_class) { // check whether an escape is valid, and process it if so if (_IsIdentityEscape(_In_character_class)) { _Val = _Char; @@ -4666,7 +4795,7 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_IdentityEscape(bool _In_character_class } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_Do_ffn(_Elem _Ch) { // check for limited file format escape characters +bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_ffn(_Elem _Ch) { // check for limited file format escape characters if (_Ch == _Esc_ctrl_f) { _Val = '\f'; } else if (_Ch == _Esc_ctrl_n) { @@ -4685,7 +4814,7 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_Do_ffn(_Elem _Ch) { // check for limite } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_Do_ffnx(_Elem _Ch) { // check for the remaining file format escape characters +bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Do_ffnx(_Elem _Ch) { // check for the remaining file format escape characters if (_Ch == _Esc_ctrl_a) { _Val = '\a'; } else if (_Ch == _Esc_ctrl_b) { @@ -4698,7 +4827,8 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_Do_ffnx(_Elem _Ch) { // check for the r } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterEscape(bool _In_character_class) { // check for valid character escape +bool _Parser2<_FwdIt, _Elem, _RxTraits>::_CharacterEscape(bool _In_character_class) { + // check for valid character escape if (_Mchar == _Meta_eos) { _Error(regex_constants::error_escape); } @@ -4707,7 +4837,11 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterEscape(bool _In_character_clas _Next(); } else if (_Char == _Esc_ctrl && (_L_flags & _L_esc_ctrl)) { // handle control escape sequence _Next(); - if 
(!_Traits.isctype(_Char, _RxTraits::_Ch_alpha)) { + + using _Uelem = typename _RxTraits::_Uelem; + _Uelem _UCh = static_cast<_Uelem>(_Char); + if (!((static_cast<_Uelem>('a') <= _UCh && _UCh <= static_cast<_Uelem>('z')) + || (static_cast<_Uelem>('A') <= _UCh && _UCh <= static_cast<_Uelem>('Z')))) { _Error(regex_constants::error_escape); } @@ -4736,17 +4870,17 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_CharacterEscape(bool _In_character_clas } template -void _Parser<_FwdIt, _Elem, _RxTraits>::_AtomEscape() { // check for valid atom escape +void _Parser2<_FwdIt, _Elem, _RxTraits>::_AtomEscape() { // check for valid atom escape if ((_L_flags & (_L_bzr_chr | _L_bckr)) && (_Val = _Traits.value(_Char, 10)) != -1) { // escaped decimal sequence _Next(); if ((_L_flags & _L_bzr_chr) && _Val == 0) { // handle \0 if (_Traits.value(_Char, 10) != -1) { _Error(regex_constants::error_escape); } - _Nfa._Add_char2(_Elem{}); + _Nfa._Add_char(_Elem{}); } else if (_L_flags & _L_bckr) { // check for valid backreference if (!(_L_flags & _L_lim_bckr)) { - (void) _DecimalDigits3(regex_constants::error_backref, _Val); + (void) _DecimalDigits(regex_constants::error_backref, _Val); } if (_Val == 0) { @@ -4759,14 +4893,14 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_AtomEscape() { // check for valid atom } } } else if (_CharacterEscape(false)) { - _Nfa._Add_char2(static_cast<_Elem>(_Val)); + _Nfa._Add_char(static_cast<_Elem>(_Val)); } else if (!(_L_flags & _L_esc_wsd) || !_CharacterClassEscape(true)) { _Error(regex_constants::error_escape); } } template -void _Parser<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier following atom +void _Parser2<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier following atom int _Min = 0; int _Max = -1; if (_Mchar != _Meta_star) { @@ -4776,7 +4910,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier _Max = 1; } else if (_Mchar == _Meta_lbr) { // check for valid bracketed value _Next(); - if 
(!_DecimalDigits3(regex_constants::error_badbrace)) { + if (!_DecimalDigits(regex_constants::error_badbrace)) { _Error(regex_constants::error_badbrace); } @@ -4786,7 +4920,7 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier } else { // check for decimal constant following comma _Next(); if (_Mchar != _Meta_rbr) { - if (!_DecimalDigits3(regex_constants::error_badbrace)) { + if (!_DecimalDigits(regex_constants::error_badbrace)) { _Error(regex_constants::error_badbrace); } @@ -4803,16 +4937,16 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Quantifier() { // check for quantifier } _Next(); - const bool _Greedy = !(_L_flags & _L_ngr_rep) || _Mchar != _Meta_query; + const bool _Greedy = !(_L_flags & _L_non_greedy) || _Mchar != _Meta_query; if (!_Greedy) { // add non-greedy repeat node _Next(); } - _Nfa._Add_rep2(_Min, _Max, _Greedy); + _Nfa._Add_rep(_Min, _Max, _Greedy); } template -bool _Parser<_FwdIt, _Elem, _RxTraits>::_Alternative() { // check for valid alternative +bool _Parser2<_FwdIt, _Elem, _RxTraits>::_Alternative() { // check for valid alternative bool _Found = false; for (;;) { // concatenate valid elements bool _Quant = true; @@ -4838,8 +4972,10 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_Alternative() { // check for valid alte _AtomEscape(); } } else if (_Mchar == _Meta_lsq) { // add bracket expression + _Mode = _Lex_mode::_Character_class; _Next(); _CharacterClass(); + _Mode = _Lex_mode::_Default; _Expect(_Meta_rsq, regex_constants::error_brack); } else if (_Mchar == _Meta_lpar) { // check for valid group _Next(); @@ -4849,7 +4985,7 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_Alternative() { // check for valid alte _Nfa._Add_bol(); _Next(); if ((_L_flags & _L_star_beg) && _Mchar == _Meta_star && !_Found) { - _Nfa._Add_char2(_Char); + _Nfa._Add_char(_Char); _Next(); } else { _Quant = false; @@ -4866,7 +5002,7 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_Alternative() { // check for valid alte } else if (_Mchar == _Meta_rsq && 
!(_L_flags & _L_brk_bal)) { _Error(regex_constants::error_brack); } else { // add character - _Nfa._Add_char2(_Char); + _Nfa._Add_char(_Char); _Next(); } @@ -4879,7 +5015,7 @@ bool _Parser<_FwdIt, _Elem, _RxTraits>::_Alternative() { // check for valid alte } template -void _Parser<_FwdIt, _Elem, _RxTraits>::_Disjunction() { // check for valid disjunction +void _Parser2<_FwdIt, _Elem, _RxTraits>::_Disjunction() { // check for valid disjunction _Node_base* _Pos1 = _Nfa._Getmark(); if (!_Alternative()) { if (_Mchar != _Meta_bar) { @@ -4891,20 +5027,22 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Disjunction() { // check for valid disj _Nfa._End_group(_Pos3); } - _Node_base* _Pos2 = _Nfa._Begin_if(_Pos1); - while (_Mchar == _Meta_bar) { // append terms as long as we keep finding | characters - _Next(); - if (!_Alternative()) { // zero-length trailing alternative - _Node_base* _Pos3 = _Nfa._Begin_group(); - _Nfa._End_group(_Pos3); - } + if (_Mchar == _Meta_bar) { + _Node_base* _Pos2 = _Nfa._Begin_if(_Pos1); + do { // append terms as long as we keep finding | characters + _Next(); + if (!_Alternative()) { // zero-length trailing alternative + _Node_base* _Pos3 = _Nfa._Begin_group(); + _Nfa._End_group(_Pos3); + } - _Nfa._Else_if(_Pos1, _Pos2); + _Nfa._Else_if(_Pos1, _Pos2); + } while (_Mchar == _Meta_bar); } } template -void _Parser<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity( +void _Parser2<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity( _Node_base* _Nx, _Node_base* _Ne, _Node_rep* _Outer_rep) { // walks regex NFA, calculates values of _Node_rep::_Simple_loop for (; _Nx != _Ne && _Nx; _Nx = _Nx->_Next) { @@ -4967,6 +5105,17 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity( } } break; + + case _N_group: + case _N_capture: + // TRANSITION, requires more research to decide on the subset of loops that we can make simple: + // - Simple mode can square the running time when matching a regex to an input string in the current matcher + // - 
The optimal subset of simple loops for a non-recursive rewrite of the matcher aren't clear yet + if (_Outer_rep) { + _Outer_rep->_Simple_loop = 0; + } + break; + case _N_none: case _N_nop: case _N_bol: @@ -4974,10 +5123,8 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity( case _N_wbound: case _N_dot: case _N_str: - case _N_group: case _N_end_group: case _N_end_assert: - case _N_capture: case _N_end_capture: case _N_back: case _N_endif: @@ -4990,14 +5137,12 @@ void _Parser<_FwdIt, _Elem, _RxTraits>::_Calculate_loop_simplicity( } template -_Root_node* _Parser<_FwdIt, _Elem, _RxTraits>::_Compile() { // compile regular expression +_Root_node* _Parser2<_FwdIt, _Elem, _RxTraits>::_Compile() { // compile regular expression _Root_node* _Res = nullptr; _Tidy_guard _Guard{_STD addressof(_Nfa)}; _Node_base* _Pos1 = _Nfa._Begin_capture_group(0); _Disjunction(); - if (_Pat != _End) { - _Error(regex_constants::error_syntax); - } + _STL_INTERNAL_CHECK(_Pat == _End); _Nfa._End_group(_Pos1); _Res = _Nfa._End_pattern(); @@ -5009,26 +5154,27 @@ _Root_node* _Parser<_FwdIt, _Elem, _RxTraits>::_Compile() { // compile regular e } template -_Parser<_FwdIt, _Elem, _RxTraits>::_Parser( +_Parser2<_FwdIt, _Elem, _RxTraits>::_Parser2( const _RxTraits& _Tr, _FwdIt _Pfirst, _FwdIt _Plast, regex_constants::syntax_option_type _Fx) - : _Pat(_Pfirst), _Begin(_Pfirst), _End(_Plast), _Nfa(_Tr, _Fx), _Traits(_Tr), _Flags(_Fx) { + : _Pat(_Pfirst), _End(_Plast), _Nfa(_Tr, _Fx), _Traits(_Tr), _Flags(_Fx) { - constexpr unsigned int _ECMA_flags = _L_ext_rep | _L_alt_pipe | _L_nex_grp | _L_nex_rep | _L_nc_grp | _L_asrt_gen - | _L_asrt_wrd | _L_bckr | _L_ngr_rep | _L_esc_uni | _L_esc_hex | _L_esc_bsp - | _L_esc_ffn | _L_esc_wsd | _L_esc_ctrl | _L_bzr_chr | _L_grp_esc | _L_ident_ECMA - | _L_empty_grp; + constexpr unsigned long long _ECMA_flags = _L_ext_rep | _L_alt_pipe | _L_nex_grp | _L_nex_rep | _L_nc_asrt + | _L_asrt_wrd | _L_bckr | _L_non_greedy | _L_esc_uni | _L_esc_hex + | _L_esc_bsp 
| _L_esc_ffn | _L_esc_wsd | _L_esc_ctrl | _L_bzr_chr + | _L_grp_esc | _L_ident_ECMA | _L_empty_grp; - constexpr unsigned int _Basic_flags = - _L_bckr | _L_lim_bckr | _L_anch_rstr | _L_star_beg | _L_empty_grp | _L_brk_bal | _L_brk_rstr | _L_mtch_long; + constexpr unsigned long long _Basic_flags = _L_bckr | _L_lim_bckr | _L_anch_rstr | _L_star_beg | _L_empty_grp + | _L_brk_bal | _L_brk_rstr | _L_dsh_rstr | _L_mtch_long; - constexpr unsigned int _Grep_flags = _Basic_flags | _L_alt_nl | _L_no_nl; + constexpr unsigned long long _Grep_flags = _Basic_flags | _L_alt_nl | _L_no_nl; - constexpr unsigned int _Extended_flags = _L_ext_rep | _L_alt_pipe | _L_nex_grp | _L_nex_rep | _L_ident_ERE - | _L_paren_bal | _L_brk_bal | _L_brk_rstr | _L_mtch_long; + constexpr unsigned long long _Extended_flags = _L_ext_rep | _L_alt_pipe | _L_nex_grp | _L_nex_rep | _L_ident_ERE + | _L_paren_bal | _L_brk_bal | _L_brk_rstr | _L_dsh_rstr | _L_mtch_long; - constexpr unsigned int _Awk_flags = _Extended_flags | _L_esc_oct | _L_esc_ffn | _L_esc_ffnx | _L_ident_awk; + constexpr unsigned long long _Awk_flags = + _Extended_flags | _L_esc_oct | _L_esc_ffn | _L_esc_ffnx | _L_grp_esc | _L_ident_awk; - constexpr unsigned int _Egrep_flags = _Extended_flags | _L_alt_nl | _L_no_nl; + constexpr unsigned long long _Egrep_flags = _Extended_flags | _L_alt_nl | _L_no_nl; const regex_constants::syntax_option_type _Masked = _Flags & regex_constants::_Gmask; diff --git a/stl/inc/semaphore b/stl/inc/semaphore index 6825f53519..e012ece735 100644 --- a/stl/inc/semaphore +++ b/stl/inc/semaphore @@ -29,12 +29,6 @@ _STL_DISABLE_CLANG_WARNINGS _STD_BEGIN -template -_NODISCARD unsigned long long _Semaphore_deadline(const chrono::duration<_Rep, _Period>& _Rel_time) { - return __std_atomic_wait_get_deadline( - chrono::duration_cast>(_Rel_time).count()); -} - template _NODISCARD unsigned long _Semaphore_remaining_timeout(const chrono::time_point<_Clock, _Duration>& _Abs_time) { const auto _Now = _Clock::now(); @@ -145,11 
+139,11 @@ public: template _NODISCARD_TRY_CHANGE_STATE bool try_acquire_for(const chrono::duration<_Rep, _Period>& _Rel_time) { - auto _Deadline = _Semaphore_deadline(_Rel_time); + auto _Deadline = _STD chrono::steady_clock::now() + _Rel_time; ptrdiff_t _Current = _Counter.load(memory_order_relaxed); for (;;) { while (_Current == 0) { - const auto _Remaining_timeout = __std_atomic_wait_get_remaining_timeout(_Deadline); + const auto _Remaining_timeout = _Semaphore_remaining_timeout(_Deadline); if (_Remaining_timeout == 0) { return false; } @@ -257,7 +251,7 @@ public: template _NODISCARD_TRY_CHANGE_STATE bool try_acquire_for(const chrono::duration<_Rep, _Period>& _Rel_time) { - auto _Deadline = _Semaphore_deadline(_Rel_time); + auto _Deadline = _STD chrono::steady_clock::now() + _Rel_time; for (;;) { // "happens after release" ordering is provided by this exchange, so loads and waits can be relaxed // TRANSITION, GH-1133: should be memory_order_acquire @@ -267,7 +261,7 @@ public: } _STL_VERIFY(_Prev == 0, "Invariant: semaphore counter is non-negative and doesn't exceed max(), " "possibly caused by memory corruption"); - const auto _Remaining_timeout = __std_atomic_wait_get_remaining_timeout(_Deadline); + const auto _Remaining_timeout = _Semaphore_remaining_timeout(_Deadline); if (_Remaining_timeout == 0) { return false; } diff --git a/stl/inc/utility b/stl/inc/utility index 4570a7bb79..326b5f8978 100644 --- a/stl/inc/utility +++ b/stl/inc/utility @@ -958,9 +958,6 @@ _NODISCARD constexpr underlying_type_t<_Ty> to_underlying(_Ty _Value) noexcept { _EXPORT_STD [[noreturn]] __forceinline void unreachable() noexcept /* strengthened */ { _STL_UNREACHABLE; -#ifdef _DEBUG - _CSTD abort(); // likely to be called in debug mode, but can't be relied upon - already entered the UB territory -#endif // defined(_DEBUG) } template (_STD _Max_limit()); const size_type _Ints_max = this->_Myvec.max_size(); - if (_Ints_max > _Diff_max / _VBITS) { // max_size bound by difference_type 
limits + + // Use a const bool to avoid warning C4127 "conditional expression is constant" with Defect Report P2280R4 + const bool _Would_overflow = _Ints_max > _Diff_max / _VBITS; + if (_Would_overflow) { // max_size bound by difference_type limits return _Diff_max; } // max_size bound by underlying storage limits - return _Ints_max * _VBITS; + return static_cast(_Ints_max * _VBITS); } _NODISCARD_EMPTY_MEMBER _CONSTEXPR20 bool empty() const noexcept { diff --git a/stl/inc/xatomic_wait.h b/stl/inc/xatomic_wait.h index 5058335d97..0f67e66a81 100644 --- a/stl/inc/xatomic_wait.h +++ b/stl/inc/xatomic_wait.h @@ -40,12 +40,6 @@ int __stdcall __std_atomic_wait_indirect(const void* _Storage, void* _Comparand, void __stdcall __std_atomic_notify_one_indirect(const void* _Storage) noexcept; void __stdcall __std_atomic_notify_all_indirect(const void* _Storage) noexcept; -// These functions convert a duration into a time point in order to tolerate spurious wakes in atomic wait, and then -// convert back from the time point to individual wait attempts (which are limited by DWORD milliseconds to a length of -// ~49 days) -unsigned long long __stdcall __std_atomic_wait_get_deadline(unsigned long long _Timeout) noexcept; -unsigned long __stdcall __std_atomic_wait_get_remaining_timeout(unsigned long long _Deadline) noexcept; - } // extern "C" #pragma pop_macro("new") diff --git a/stl/inc/xcall_once.h b/stl/inc/xcall_once.h index 7d48489916..17d32e4680 100644 --- a/stl/inc/xcall_once.h +++ b/stl/inc/xcall_once.h @@ -95,7 +95,7 @@ void(call_once)(once_flag& _Once, _Fn&& _Fx, _Args&&... 
_Ax) // parentheses against common "#define call_once(flag,func) pthread_once(flag,func)" int _Pending; if (!_RENAME_WINDOWS_API(__std_init_once_begin_initialize)(&_Once._Opaque, 0, &_Pending, nullptr)) { - _CSTD abort(); + _STL_REPORT_ERROR("InitOnceBeginInitialize() failed"); } if (_Pending != 0) { diff --git a/stl/inc/xlocale b/stl/inc/xlocale index 77c8d1b4c9..b6c77fd7fa 100644 --- a/stl/inc/xlocale +++ b/stl/inc/xlocale @@ -446,7 +446,7 @@ const _Facet& __CRTDECL use_facet(const locale& _Loc) { // get facet reference f #if _HAS_EXCEPTIONS _Throw_bad_cast(); // lazy disallowed #else - _CSTD abort(); // lazy disallowed + _STL_REPORT_ERROR("lazy facet disallowed"); #endif } else { // queue up lazy facet for destruction auto _Pfmod = const_cast(_Psave); diff --git a/stl/inc/xutility b/stl/inc/xutility index e659ec5f5b..d570b508c8 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -63,6 +63,14 @@ _STL_DISABLE_CLANG_WARNINGS #endif // _USE_STD_VECTOR_FLOATING_ALGORITHMS && !_USE_STD_VECTOR_ALGORITHMS #endif // ^^^ defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) ^^^ +#ifndef _USE_BUILTIN_IS_TRIVIALLY_EQUALITY_COMPARABLE +#if defined(__clang__) && __clang_major__ >= 19 +#define _USE_BUILTIN_IS_TRIVIALLY_EQUALITY_COMPARABLE 1 +#else // ^^^ defined(__clang__) && __clang_major__ >= 19 / !defined(__clang__) || __clang_major__ < 19 vvv +#define _USE_BUILTIN_IS_TRIVIALLY_EQUALITY_COMPARABLE 0 +#endif // ^^^ !defined(__clang__) || __clang_major__ < 19 ^^^ +#endif // ^^^ !defined(_USE_BUILTIN_IS_TRIVIALLY_EQUALITY_COMPARABLE) ^^^ + #if _USE_STD_VECTOR_ALGORITHMS extern "C" { // The "noalias" attribute tells the compiler optimizer that pointers going into these hand-vectorized algorithms @@ -1113,7 +1121,8 @@ struct _Projected_impl { using value_type = remove_cvref_t>; [[noreturn]] indirect_result_t<_Proj&, _It> operator*() const { - _CSTD abort(); // shouldn't be called, see GH-3888 + _STL_REPORT_ERROR("std::projected::operator*() can't be called (N5008 [projected]/1)"); + 
_STL_UNREACHABLE; // no return value available for "continue on error" } }; }; @@ -4686,6 +4695,25 @@ constexpr bool _Is_pointer_address_convertible = is_void_v<_Source> #endif // defined(__cpp_lib_is_pointer_interconvertible) ; +#pragma warning(push) +#pragma warning(disable : 4242) // '%s': conversion from '%s' to '%s', possible loss of data +#pragma warning(disable : 4244) // '%s': conversion from '%s' to '%s', possible loss of data (Yes, duplicated message.) +#pragma warning(disable : 4267) // '%s': conversion from '%s' to '%s', possible loss of data (Yes, duplicated message!) +#pragma warning(disable : 4365) // '%s': conversion from '%s' to '%s', signed/unsigned mismatch +#pragma warning(disable : 5267) // definition of implicit assignment operator for '%s' is deprecated because it has a + // user-provided copy constructor + +// Determines whether _DestRef is trivially assignable from _SourceRef, and if _Remove_cvref_t<_DestRef> is a class, the +// assignment uses _Remove_cvref_t<_DestRef>::operator=. +// Not pedantically reliable for vectorization, but working due to bugs of MSVC, Clang, and EDG. See LLVM-37038. 
+template > +constexpr bool _Is_trivially_assignable_returning_same_reference_v = + is_same_v<_DestRef, decltype(_STD declval<_DestRef>() = _STD declval<_SourceRef>())>; +template +constexpr bool _Is_trivially_assignable_returning_same_reference_v<_DestRef, _SourceRef, false> = false; + +#pragma warning(pop) + template struct _Trivial_cat { using _USource = _Unwrap_enum_t<_Source>; @@ -4704,7 +4732,7 @@ struct _Trivial_cat { _Same_size_and_compatible && is_trivially_constructible_v<_Dest, _SourceRef>; static constexpr bool _Bitcopy_assignable = - _Same_size_and_compatible && is_trivially_assignable_v<_DestRef, _SourceRef>; + _Same_size_and_compatible && _Is_trivially_assignable_returning_same_reference_v<_DestRef, _SourceRef>; }; template @@ -4713,7 +4741,8 @@ struct _Trivial_cat<_Source*, _Dest*, _SourceRef, _DestRef> { _Is_pointer_address_convertible<_Source, _Dest> && is_trivially_constructible_v<_Dest*, _SourceRef>; static constexpr bool _Bitcopy_assignable = - _Is_pointer_address_convertible<_Source, _Dest> && is_trivially_assignable_v<_DestRef, _SourceRef>; + _Is_pointer_address_convertible<_Source, _Dest> + && _Is_trivially_assignable_returning_same_reference_v<_DestRef, _SourceRef>; }; struct _False_trivial_cat { @@ -5484,8 +5513,14 @@ inline constexpr bool _Can_memcmp_elements = true; template constexpr bool _Can_memcmp_elements<_Ty1*, _Ty2*, false> = _Is_pointer_address_comparable<_Ty1, _Ty2>; +#if _USE_BUILTIN_IS_TRIVIALLY_EQUALITY_COMPARABLE +template +constexpr bool _Can_memcmp_elements<_Elem1, _Elem2, false> = + is_same_v<_Elem1, _Elem2> && __is_trivially_equality_comparable(_Elem1); +#else // ^^^ _USE_BUILTIN_IS_TRIVIALLY_EQUALITY_COMPARABLE / !_USE_BUILTIN_IS_TRIVIALLY_EQUALITY_COMPARABLE vvv template constexpr bool _Can_memcmp_elements<_Elem1, _Elem2, false> = false; +#endif // ^^^ !_USE_BUILTIN_IS_TRIVIALLY_EQUALITY_COMPARABLE ^^^ // _Can_memcmp_elements_with_pred<_Elem1, _Elem2, _Pr> reports whether the memcmp optimization is applicable, // given 
contiguously stored elements. (This avoids having to repeat the metaprogramming that finds the element types.) @@ -5522,9 +5557,15 @@ template constexpr bool _Equal_memcmp_is_safe = _Equal_memcmp_is_safe_helper, remove_const_t<_Iter2>, remove_const_t<_Pr>>; +#if _USE_STD_VECTOR_ALGORITHMS +template +constexpr bool _Is_vector_element_size = _Size == 1 || _Size == 2 || _Size == 4 || _Size == 8; + // Can we activate the vector algorithms for std::search? template -constexpr bool _Vector_alg_in_search_is_safe = _Equal_memcmp_is_safe<_It1, _It2, _Pr>; +constexpr bool _Vector_alg_in_search_is_safe = + _Equal_memcmp_is_safe<_It1, _It2, _Pr> && _Is_vector_element_size)>; +#endif // _USE_STD_VECTOR_ALGORITHMS template _NODISCARD int _Memcmp_count(_CtgIt1 _First1, _CtgIt2 _First2, const size_t _Count) { @@ -5682,7 +5723,7 @@ namespace ranges { _It1 _First1, _It2 _First2, iter_difference_t<_It1> _Count, _Pr _Pred, _Pj1 _Proj1, _Pj2 _Proj2) { _STL_INTERNAL_CHECK(_Count >= 0); #if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Equal_memcmp_is_safe<_It1, _It2, _Pr> && is_same_v<_Pj1, identity> + if constexpr (_Vector_alg_in_search_is_safe<_It1, _It2, _Pr> && is_same_v<_Pj1, identity> && is_same_v<_Pj2, identity>) { if (!_STD is_constant_evaluated()) { constexpr size_t _Elem_size = sizeof(iter_value_t<_It1>); @@ -6832,7 +6873,7 @@ namespace ranges { } #if _USE_STD_VECTOR_ALGORITHMS - if constexpr (_Equal_memcmp_is_safe<_It, _It, _Pr> && sized_sentinel_for<_Se, _It> + if constexpr (_Vector_alg_in_search_is_safe<_It, _It, _Pr> && sized_sentinel_for<_Se, _It> && is_same_v<_Pj, identity>) { if (!_STD is_constant_evaluated()) { const auto _First_ptr = _STD _To_address(_First); diff --git a/stl/inc/yvals.h b/stl/inc/yvals.h index a1ebe2fd07..a971b51fb2 100644 --- a/stl/inc/yvals.h +++ b/stl/inc/yvals.h @@ -253,7 +253,9 @@ _EMIT_STL_ERROR(STL1008, "_STL_CALL_ABORT_INSTEAD_OF_INVALID_PARAMETER has been #define _MSVC_STL_DOOM_FUNCTION(mesg) ::_invoke_watson(nullptr, nullptr, nullptr, 0, 0) 
#else // Use the MSVC __fastfail intrinsic: extern "C" __declspec(noreturn) void __fastfail(unsigned int); // declared by -#define _MSVC_STL_DOOM_FUNCTION(mesg) __fastfail(5) // __fastfail(FAST_FAIL_INVALID_ARG), value defined by +#define _MSVC_STL_DOOM_FUNCTION(mesg) \ + __fastfail(5); /* __fastfail(FAST_FAIL_INVALID_ARG), value defined by */ \ + _STL_UNREACHABLE /* TRANSITION, DevCom-10914110 */ #endif // choose "doom function" #endif // ^^^ !defined(_MSVC_STL_DOOM_FUNCTION) ^^^ diff --git a/stl/inc/yvals_core.h b/stl/inc/yvals_core.h index 203e5b1520..be6c2ee43f 100644 --- a/stl/inc/yvals_core.h +++ b/stl/inc/yvals_core.h @@ -909,7 +909,7 @@ #define _CPPLIB_VER 650 #define _MSVC_STL_VERSION 145 -#define _MSVC_STL_UPDATE 202505L +#define _MSVC_STL_UPDATE 202506L #ifndef _ALLOW_COMPILER_AND_STL_VERSION_MISMATCH #if defined(__CUDACC__) && defined(__CUDACC_VER_MAJOR__) @@ -2012,15 +2012,11 @@ compiler option, or define _ALLOW_RTCc_IN_STL to suppress this error. #define _STATIC_CALL_OPERATOR #define _CONST_CALL_OPERATOR const #define _STATIC_LAMBDA -#elif defined(__clang__) || defined(__EDG__) // no workaround +#else // ^^^ workaround / no workaround vvv #define _STATIC_CALL_OPERATOR static #define _CONST_CALL_OPERATOR #define _STATIC_LAMBDA static -#else // TRANSITION, VSO-2383148, fixed in VS 2022 17.14 Preview 3 -#define _STATIC_CALL_OPERATOR static -#define _CONST_CALL_OPERATOR -#define _STATIC_LAMBDA -#endif // ^^^ workaround ^^^ +#endif // ^^^ no workaround ^^^ #ifdef __CUDACC__ // TRANSITION, CUDA 12.4 doesn't recognize MSVC __restrict; CUDA __restrict__ is not usable in C++ #define _RESTRICT diff --git a/stl/src/atomic_wait.cpp b/stl/src/atomic_wait.cpp index 4bafb1a86b..b20c6f5cc3 100644 --- a/stl/src/atomic_wait.cpp +++ b/stl/src/atomic_wait.cpp @@ -177,6 +177,7 @@ int __stdcall __std_atomic_wait_indirect(const void* _Storage, void* _Comparand, } } +// TRANSITION, ABI: preserved for binary compatibility unsigned long long __stdcall 
__std_atomic_wait_get_deadline(const unsigned long long _Timeout) noexcept { if (_Timeout == _Atomic_wait_no_deadline) { return _Atomic_wait_no_deadline; @@ -185,6 +186,7 @@ unsigned long long __stdcall __std_atomic_wait_get_deadline(const unsigned long } } +// TRANSITION, ABI: preserved for binary compatibility unsigned long __stdcall __std_atomic_wait_get_remaining_timeout(unsigned long long _Deadline) noexcept { static_assert(__std_atomic_wait_no_timeout == INFINITE, "__std_atomic_wait_no_timeout is passed directly to underlying API, so should match it"); diff --git a/stl/src/filesystem.cpp b/stl/src/filesystem.cpp index 07f6417430..4dbf0183b8 100644 --- a/stl/src/filesystem.cpp +++ b/stl/src/filesystem.cpp @@ -789,18 +789,21 @@ struct __std_fs_file_id { // typedef struct _FILE_ID_INFO { namespace { _Success_(return > 0 && return < nBufferLength) DWORD WINAPI _Stl_GetTempPath2W( _In_ DWORD nBufferLength, _Out_writes_to_opt_(nBufferLength, return +1) LPWSTR lpBuffer) noexcept { +#if !defined(_ONECORE) // See GH-3011: This is intentionally not attempting to cache the function pointer. // TRANSITION, ABI: This should use __crtGetTempPath2W after this code is moved into the STL's DLL. - using _Fun_ptr = decltype(&::GetTempPath2W); + // use GetTempPath2W if it is available (only on Windows 11+)... const auto _Kernel32 = ::GetModuleHandleW(L"kernel32.dll"); _Analysis_assume_(_Kernel32); - _Fun_ptr _PfGetTempPath2W = reinterpret_cast<_Fun_ptr>(::GetProcAddress(_Kernel32, "GetTempPath2W")); - if (!_PfGetTempPath2W) { - _PfGetTempPath2W = &::GetTempPathW; + const auto _Pf = reinterpret_cast(::GetProcAddress(_Kernel32, "GetTempPath2W")); + if (_Pf) { + return _Pf(nBufferLength, lpBuffer); } +#endif // ^^^ !defined(_ONECORE) ^^^ - return _PfGetTempPath2W(nBufferLength, lpBuffer); + // ...otherwise use GetTempPathW. 
+ return GetTempPathW(nBufferLength, lpBuffer); } } // unnamed namespace diff --git a/stl/src/tzdb.cpp b/stl/src/tzdb.cpp index 720064fefc..511951451b 100644 --- a/stl/src/tzdb.cpp +++ b/stl/src/tzdb.cpp @@ -567,7 +567,31 @@ void __stdcall __std_tzdb_delete_current_zone(__std_tzdb_current_zone_info* cons _Info->_Abbrev = _Allocate_wide_to_narrow(_Abbrev.get(), _Abbrev_len, _Info->_Err); if (_Info->_Abbrev == nullptr) { - return _Propagate_error(_Info); + const auto _Abs_offset = _Info->_Offset < 0 ? -_Info->_Offset : _Info->_Offset; + const auto _Offset_in_minutes = _Abs_offset / (60 * 1000); + const auto _Hours = _Offset_in_minutes / 60; + const auto _Mins = _Offset_in_minutes % 60; + + _STD unique_ptr _Fallback_abbrev{new (_STD nothrow) char[_Mins == 0 ? 4 : 6]}; + + if (_Fallback_abbrev == nullptr) { + return nullptr; + } + + _Fallback_abbrev[0] = _Info->_Offset < 0 ? '-' : '+'; + _Fallback_abbrev[1] = static_cast('0' + _Hours / 10); + _Fallback_abbrev[2] = static_cast('0' + _Hours % 10); + + if (_Mins == 0) { + _Fallback_abbrev[3] = '\0'; + } else { + _Fallback_abbrev[3] = static_cast('0' + _Mins / 10); + _Fallback_abbrev[4] = static_cast('0' + _Mins % 10); + _Fallback_abbrev[5] = '\0'; + } + + _Info->_Err = __std_tzdb_error::_Success; + _Info->_Abbrev = _Fallback_abbrev.release(); } return _Info.release(); diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index fb20d8d71c..b83b5fe396 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -144,13 +144,12 @@ __declspec(noalias) void __cdecl __std_swap_ranges_trivially_swappable_noalias( #endif #endif // ^^^ !defined(_M_ARM64EC) ^^^ - auto _First1c = static_cast(_First1); - const auto _Last1c = static_cast(_Last1); - auto _First2c = static_cast(_First2); - for (; _First1c != _Last1c; ++_First1c, ++_First2c) { - unsigned char _Ch = *_First1c; - *_First1c = *_First2c; - *_First2c = _Ch; + auto _First1c = static_cast(_First1); + auto _First2c = 
static_cast(_First2); + for (; _First1c != _Last1; ++_First1c, ++_First2c) { + const unsigned char _Ch = *_First1c; + *_First1c = *_First2c; + *_First2c = _Ch; } } @@ -163,6 +162,210 @@ void* __cdecl __std_swap_ranges_trivially_swappable( } // extern "C" +namespace { + namespace _Rotating { + void _Swap_3_ranges(void* _First1, void* const _Last1, void* _First2, void* _First3) noexcept { +#ifndef _M_ARM64EC + constexpr size_t _Mask_32 = ~((static_cast(1) << 5) - 1); + if (_Byte_length(_First1, _Last1) >= 32 && _Use_avx2()) { + const void* _Stop_at = _First1; + _Advance_bytes(_Stop_at, _Byte_length(_First1, _Last1) & _Mask_32); + do { + const __m256i _Val1 = _mm256_loadu_si256(static_cast<__m256i*>(_First1)); + const __m256i _Val2 = _mm256_loadu_si256(static_cast<__m256i*>(_First2)); + const __m256i _Val3 = _mm256_loadu_si256(static_cast<__m256i*>(_First3)); + _mm256_storeu_si256(static_cast<__m256i*>(_First1), _Val2); + _mm256_storeu_si256(static_cast<__m256i*>(_First2), _Val3); + _mm256_storeu_si256(static_cast<__m256i*>(_First3), _Val1); + _Advance_bytes(_First1, 32); + _Advance_bytes(_First2, 32); + _Advance_bytes(_First3, 32); + } while (_First1 != _Stop_at); + + _mm256_zeroupper(); // TRANSITION, DevCom-10331414 + } + + constexpr size_t _Mask_16 = ~((static_cast(1) << 4) - 1); + if (_Byte_length(_First1, _Last1) >= 16 && _Use_sse42()) { + const void* _Stop_at = _First1; + _Advance_bytes(_Stop_at, _Byte_length(_First1, _Last1) & _Mask_16); + do { + const __m128i _Val1 = _mm_loadu_si128(static_cast<__m128i*>(_First1)); + const __m128i _Val2 = _mm_loadu_si128(static_cast<__m128i*>(_First2)); + const __m128i _Val3 = _mm_loadu_si128(static_cast<__m128i*>(_First3)); + _mm_storeu_si128(static_cast<__m128i*>(_First1), _Val2); + _mm_storeu_si128(static_cast<__m128i*>(_First2), _Val3); + _mm_storeu_si128(static_cast<__m128i*>(_First3), _Val1); + _Advance_bytes(_First1, 16); + _Advance_bytes(_First2, 16); + _Advance_bytes(_First3, 16); + } while (_First1 != _Stop_at); + } 
+ +#if defined(_M_X64) // NOTE: UNALIGNED MEMORY ACCESSES + constexpr size_t _Mask_8 = ~((static_cast(1) << 3) - 1); + if (_Byte_length(_First1, _Last1) >= 8) { + const void* _Stop_at = _First1; + _Advance_bytes(_Stop_at, _Byte_length(_First1, _Last1) & _Mask_8); + do { + const unsigned long long _Val1 = *static_cast(_First1); + const unsigned long long _Val2 = *static_cast(_First2); + const unsigned long long _Val3 = *static_cast(_First3); + *static_cast(_First1) = _Val2; + *static_cast(_First2) = _Val3; + *static_cast(_First3) = _Val1; + _Advance_bytes(_First1, 8); + _Advance_bytes(_First2, 8); + _Advance_bytes(_First3, 8); + } while (_First1 != _Stop_at); + } +#elif defined(_M_IX86) // NOTE: UNALIGNED MEMORY ACCESSES + constexpr size_t _Mask_4 = ~((static_cast(1) << 2) - 1); + if (_Byte_length(_First1, _Last1) >= 4) { + const void* _Stop_at = _First1; + _Advance_bytes(_Stop_at, _Byte_length(_First1, _Last1) & _Mask_4); + do { + const unsigned long _Val1 = *static_cast(_First1); + const unsigned long _Val2 = *static_cast(_First2); + const unsigned long _Val3 = *static_cast(_First3); + *static_cast(_First1) = _Val2; + *static_cast(_First2) = _Val3; + *static_cast(_First3) = _Val1; + _Advance_bytes(_First1, 4); + _Advance_bytes(_First2, 4); + _Advance_bytes(_First3, 4); + } while (_First1 != _Stop_at); + } +#else +#error Unsupported architecture +#endif +#endif // ^^^ !defined(_M_ARM64EC) ^^^ + + auto _First1c = static_cast(_First1); + auto _First2c = static_cast(_First2); + auto _First3c = static_cast(_First3); + for (; _First1c != _Last1; ++_First1c, ++_First2c, ++_First3c) { + const unsigned char _Ch = *_First1c; + *_First1c = *_First2c; + *_First2c = *_First3c; + *_First3c = _Ch; + } + } + + + // TRANSITION, GH-5506 "VCRuntime: memmove() is surprisingly slow for more than 8 KB on certain CPUs": + // As a workaround, the following code calls memmove() for 8 KB portions. 
+ constexpr size_t _Portion_size = 8192; + constexpr size_t _Portion_mask = _Portion_size - 1; + static_assert((_Portion_size & _Portion_mask) == 0); + + void _Move_to_lower_address(void* _Dest, const void* _Src, const size_t _Size) noexcept { + const size_t _Whole_portions_size = _Size & ~_Portion_mask; + + void* _Dest_end = _Dest; + _Advance_bytes(_Dest_end, _Whole_portions_size); + + while (_Dest != _Dest_end) { + memmove(_Dest, _Src, _Portion_size); + _Advance_bytes(_Dest, _Portion_size); + _Advance_bytes(_Src, _Portion_size); + } + + if (const size_t _Tail = _Size - _Whole_portions_size; _Tail != 0) { + memmove(_Dest, _Src, _Tail); + } + } + + void _Move_to_higher_address(void* const _Dest, const void* const _Src, const size_t _Size) noexcept { + const size_t _Whole_portions_size = _Size & ~_Portion_mask; + + void* _Dest_end = _Dest; + _Advance_bytes(_Dest_end, _Whole_portions_size); + const void* _Src_end = _Src; + _Advance_bytes(_Src_end, _Whole_portions_size); + + if (const size_t _Tail = _Size - _Whole_portions_size; _Tail != 0) { + memmove(_Dest_end, _Src_end, _Tail); + } + + while (_Dest_end != _Dest) { + _Rewind_bytes(_Dest_end, _Portion_size); + _Rewind_bytes(_Src_end, _Portion_size); + memmove(_Dest_end, _Src_end, _Portion_size); + } + } + + constexpr size_t _Buf_size = 512; + + bool _Use_buffer(const size_t _Smaller, const size_t _Larger) noexcept { + return _Smaller <= _Buf_size && (_Smaller <= 128 || _Larger >= _Smaller * 2); + } + } // namespace _Rotating +} // unnamed namespace + +extern "C" { + +__declspec(noalias) void __stdcall __std_rotate(void* _First, void* const _Mid, void* _Last) noexcept { + unsigned char _Buf[_Rotating::_Buf_size]; + + for (;;) { + const size_t _Left = _Byte_length(_First, _Mid); + const size_t _Right = _Byte_length(_Mid, _Last); + + if (_Left <= _Right) { + if (_Left == 0) { + break; + } + + if (_Rotating::_Use_buffer(_Left, _Right)) { + memcpy(_Buf, _First, _Left); + _Rotating::_Move_to_lower_address(_First, _Mid, 
_Right); + _Advance_bytes(_First, _Right); + memcpy(_First, _Buf, _Left); + break; + } + + void* _Mid2 = _Last; + _Rewind_bytes(_Mid2, _Left); + if (_Left * 2 > _Right) { + __std_swap_ranges_trivially_swappable_noalias(_Mid2, _Last, _First); + _Last = _Mid2; + } else { + void* _Mid3 = _Mid2; + _Rewind_bytes(_Mid3, _Left); + _Rotating::_Swap_3_ranges(_Mid2, _Last, _First, _Mid3); + _Last = _Mid3; + } + } else { + if (_Right == 0) { + break; + } + + if (_Rotating::_Use_buffer(_Right, _Left)) { + _Rewind_bytes(_Last, _Right); + memcpy(_Buf, _Last, _Right); + void* _Mid2 = _First; + _Advance_bytes(_Mid2, _Right); + _Rotating::_Move_to_higher_address(_Mid2, _First, _Left); + memcpy(_First, _Buf, _Right); + break; + } + + if (_Right * 2 > _Left) { + __std_swap_ranges_trivially_swappable_noalias(_Mid, _Last, _First); + _Advance_bytes(_First, _Right); + } else { + void* _Mid2 = _First; + _Advance_bytes(_Mid2, _Right); + _Rotating::_Swap_3_ranges(_Mid, _Last, _Mid2, _First); + _Advance_bytes(_First, _Right * 2); + } + } + } +} + +} // extern "C" + namespace { namespace _Reversing { #ifdef _M_ARM64EC @@ -382,107 +585,6 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8( } // extern "C" -namespace { - namespace _Rotating { - // TRANSITION, GH-5506 "VCRuntime: memmove() is surprisingly slow for more than 8 KB on certain CPUs": - // As a workaround, the following code calls memmove() for 8 KB portions. 
- constexpr size_t _Portion_size = 8192; - constexpr size_t _Portion_mask = _Portion_size - 1; - static_assert((_Portion_size & _Portion_mask) == 0); - - void _Move_to_lower_address(void* _Dest, const void* _Src, const size_t _Size) noexcept { - const size_t _Whole_portions_size = _Size & ~_Portion_mask; - - void* _Dest_end = _Dest; - _Advance_bytes(_Dest_end, _Whole_portions_size); - - while (_Dest != _Dest_end) { - memmove(_Dest, _Src, _Portion_size); - _Advance_bytes(_Dest, _Portion_size); - _Advance_bytes(_Src, _Portion_size); - } - - if (const size_t _Tail = _Size - _Whole_portions_size; _Tail != 0) { - memmove(_Dest, _Src, _Tail); - } - } - - void _Move_to_higher_address(void* const _Dest, const void* const _Src, const size_t _Size) noexcept { - const size_t _Whole_portions_size = _Size & ~_Portion_mask; - - void* _Dest_end = _Dest; - _Advance_bytes(_Dest_end, _Whole_portions_size); - const void* _Src_end = _Src; - _Advance_bytes(_Src_end, _Whole_portions_size); - - if (const size_t _Tail = _Size - _Whole_portions_size; _Tail != 0) { - memmove(_Dest_end, _Src_end, _Tail); - } - - while (_Dest_end != _Dest) { - _Rewind_bytes(_Dest_end, _Portion_size); - _Rewind_bytes(_Src_end, _Portion_size); - memmove(_Dest_end, _Src_end, _Portion_size); - } - } - - constexpr size_t _Buf_size = 512; - - bool _Use_buffer(const size_t _Smaller, const size_t _Larger) noexcept { - return _Smaller <= _Buf_size && (_Smaller <= 128 || _Larger >= _Smaller * 2); - } - } // namespace _Rotating -} // unnamed namespace - -extern "C" { - -__declspec(noalias) void __stdcall __std_rotate(void* _First, void* const _Mid, void* _Last) noexcept { - unsigned char _Buf[_Rotating::_Buf_size]; - - for (;;) { - const size_t _Left = _Byte_length(_First, _Mid); - const size_t _Right = _Byte_length(_Mid, _Last); - - if (_Left <= _Right) { - if (_Left == 0) { - break; - } - - if (_Rotating::_Use_buffer(_Left, _Right)) { - memcpy(_Buf, _First, _Left); - _Rotating::_Move_to_lower_address(_First, _Mid, 
_Right); - _Advance_bytes(_First, _Right); - memcpy(_First, _Buf, _Left); - break; - } - - void* _Mid2 = _Last; - _Rewind_bytes(_Mid2, _Left); - __std_swap_ranges_trivially_swappable_noalias(_Mid2, _Last, _First); - _Last = _Mid2; - } else { - if (_Right == 0) { - break; - } - - if (_Rotating::_Use_buffer(_Right, _Left)) { - _Rewind_bytes(_Last, _Right); - memcpy(_Buf, _Last, _Right); - void* _Mid2 = _First; - _Advance_bytes(_Mid2, _Right); - _Rotating::_Move_to_higher_address(_Mid2, _First, _Left); - memcpy(_First, _Buf, _Right); - break; - } - - __std_swap_ranges_trivially_swappable_noalias(_Mid, _Last, _First); - _Advance_bytes(_First, _Right); - } - } -} - -} // extern "C" - namespace { namespace _Sorting { enum _Min_max_mode { @@ -499,6 +601,7 @@ namespace { #ifndef _M_ARM64EC struct _Traits_sse_base { + using _Guard = char; static constexpr bool _Vectorized = true; static constexpr size_t _Vec_size = 16; static constexpr size_t _Vec_mask = 0xF; @@ -524,6 +627,7 @@ namespace { }; struct _Traits_avx_base { + using _Guard = _Zeroupper_on_exit; static constexpr bool _Vectorized = true; static constexpr size_t _Vec_size = 32; static constexpr size_t _Vec_mask = 0x1F; @@ -2307,6 +2411,8 @@ namespace { #ifdef _M_ARM64EC static_assert(false, "No vectorization for _M_ARM64EC yet"); #else // ^^^ defined(_M_ARM64EC) / !defined(_M_ARM64EC) vvv + [[maybe_unused]] typename _Traits::_Guard _Guard; // TRANSITION, DevCom-10331414 + constexpr bool _Sign_cor = static_cast<_Ty>(-1) > _Ty{0}; const size_t _Total_size_bytes = _Byte_length(_First, _Last); @@ -2368,8 +2474,6 @@ namespace { _Advance_bytes(_First, _Tail_byte_size); } } - - _Traits::_Exit_vectorized(); // TRANSITION, DevCom-10331414 #endif // ^^^ !defined(_M_ARM64EC) ^^^ } @@ -2782,7 +2886,7 @@ namespace { do { const __m256i _Data = _mm256_loadu_si256(static_cast(_First)); - int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)); + unsigned int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, 
_Comparand)); if constexpr (_Pred == _Predicate::_Not_equal) { _Bingo ^= 0xFFFF'FFFF; @@ -2801,7 +2905,7 @@ namespace { const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size); const __m256i _Data = _mm256_maskload_epi32(static_cast(_First), _Tail_mask); const __m256i _Cmp = _Traits::_Cmp_avx(_Data, _Comparand); - int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask)); + unsigned int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask)); if constexpr (_Pred == _Predicate::_Not_equal) { _Bingo ^= (1 << _Avx_tail_size) - 1; @@ -2826,7 +2930,7 @@ namespace { do { const __m128i _Data = _mm_loadu_si128(static_cast(_First)); - int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)); + unsigned int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)); if constexpr (_Pred == _Predicate::_Not_equal) { _Bingo ^= 0xFFFF; @@ -2873,7 +2977,7 @@ namespace { do { _Rewind_bytes(_Last, 32); const __m256i _Data = _mm256_loadu_si256(static_cast(_Last)); - int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)); + unsigned int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)); if constexpr (_Pred == _Predicate::_Not_equal) { _Bingo ^= 0xFFFF'FFFF; @@ -2891,7 +2995,7 @@ namespace { const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size); const __m256i _Data = _mm256_maskload_epi32(static_cast(_Last), _Tail_mask); const __m256i _Cmp = _Traits::_Cmp_avx(_Data, _Comparand); - int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask)); + unsigned int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask)); if constexpr (_Pred == _Predicate::_Not_equal) { _Bingo ^= (1 << _Avx_tail_size) - 1; @@ -2915,7 +3019,7 @@ namespace { do { _Rewind_bytes(_Last, 16); const __m128i _Data = _mm_loadu_si128(static_cast(_Last)); - int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)); + unsigned int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)); if 
constexpr (_Pred == _Predicate::_Not_equal) { _Bingo ^= 0xFFFF; @@ -2979,9 +3083,9 @@ namespace { const void* _Next = _First; _Advance_bytes(_Next, sizeof(_Ty)); - const __m256i _Data = _mm256_loadu_si256(static_cast(_First)); - const __m256i _Comparand = _mm256_loadu_si256(static_cast(_Next)); - const int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)); + const __m256i _Data = _mm256_loadu_si256(static_cast(_First)); + const __m256i _Comparand = _mm256_loadu_si256(static_cast(_Next)); + const unsigned int _Bingo = _mm256_movemask_epi8(_Traits::_Cmp_avx(_Data, _Comparand)); if (_Bingo != 0) { const unsigned long _Offset = _tzcnt_u32(_Bingo); @@ -2996,11 +3100,11 @@ namespace { const void* _Next = _First; _Advance_bytes(_Next, sizeof(_Ty)); - const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size); - const __m256i _Data = _mm256_maskload_epi32(static_cast(_First), _Tail_mask); - const __m256i _Comparand = _mm256_maskload_epi32(static_cast(_Next), _Tail_mask); - const __m256i _Cmp = _Traits::_Cmp_avx(_Data, _Comparand); - const int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask)); + const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size); + const __m256i _Data = _mm256_maskload_epi32(static_cast(_First), _Tail_mask); + const __m256i _Comparand = _mm256_maskload_epi32(static_cast(_Next), _Tail_mask); + const __m256i _Cmp = _Traits::_Cmp_avx(_Data, _Comparand); + const unsigned int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Cmp, _Tail_mask)); if (_Bingo != 0) { const unsigned long _Offset = _tzcnt_u32(_Bingo); @@ -3022,9 +3126,9 @@ namespace { const void* _Next = _First; _Advance_bytes(_Next, sizeof(_Ty)); - const __m128i _Data = _mm_loadu_si128(static_cast(_First)); - const __m128i _Comparand = _mm_loadu_si128(static_cast(_Next)); - const int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)); + const __m128i _Data = _mm_loadu_si128(static_cast(_First)); + const __m128i _Comparand = 
_mm_loadu_si128(static_cast(_Next)); + const unsigned int _Bingo = _mm_movemask_epi8(_Traits::_Cmp_sse(_Data, _Comparand)); if (_Bingo != 0) { unsigned long _Offset; @@ -3081,8 +3185,8 @@ namespace { do { const __m256i _Data = _mm256_loadu_si256(reinterpret_cast(_First)); - const __m256i _Cmp = _Traits::_Cmp_avx(_Comparand, _Data); - const auto _Mask = static_cast(_mm256_movemask_epi8(_Cmp)); + const __m256i _Cmp = _Traits::_Cmp_avx(_Comparand, _Data); + const uint32_t _Mask = _mm256_movemask_epi8(_Cmp); uint64_t _MskX = uint64_t{_Carry} | (uint64_t{_Mask} << 32); @@ -3483,11 +3587,11 @@ namespace { } if (const size_t _Avx_tail_size = _Size_bytes & 0x1C; _Avx_tail_size != 0) { - const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size); - const __m256i _Data = _mm256_maskload_epi32(static_cast(_First), _Tail_mask); - const __m256i _Mask = _mm256_and_si256(_Traits::_Cmp_avx(_Data, _Comparand), _Tail_mask); - const int _Bingo = _mm256_movemask_epi8(_Mask); - const size_t _Tail_count = __popcnt(_Bingo); // Assume available with SSE4.2 + const __m256i _Tail_mask = _Avx2_tail_mask_32(_Avx_tail_size); + const __m256i _Data = _mm256_maskload_epi32(static_cast(_First), _Tail_mask); + const __m256i _Mask = _mm256_and_si256(_Traits::_Cmp_avx(_Data, _Comparand), _Tail_mask); + const unsigned int _Bingo = _mm256_movemask_epi8(_Mask); + const size_t _Tail_count = __popcnt(_Bingo); // Assume available with SSE4.2 _Result += _Tail_count / sizeof(_Ty); _Advance_bytes(_First, _Avx_tail_size); } @@ -4190,8 +4294,8 @@ namespace { _Found = _mm_and_si128(_Found, _Found_part); } - const int _Bingo = _mm_cvtsi128_si32(_Found); - int _Found_pos = _Found_pos_init; + const unsigned int _Bingo = _mm_cvtsi128_si32(_Found); + int _Found_pos = _Found_pos_init; if (_Bingo != 0) { unsigned long _Tmp; @@ -4376,7 +4480,7 @@ namespace { } } - if (const int _Bingo = _mm256_movemask_epi8(_Eq); _Bingo != 0) { + if (const uint32_t _Bingo = _mm256_movemask_epi8(_Eq); _Bingo != 0) { const unsigned 
long _Offset = _tzcnt_u32(_Bingo); _Advance_bytes(_First1, _Offset); return _First1; @@ -4395,7 +4499,7 @@ namespace { } } - if (const int _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Eq, _Tail_mask)); _Bingo != 0) { + if (const uint32_t _Bingo = _mm256_movemask_epi8(_mm256_and_si256(_Eq, _Tail_mask)); _Bingo != 0) { const unsigned long _Offset = _tzcnt_u32(_Bingo); _Advance_bytes(_First1, _Offset); return _First1; @@ -4730,8 +4834,8 @@ namespace { _Advance_bytes(_Cur_needle, 16); } - const int _Bingo = _mm_cvtsi128_si32(_Found); - int _Found_pos = _Not_found; + const unsigned int _Bingo = _mm_cvtsi128_si32(_Found); + int _Found_pos = _Not_found; if (_Bingo != 0) { unsigned long _Tmp; @@ -4899,215 +5003,499 @@ __declspec(noalias) size_t __stdcall __std_find_last_not_of_trivial_pos_2(const namespace { namespace _Find_seq { #ifdef _M_ARM64EC - using _Find_seq_traits_1 = void; - using _Find_seq_traits_2 = void; - using _Find_seq_traits_4 = void; - using _Find_seq_traits_8 = void; + using _Find_seq_traits_avx_1 = void; + using _Find_seq_traits_avx_2 = void; + using _Find_seq_traits_avx_4 = void; + using _Find_seq_traits_avx_8 = void; + using _Find_seq_traits_sse_4 = void; + using _Find_seq_traits_sse_8 = void; #else // ^^^ defined(_M_ARM64EC) / !defined(_M_ARM64EC) vvv - struct _Find_seq_traits_1 { - static __m256i _Broadcast_avx(const __m128i _Data) noexcept { - return _mm256_broadcastb_epi8(_Data); - } + struct _Find_seq_traits_avx { + using _Guard = _Zeroupper_on_exit; - static unsigned long _Cmp_avx(const __m256i _Lhs, const __m256i _Rhs) noexcept { - return _mm256_movemask_epi8(_mm256_cmpeq_epi8(_Lhs, _Rhs)); - } - }; + static constexpr size_t _Vec_size = 32; - struct _Find_seq_traits_2 { - static __m256i _Broadcast_avx(const __m128i _Data) noexcept { - return _mm256_broadcastw_epi16(_Data); + static __m256i _Mask(const size_t _Count_in_bytes) noexcept { + return _Avx2_tail_mask_32(_Count_in_bytes); } - static unsigned long _Cmp_avx(const __m256i _Lhs, const 
__m256i _Rhs) noexcept { - return _mm256_movemask_epi8(_mm256_cmpeq_epi16(_Lhs, _Rhs)) & 0x55555555; + static __m256i _Load(const void* const _Src) noexcept { + return _mm256_loadu_si256(reinterpret_cast(_Src)); } - }; - struct _Find_seq_traits_4 { - static __m256i _Broadcast_avx(const __m128i _Data) noexcept { - return _mm256_broadcastd_epi32(_Data); + static __m256i _Xor(const __m256i _Val1, const __m256i _Val2) noexcept { + return _mm256_xor_si256(_Val1, _Val2); } - static unsigned long _Cmp_avx(const __m256i _Lhs, const __m256i _Rhs) noexcept { - return _mm256_movemask_epi8(_mm256_cmpeq_epi32(_Lhs, _Rhs)) & 0x11111111; + static bool _TestZ(const __m256i _Val1, const __m256i _Val2) noexcept { + return _mm256_testz_si256(_Val1, _Val2); } - }; - struct _Find_seq_traits_8 { - static __m256i _Broadcast_avx(const __m128i _Data) noexcept { - return _mm256_broadcastq_epi64(_Data); + static unsigned int _Bsf(const unsigned long _Mask) noexcept { + return _tzcnt_u32(_Mask); } - static unsigned long _Cmp_avx(const __m256i _Lhs, const __m256i _Rhs) noexcept { - return _mm256_movemask_epi8(_mm256_cmpeq_epi64(_Lhs, _Rhs)) & 0x01010101; + static unsigned int _Bsr(const unsigned long _Mask) noexcept { + return 31 - _lzcnt_u32(_Mask); } }; - template - __m256i _Avx2_load_tail(const void* const _Src, const size_t _Size_bytes, const __m256i _Mask) noexcept { - if constexpr (sizeof(_Ty) >= 4) { - return _mm256_maskload_epi32(reinterpret_cast(_Src), _Mask); - } else { + struct _Find_seq_traits_avx_1_2 : _Find_seq_traits_avx { + static __m256i _Load_tail( + const void* const _Src, const size_t _Size_bytes, __m256i = _mm256_undefined_si256()) noexcept { unsigned char _Tmp[32]; memcpy(_Tmp, _Src, _Size_bytes); - return _mm256_loadu_si256(reinterpret_cast<__m256i*>(_Tmp)); + return _mm256_loadu_si256(reinterpret_cast(_Tmp)); } - } + }; - template - __m256i _Avx2_load_tail(const void* const _Src, const size_t _Size_bytes) noexcept { - if constexpr (sizeof(_Ty) >= 4) { - const __m256i 
_Mask = _Avx2_tail_mask_32(_Size_bytes); + struct _Find_seq_traits_avx_4_8 : _Find_seq_traits_avx { + static __m256i _Load_tail(const void* const _Src, size_t, const __m256i _Mask) noexcept { return _mm256_maskload_epi32(reinterpret_cast(_Src), _Mask); - } else { - unsigned char _Tmp[32]; + } + + static __m256i _Load_tail(const void* const _Src, const size_t _Size_bytes) noexcept { + const __m256i _Mask = _Avx2_tail_mask_32(_Size_bytes); + return _mm256_maskload_epi32(reinterpret_cast(_Src), _Mask); + } + }; + + struct _Find_seq_traits_avx_1 : _Find_seq_traits_avx_1_2 { + static __m256i _Broadcast(const __m256i _Data) noexcept { + return _mm256_broadcastb_epi8(_mm256_castsi256_si128(_Data)); + } + + static unsigned long _Cmp(const __m256i _Lhs, const __m256i _Rhs) noexcept { + return _mm256_movemask_epi8(_mm256_cmpeq_epi8(_Lhs, _Rhs)); + } + }; + + struct _Find_seq_traits_avx_2 : _Find_seq_traits_avx_1_2 { + static __m256i _Broadcast(const __m256i _Data) noexcept { + return _mm256_broadcastw_epi16(_mm256_castsi256_si128(_Data)); + } + + static unsigned long _Cmp(const __m256i _Lhs, const __m256i _Rhs) noexcept { + return _mm256_movemask_epi8(_mm256_cmpeq_epi16(_Lhs, _Rhs)) & 0x55555555; + } + }; + + struct _Find_seq_traits_avx_4 : _Find_seq_traits_avx_4_8 { + static __m256i _Broadcast(const __m256i _Data) noexcept { + return _mm256_broadcastd_epi32(_mm256_castsi256_si128(_Data)); + } + + static unsigned long _Cmp(const __m256i _Lhs, const __m256i _Rhs) noexcept { + return _mm256_movemask_epi8(_mm256_cmpeq_epi32(_Lhs, _Rhs)) & 0x11111111; + } + }; + + struct _Find_seq_traits_avx_8 : _Find_seq_traits_avx_4_8 { + static __m256i _Broadcast(const __m256i _Data) noexcept { + return _mm256_broadcastq_epi64(_mm256_castsi256_si128(_Data)); + } + + static unsigned long _Cmp(const __m256i _Lhs, const __m256i _Rhs) noexcept { + return _mm256_movemask_epi8(_mm256_cmpeq_epi64(_Lhs, _Rhs)) & 0x01010101; + } + }; + + struct _Find_seq_traits_sse_4_8 { + using _Guard = char; + + 
static constexpr size_t _Vec_size = 16; + + static __m128i _Mask(const size_t _Count_in_bytes) noexcept { + // _Count_in_bytes must be within [0, 16]. + static constexpr unsigned int _Tail_masks[8] = {~0u, ~0u, ~0u, ~0u, 0, 0, 0, 0}; + return _mm_loadu_si128(reinterpret_cast( + reinterpret_cast(_Tail_masks) + (16 - _Count_in_bytes))); + } + + static __m128i _Load(const void* const _Src) noexcept { + return _mm_loadu_si128(reinterpret_cast(_Src)); + } + + static __m128i _Xor(const __m128i _Val1, const __m128i _Val2) noexcept { + return _mm_xor_si128(_Val1, _Val2); + } + + static bool _TestZ(const __m128i _Val1, const __m128i _Val2) noexcept { + return _mm_testz_si128(_Val1, _Val2); + } + + static __m128i _Load_tail( + const void* const _Src, const size_t _Size_bytes, __m128i = _mm_undefined_si128()) noexcept { + unsigned char _Tmp[16]; memcpy(_Tmp, _Src, _Size_bytes); - return _mm256_loadu_si256(reinterpret_cast<__m256i*>(_Tmp)); + return _mm_loadu_si128(reinterpret_cast(_Tmp)); } - } -#endif // ^^^ !defined(_M_ARM64EC) ^^^ - template - const void* __stdcall _Search_impl( - const void* _First1, const void* const _Last1, const void* const _First2, const size_t _Count2) noexcept { - if (_Count2 == 0) { - return _First1; + static unsigned int _Bsf(const unsigned long _Mask) noexcept { + unsigned long _Index; + // CodeQL [SM02313] _Index is always initialized: we checked _Mask != 0 on every call site + _BitScanForward(&_Index, _Mask); + return _Index; } - if (_Count2 == 1) { - return _Finding::_Find_impl<_FindTraits, _Finding::_Predicate::_Equal>( - _First1, _Last1, *static_cast(_First2)); + static unsigned int _Bsr(const unsigned long _Mask) noexcept { + unsigned long _Index; + // CodeQL [SM02313] _Index is always initialized: we checked _Mask != 0 on every call site + _BitScanReverse(&_Index, _Mask); + return _Index; } + }; - const size_t _Size_bytes_1 = _Byte_length(_First1, _Last1); - const size_t _Size_bytes_2 = _Count2 * sizeof(_Ty); + struct 
_Find_seq_traits_sse_4 : _Find_seq_traits_sse_4_8 { + static __m128i _Broadcast(const __m128i _Data) noexcept { + return _mm_shuffle_epi32(_Data, _MM_SHUFFLE(0, 0, 0, 0)); + } - if (_Size_bytes_1 < _Size_bytes_2) { - return _Last1; + static unsigned long _Cmp(const __m128i _Lhs, const __m128i _Rhs) noexcept { + return _mm_movemask_epi8(_mm_cmpeq_epi32(_Lhs, _Rhs)) & 0x1111; } + }; -#ifndef _M_ARM64EC - if (_Use_avx2() && _Size_bytes_1 >= 32) { - _Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414 + struct _Find_seq_traits_sse_8 : _Find_seq_traits_sse_4_8 { + static __m128i _Broadcast(const __m128i _Data) noexcept { + return _mm_shuffle_epi32(_Data, _MM_SHUFFLE(1, 0, 1, 0)); + } - if (_Size_bytes_2 <= 32) { - const __m256i _Mask2 = _Avx2_tail_mask_32(_Size_bytes_2); - const __m256i _Data2 = _Avx2_load_tail<_Ty>(_First2, _Size_bytes_2, _Mask2); - const __m256i _Start2 = _Traits::_Broadcast_avx(_mm256_castsi256_si128(_Data2)); + static unsigned long _Cmp(const __m128i _Lhs, const __m128i _Rhs) noexcept { + return _mm_movemask_epi8(_mm_cmpeq_epi64(_Lhs, _Rhs)) & 0x0101; + } + }; - const void* _Stop1 = _First1; - _Advance_bytes(_Stop1, _Size_bytes_1 & ~size_t{0x1F}); - do { - const __m256i _Data1 = _mm256_loadu_si256(static_cast(_First1)); - unsigned long _Bingo = _Traits::_Cmp_avx(_Data1, _Start2); - - while (_Bingo != 0) { - const unsigned int _Pos = _tzcnt_u32(_Bingo); - - const void* _Match = _First1; - _Advance_bytes(_Match, _Pos); - - __m256i _Cmp; - if (const size_t _Left_match = _Byte_length(_Match, _Last1); _Left_match >= 32) { - const __m256i _Match_val = _mm256_loadu_si256(reinterpret_cast(_Match)); - _Cmp = _mm256_xor_si256(_Data2, _Match_val); - } else if (_Left_match >= _Size_bytes_2) { - const __m256i _Match_val = _Avx2_load_tail<_Ty>(_Match, _Left_match); - _Cmp = _mm256_xor_si256(_Data2, _Match_val); - } else { - break; - } + template + const void* _Search_cmpeq(const void* _First1, const void* const _Last1, const void* const _First2, + const 
size_t _Size_bytes_2) noexcept { + [[maybe_unused]] typename _Traits::_Guard _Guard; // TRANSITION, DevCom-10331414 + const size_t _Size_bytes_1 = _Byte_length(_First1, _Last1); + constexpr size_t _Vec_size = _Traits::_Vec_size; + constexpr size_t _Vec_mask = _Vec_size - 1; - if (_mm256_testz_si256(_Cmp, _Mask2)) { - return _Match; - } + if (_Size_bytes_2 <= _Vec_size) { + const auto _Mask2 = _Traits::_Mask(_Size_bytes_2); + const auto _Data2 = _Traits::_Load_tail(_First2, _Size_bytes_2, _Mask2); + const auto _Start2 = _Traits::_Broadcast(_Data2); - _Bingo ^= 1 << _Pos; + const void* _Stop1 = _First1; + _Advance_bytes(_Stop1, _Size_bytes_1 & ~_Vec_mask); + do { + const auto _Data1 = _Traits::_Load(_First1); + unsigned long _Bingo = _Traits::_Cmp(_Data1, _Start2); + + while (_Bingo != 0) { + const unsigned int _Pos = _Traits::_Bsf(_Bingo); + + const void* _Match = _First1; + _Advance_bytes(_Match, _Pos); + + decltype(_Traits::_Load(_Match)) _Cmp; + if (const size_t _Left_match = _Byte_length(_Match, _Last1); _Left_match >= _Vec_size) { + const auto _Match_val = _Traits::_Load(_Match); + _Cmp = _Traits::_Xor(_Data2, _Match_val); + } else if (_Left_match >= _Size_bytes_2) { + const auto _Match_val = _Traits::_Load_tail(_Match, _Left_match); + _Cmp = _Traits::_Xor(_Data2, _Match_val); + } else { + break; } - _Advance_bytes(_First1, 32); + if (_Traits::_TestZ(_Cmp, _Mask2)) { + return _Match; + } - } while (_First1 != _Stop1); + _Bingo ^= 1 << _Pos; + } - if (const size_t _Left1 = _Byte_length(_First1, _Last1); _Left1 >= _Size_bytes_2) { - const __m256i _Data1 = _Avx2_load_tail<_Ty>(_First1, _Left1); - unsigned long _Bingo = _Traits::_Cmp_avx(_Data1, _Start2); + _Advance_bytes(_First1, _Vec_size); - while (_Bingo != 0) { - const unsigned int _Pos = _tzcnt_u32(_Bingo); + } while (_First1 != _Stop1); - if (_Pos > _Left1 - _Size_bytes_2) { - break; - } + if (const size_t _Left1 = _Byte_length(_First1, _Last1); _Left1 >= _Size_bytes_2) { + const auto _Data1 = 
_Traits::_Load_tail(_First1, _Left1); + unsigned long _Bingo = _Traits::_Cmp(_Data1, _Start2); - const void* _Match = _First1; - _Advance_bytes(_Match, _Pos); + while (_Bingo != 0) { + const unsigned int _Pos = _Traits::_Bsf(_Bingo); - const size_t _Left_match = _Byte_length(_Match, _Last1); - const __m256i _Match_val = _Avx2_load_tail<_Ty>(_Match, _Left_match); - const __m256i _Cmp = _mm256_xor_si256(_Data2, _Match_val); + if (_Pos > _Left1 - _Size_bytes_2) { + break; + } + + const void* _Match = _First1; + _Advance_bytes(_Match, _Pos); + + const size_t _Left_match = _Byte_length(_Match, _Last1); + const auto _Match_val = _Traits::_Load_tail(_Match, _Left_match); + const auto _Cmp = _Traits::_Xor(_Data2, _Match_val); + + if (_Traits::_TestZ(_Cmp, _Mask2)) { + return _Match; + } + + _Bingo ^= 1 << _Pos; + } + } + + return _Last1; + } else { // _Size_bytes_2 is greater than _Vec_size bytes + const auto _Data2 = _Traits::_Load(_First2); + const auto _Start2 = _Traits::_Broadcast(_Data2); - if (_mm256_testz_si256(_Cmp, _Mask2)) { + const size_t _Max_pos = _Size_bytes_1 - _Size_bytes_2; + + const void* _Stop1 = _First1; + _Advance_bytes(_Stop1, _Max_pos); + + const void* _Tail2 = _First2; + _Advance_bytes(_Tail2, _Vec_size); + + do { + const auto _Data1 = _Traits::_Load(_First1); + unsigned long _Bingo = _Traits::_Cmp(_Data1, _Start2); + + while (_Bingo != 0) { + const unsigned int _Pos = _Traits::_Bsf(_Bingo); + + const void* _Match = _First1; + _Advance_bytes(_Match, _Pos); + + if (_Match > _Stop1) { + break; // Oops, doesn't fit + } + + const auto _Match_val = _Traits::_Load(_Match); + const auto _Cmp = _Traits::_Xor(_Data2, _Match_val); + + if (_Traits::_TestZ(_Cmp, _Cmp)) { + const void* _Tail1 = _Match; + _Advance_bytes(_Tail1, _Vec_size); + + if (memcmp(_Tail1, _Tail2, _Size_bytes_2 - _Vec_size) == 0) { return _Match; } + } + + _Bingo ^= 1 << _Pos; + } + + _Advance_bytes(_First1, _Vec_size); + + } while (_First1 <= _Stop1); + + return _Last1; + } + } + + 
template + const void* _Find_end_cmpeq(const void* const _First1, const void* const _Last1, const void* const _First2, + const size_t _Size_bytes_2) noexcept { + [[maybe_unused]] typename _Traits::_Guard _Guard; // TRANSITION, DevCom-10331414 + const size_t _Size_bytes_1 = _Byte_length(_First1, _Last1); + constexpr size_t _Vec_size = _Traits::_Vec_size; + constexpr size_t _Vec_mask = _Vec_size - 1; + + if (_Size_bytes_2 <= _Vec_size) { + const unsigned int _Needle_fit_mask = (1 << (_Vec_size - _Size_bytes_2 + sizeof(_Ty))) - 1; + + const void* _Stop1 = _First1; + _Advance_bytes(_Stop1, _Size_bytes_1 & _Vec_mask); + + const auto _Mask2 = _Traits::_Mask(_Size_bytes_2); + const auto _Data2 = _Traits::_Load_tail(_First2, _Size_bytes_2, _Mask2); + const auto _Start2 = _Traits::_Broadcast(_Data2); - _Bingo ^= 1 << _Pos; + const void* _Mid1 = _Last1; + _Rewind_bytes(_Mid1, _Vec_size); + +#pragma warning(push) +#pragma warning(disable : 4324) // structure was padded due to alignment specifier + const auto _Check_first = [=, &_Mid1](unsigned long _Match) noexcept { + while (_Match != 0) { + const unsigned int _Pos = _Traits::_Bsr(_Match); + + const void* _Tmp1 = _Mid1; + _Advance_bytes(_Tmp1, _Pos); + + const auto _Match_data = _Traits::_Load_tail(_Tmp1, _Byte_length(_Tmp1, _Last1)); + const auto _Cmp_result = _Traits::_Xor(_Data2, _Match_data); + + if (_Traits::_TestZ(_Cmp_result, _Mask2)) { + _Mid1 = _Tmp1; + return true; } + + _Match ^= 1 << _Pos; } - return _Last1; - } else { // _Size_bytes_2 is greater than 32 bytes - const __m256i _Data2 = _mm256_loadu_si256(reinterpret_cast(_First2)); - const __m256i _Start2 = _Traits::_Broadcast_avx(_mm256_castsi256_si128(_Data2)); + return false; + }; - const size_t _Max_pos = _Size_bytes_1 - _Size_bytes_2; + const auto _Check = [=, &_Mid1](unsigned long _Match) noexcept { + while (_Match != 0) { + const unsigned int _Pos = _Traits::_Bsr(_Match); - const void* _Stop1 = _First1; - _Advance_bytes(_Stop1, _Max_pos); + const void* 
_Tmp1 = _Mid1; + _Advance_bytes(_Tmp1, _Pos); - const void* _Tail2 = _First2; - _Advance_bytes(_Tail2, 32); + const auto _Match_data = _Traits::_Load(_Tmp1); + const auto _Cmp_result = _Traits::_Xor(_Data2, _Match_data); - do { - const __m256i _Data1 = _mm256_loadu_si256(static_cast(_First1)); - unsigned long _Bingo = _Traits::_Cmp_avx(_Data1, _Start2); + if (_Traits::_TestZ(_Cmp_result, _Mask2)) { + _Mid1 = _Tmp1; + return true; + } - while (_Bingo != 0) { - const unsigned int _Pos = _tzcnt_u32(_Bingo); + _Match ^= 1 << _Pos; + } - const void* _Match = _First1; - _Advance_bytes(_Match, _Pos); + return false; + }; +#pragma warning(pop) - if (_Match > _Stop1) { - break; // Oops, doesn't fit - } + // The very last part, for any match needle should fit, otherwise false match + const auto _Data1_last = _Traits::_Load(_Mid1); + const unsigned long _Match_last_val = _Traits::_Cmp(_Data1_last, _Start2); + if (_Check_first(_Match_last_val & _Needle_fit_mask)) { + return _Mid1; + } - const __m256i _Match_val = _mm256_loadu_si256(reinterpret_cast(_Match)); - const __m256i _Cmp = _mm256_xor_si256(_Data2, _Match_val); + // The middle part, fit and unfit needle + while (_Mid1 != _Stop1) { + _Rewind_bytes(_Mid1, _Vec_size); + const auto _Data1 = _Traits::_Load(_Mid1); + const unsigned long _Match_val = _Traits::_Cmp(_Data1, _Start2); + if (_Check(_Match_val)) { + return _Mid1; + } + } - if (_mm256_testz_si256(_Cmp, _Cmp)) { - const void* _Tail1 = _Match; - _Advance_bytes(_Tail1, 32); + // The first part, fit and unfit needle, mask out already processed positions + if (const size_t _Tail_bytes_1 = _Size_bytes_1 & _Vec_mask; _Tail_bytes_1 != 0) { + _Mid1 = _First1; + const auto _Data1 = _Traits::_Load(_Mid1); + const unsigned long _Match_val = _Traits::_Cmp(_Data1, _Start2); + if (_Match_val != 0 && _Check(_Match_val & ((1 << _Tail_bytes_1) - 1))) { + return _Mid1; + } + } - if (memcmp(_Tail1, _Tail2, _Size_bytes_2 - 32) == 0) { - return _Match; - } - } + return _Last1; + } else { 
// _Size_bytes_2 is greater than _Vec_size bytes + const auto _Data2 = _Traits::_Load(_First2); + const auto _Start2 = _Traits::_Broadcast(_Data2); - _Bingo ^= 1 << _Pos; + const void* _Tail2 = _First2; + _Advance_bytes(_Tail2, _Vec_size); + + const void* _Mid1 = _Last1; + _Rewind_bytes(_Mid1, _Size_bytes_2); + + const size_t _Size_diff_bytes = _Size_bytes_1 - _Size_bytes_2; + const void* _Stop1 = _First1; + _Advance_bytes(_Stop1, _Size_diff_bytes & _Vec_mask); + +#pragma warning(push) +#pragma warning(disable : 4324) // structure was padded due to alignment specifier + const auto _Check = [=, &_Mid1](unsigned long _Match) noexcept { + while (_Match != 0) { + const unsigned int _Pos = _Traits::_Bsr(_Match); + + const void* _Tmp1 = _Mid1; + _Advance_bytes(_Tmp1, _Pos); + + const auto _Match_data = _Traits::_Load(_Tmp1); + const auto _Cmp_result = _Traits::_Xor(_Data2, _Match_data); + + if (_Traits::_TestZ(_Cmp_result, _Cmp_result)) { + const void* _Tail1 = _Tmp1; + _Advance_bytes(_Tail1, _Vec_size); + + if (memcmp(_Tail1, _Tail2, _Size_bytes_2 - _Vec_size) == 0) { + _Mid1 = _Tmp1; + return true; + } } - _Advance_bytes(_First1, 32); + _Match ^= 1 << _Pos; + } - } while (_First1 <= _Stop1); + return false; + }; +#pragma warning(pop) + // The very last part, just compare, as true match must start with first symbol + const auto _Data1_last = _Traits::_Load(_Mid1); + const auto _Match_last = _Traits::_Xor(_Data2, _Data1_last); - return _Last1; + if (_Traits::_TestZ(_Match_last, _Match_last)) { + // Matched _Vec_size bytes, check the rest + const void* _Tail1 = _Mid1; + _Advance_bytes(_Tail1, _Vec_size); + + if (memcmp(_Tail1, _Tail2, _Size_bytes_2 - _Vec_size) == 0) { + return _Mid1; + } + } + + // The main part, match all characters + while (_Mid1 != _Stop1) { + _Rewind_bytes(_Mid1, _Vec_size); + + const auto _Data1 = _Traits::_Load(_Mid1); + const unsigned long _Match_val = _Traits::_Cmp(_Data1, _Start2); + if (_Check(_Match_val)) { + return _Mid1; + } + } + + // The 
first part, mask out already processed positions + if (const size_t _Tail_bytes_1 = _Size_diff_bytes & _Vec_mask; _Tail_bytes_1 != 0) { + _Mid1 = _First1; + const auto _Data1 = _Traits::_Load(_Mid1); + const unsigned long _Match_val = _Traits::_Cmp(_Data1, _Start2); + if (_Match_val != 0 && _Check(_Match_val & ((1 << _Tail_bytes_1) - 1))) { + return _Mid1; + } + } + + return _Last1; + } + } +#endif // ^^^ !defined(_M_ARM64EC) ^^^ + + template + const void* __stdcall _Search_impl( + const void* _First1, const void* const _Last1, const void* const _First2, const size_t _Count2) noexcept { + if (_Count2 == 0) { + return _First1; + } + + if (_Count2 == 1) { + return _Finding::_Find_impl<_FindTraits, _Finding::_Predicate::_Equal>( + _First1, _Last1, *static_cast(_First2)); + } + + const size_t _Size_bytes_1 = _Byte_length(_First1, _Last1); + const size_t _Size_bytes_2 = _Count2 * sizeof(_Ty); + + if (_Size_bytes_1 < _Size_bytes_2) { + return _Last1; + } + +#ifndef _M_ARM64EC + // The AVX2 path for 8-bit elements is not necessarily more efficient than the SSE4.2 cmpestri path + if constexpr (sizeof(_Ty) != 1) { + if (_Use_avx2() && _Size_bytes_1 >= 32) { + return _Search_cmpeq<_Traits_avx, _Ty>(_First1, _Last1, _First2, _Size_bytes_2); } } - if constexpr (sizeof(_Ty) <= 2) { - if (_Use_sse42() && _Size_bytes_1 >= 16) { + if (_Use_sse42() && _Size_bytes_1 >= 16) { + if constexpr (sizeof(_Ty) >= 4) { + return _Search_cmpeq<_Traits_sse, _Ty>(_First1, _Last1, _First2, _Size_bytes_2); + } else { constexpr int _Op = (sizeof(_Ty) == 1 ? _SIDD_UBYTE_OPS : _SIDD_UWORD_OPS) | _SIDD_CMP_EQUAL_ORDERED; constexpr int _Part_size_el = sizeof(_Ty) == 1 ? 
16 : 8; @@ -5240,7 +5628,7 @@ namespace { return _Last1; } - template + template const void* __stdcall _Find_end_impl(const void* const _First1, const void* const _Last1, const void* const _First2, const size_t _Count2) noexcept { if (_Count2 == 0) { @@ -5261,176 +5649,13 @@ namespace { #ifndef _M_ARM64EC if (_Use_avx2() && _Size_bytes_1 >= 32) { - _Zeroupper_on_exit _Guard; // TRANSITION, DevCom-10331414 - - if (_Size_bytes_2 <= 32) { - const unsigned int _Needle_fit_mask = (1 << (32 - _Size_bytes_2 + sizeof(_Ty))) - 1; - - const void* _Stop1 = _First1; - _Advance_bytes(_Stop1, _Size_bytes_1 & 0x1F); - - const __m256i _Mask2 = _Avx2_tail_mask_32(_Size_bytes_2); - const __m256i _Data2 = _Avx2_load_tail<_Ty>(_First2, _Size_bytes_2, _Mask2); - const __m256i _Start2 = _Traits::_Broadcast_avx(_mm256_castsi256_si128(_Data2)); - - const void* _Mid1 = _Last1; - _Rewind_bytes(_Mid1, 32); - -#pragma warning(push) -#pragma warning(disable : 4324) // structure was padded due to alignment specifier - const auto _Check_first = [=, &_Mid1](long _Match) noexcept { - while (_Match != 0) { - const unsigned int _Pos = 31 - _lzcnt_u32(_Match); - - const void* _Tmp1 = _Mid1; - _Advance_bytes(_Tmp1, _Pos); - - const __m256i _Match_data = _Avx2_load_tail<_Ty>(_Tmp1, _Byte_length(_Tmp1, _Last1)); - const __m256i _Cmp_result = _mm256_xor_si256(_Data2, _Match_data); - - if (_mm256_testz_si256(_Cmp_result, _Mask2)) { - _Mid1 = _Tmp1; - return true; - } - - _Match ^= 1 << _Pos; - } - - return false; - }; - - const auto _Check = [=, &_Mid1](long _Match) noexcept { - while (_Match != 0) { - const unsigned int _Pos = 31 - _lzcnt_u32(_Match); - - const void* _Tmp1 = _Mid1; - _Advance_bytes(_Tmp1, _Pos); - - const __m256i _Match_data = _mm256_loadu_si256(reinterpret_cast(_Tmp1)); - const __m256i _Cmp_result = _mm256_xor_si256(_Data2, _Match_data); - - if (_mm256_testz_si256(_Cmp_result, _Mask2)) { - _Mid1 = _Tmp1; - return true; - } - - _Match ^= 1 << _Pos; - } - - return false; - }; -#pragma 
warning(pop) - - // The very last part, for any match needle should fit, otherwise false match - const __m256i _Data1_last = _mm256_loadu_si256(reinterpret_cast(_Mid1)); - const unsigned long _Match_last_val = _Traits::_Cmp_avx(_Data1_last, _Start2); - if (_Check_first(_Match_last_val & _Needle_fit_mask)) { - return _Mid1; - } - - // The middle part, fit and unfit needle - while (_Mid1 != _Stop1) { - _Rewind_bytes(_Mid1, 32); - const __m256i _Data1 = _mm256_loadu_si256(reinterpret_cast(_Mid1)); - const unsigned long _Match_val = _Traits::_Cmp_avx(_Data1, _Start2); - if (_Check(_Match_val)) { - return _Mid1; - } - } - - // The first part, fit and unfit needle, mask out already processed positions - if (const size_t _Tail_bytes_1 = _Size_bytes_1 & 0x1F; _Tail_bytes_1 != 0) { - _Mid1 = _First1; - const __m256i _Data1 = _mm256_loadu_si256(reinterpret_cast(_Mid1)); - const unsigned long _Match_val = _Traits::_Cmp_avx(_Data1, _Start2); - if (_Match_val != 0 && _Check(_Match_val & ((1 << _Tail_bytes_1) - 1))) { - return _Mid1; - } - } - - return _Last1; - } else { // _Size_bytes_2 is greater than 32 bytes - const __m256i _Data2 = _mm256_loadu_si256(reinterpret_cast(_First2)); - const __m256i _Start2 = _Traits::_Broadcast_avx(_mm256_castsi256_si128(_Data2)); - - const void* _Tail2 = _First2; - _Advance_bytes(_Tail2, 32); - - const void* _Mid1 = _Last1; - _Rewind_bytes(_Mid1, _Size_bytes_2); - - const size_t _Size_diff_bytes = _Size_bytes_1 - _Size_bytes_2; - const void* _Stop1 = _First1; - _Advance_bytes(_Stop1, _Size_diff_bytes & 0x1F); - -#pragma warning(push) -#pragma warning(disable : 4324) // structure was padded due to alignment specifier - const auto _Check = [=, &_Mid1](long _Match) noexcept { - while (_Match != 0) { - const unsigned int _Pos = 31 - _lzcnt_u32(_Match); - - const void* _Tmp1 = _Mid1; - _Advance_bytes(_Tmp1, _Pos); - - const __m256i _Match_data = _mm256_loadu_si256(reinterpret_cast(_Tmp1)); - const __m256i _Cmp_result = _mm256_xor_si256(_Data2, 
_Match_data); - - if (_mm256_testz_si256(_Cmp_result, _Cmp_result)) { - const void* _Tail1 = _Tmp1; - _Advance_bytes(_Tail1, 32); - - if (memcmp(_Tail1, _Tail2, _Size_bytes_2 - 32) == 0) { - _Mid1 = _Tmp1; - return true; - } - } - - _Match ^= 1 << _Pos; - } - - return false; - }; -#pragma warning(pop) - // The very last part, just compare, as true match must start with first symbol - const __m256i _Data1_last = _mm256_loadu_si256(reinterpret_cast(_Mid1)); - const __m256i _Match_last = _mm256_xor_si256(_Data2, _Data1_last); - if (_mm256_testz_si256(_Match_last, _Match_last)) { - // Matched 32 bytes, check the rest - const void* _Tail1 = _Mid1; - _Advance_bytes(_Tail1, 32); - - if (memcmp(_Tail1, _Tail2, _Size_bytes_2 - 32) == 0) { - return _Mid1; - } - } - - // The main part, match all characters - while (_Mid1 != _Stop1) { - _Rewind_bytes(_Mid1, 32); - - const __m256i _Data1 = _mm256_loadu_si256(reinterpret_cast(_Mid1)); - const unsigned long _Match_val = _Traits::_Cmp_avx(_Data1, _Start2); - if (_Check(_Match_val)) { - return _Mid1; - } - } - - // The first part, mask out already processed positions - if (const size_t _Tail_bytes_1 = _Size_diff_bytes & 0x1F; _Tail_bytes_1 != 0) { - _Mid1 = _First1; - const __m256i _Data1 = _mm256_loadu_si256(reinterpret_cast(_Mid1)); - const unsigned long _Match_val = _Traits::_Cmp_avx(_Data1, _Start2); - if (_Match_val != 0 && _Check(_Match_val & ((1 << _Tail_bytes_1) - 1))) { - return _Mid1; - } - } - - return _Last1; - } + return _Find_end_cmpeq<_Traits_avx, _Ty>(_First1, _Last1, _First2, _Size_bytes_2); } - if constexpr (sizeof(_Ty) <= 2) { - if (_Use_sse42() && _Size_bytes_1 >= 16) { + if (_Use_sse42() && _Size_bytes_1 >= 16) { + if constexpr (sizeof(_Ty) >= 4) { + return _Find_end_cmpeq<_Traits_sse, _Ty>(_First1, _Last1, _First2, _Size_bytes_2); + } else { constexpr int _Op = (sizeof(_Ty) == 1 ? _SIDD_UBYTE_OPS : _SIDD_UWORD_OPS) | _SIDD_CMP_EQUAL_ORDERED; constexpr int _Part_size_el = sizeof(_Ty) == 1 ? 
16 : 8; @@ -5473,7 +5698,7 @@ namespace { #pragma warning(push) #pragma warning(disable : 4324) // structure was padded due to alignment specifier const auto _Check_unfit = [=, &_Mid1](const unsigned int _Match) noexcept { - long _Unfit_match = _Match & _Needle_unfit_mask; + unsigned long _Unfit_match = _Match & _Needle_unfit_mask; while (_Unfit_match != 0) { const void* _Tmp1 = _Mid1; unsigned long _Match_last_pos; @@ -5493,7 +5718,7 @@ namespace { return true; } - _bittestandreset(&_Unfit_match, _Match_last_pos); + _Unfit_match ^= 1 << _Match_last_pos; } return false; @@ -5550,7 +5775,7 @@ namespace { #pragma warning(push) #pragma warning(disable : 4324) // structure was padded due to alignment specifier - const auto _Check = [=, &_Mid1](long _Match) noexcept { + const auto _Check = [=, &_Mid1](unsigned long _Match) noexcept { while (_Match != 0) { const void* _Tmp1 = _Mid1; unsigned long _Match_last_pos; @@ -5582,7 +5807,7 @@ namespace { } } - _bittestandreset(&_Match, _Match_last_pos); + _Match ^= 1 << _Match_last_pos; } return false; @@ -5666,51 +5891,50 @@ extern "C" { const void* __stdcall __std_search_1( const void* const _First1, const void* const _Last1, const void* const _First2, const size_t _Count2) noexcept { - return _Find_seq::_Search_impl<_Finding::_Find_traits_1, _Find_seq::_Find_seq_traits_1, uint8_t>( - _First1, _Last1, _First2, _Count2); + return _Find_seq::_Search_impl<_Finding::_Find_traits_1, void, void, uint8_t>(_First1, _Last1, _First2, _Count2); } const void* __stdcall __std_search_2( const void* const _First1, const void* const _Last1, const void* const _First2, const size_t _Count2) noexcept { - return _Find_seq::_Search_impl<_Finding::_Find_traits_2, _Find_seq::_Find_seq_traits_2, uint16_t>( + return _Find_seq::_Search_impl<_Finding::_Find_traits_2, _Find_seq::_Find_seq_traits_avx_2, void, uint16_t>( _First1, _Last1, _First2, _Count2); } const void* __stdcall __std_search_4( const void* const _First1, const void* const _Last1, const 
void* const _First2, const size_t _Count2) noexcept { - return _Find_seq::_Search_impl<_Finding::_Find_traits_4, _Find_seq::_Find_seq_traits_4, uint32_t>( - _First1, _Last1, _First2, _Count2); + return _Find_seq::_Search_impl<_Finding::_Find_traits_4, _Find_seq::_Find_seq_traits_avx_4, + _Find_seq::_Find_seq_traits_sse_4, uint32_t>(_First1, _Last1, _First2, _Count2); } const void* __stdcall __std_search_8( const void* const _First1, const void* const _Last1, const void* const _First2, const size_t _Count2) noexcept { - return _Find_seq::_Search_impl<_Finding::_Find_traits_8, _Find_seq::_Find_seq_traits_8, uint64_t>( - _First1, _Last1, _First2, _Count2); + return _Find_seq::_Search_impl<_Finding::_Find_traits_8, _Find_seq::_Find_seq_traits_avx_8, + _Find_seq::_Find_seq_traits_sse_8, uint64_t>(_First1, _Last1, _First2, _Count2); } const void* __stdcall __std_find_end_1( const void* const _First1, const void* const _Last1, const void* const _First2, const size_t _Count2) noexcept { - return _Find_seq::_Find_end_impl<_Finding::_Find_traits_1, _Find_seq::_Find_seq_traits_1, uint8_t>( + return _Find_seq::_Find_end_impl<_Finding::_Find_traits_1, _Find_seq::_Find_seq_traits_avx_1, void, uint8_t>( _First1, _Last1, _First2, _Count2); } const void* __stdcall __std_find_end_2( const void* const _First1, const void* const _Last1, const void* const _First2, const size_t _Count2) noexcept { - return _Find_seq::_Find_end_impl<_Finding::_Find_traits_2, _Find_seq::_Find_seq_traits_2, uint16_t>( + return _Find_seq::_Find_end_impl<_Finding::_Find_traits_2, _Find_seq::_Find_seq_traits_avx_2, void, uint16_t>( _First1, _Last1, _First2, _Count2); } const void* __stdcall __std_find_end_4( const void* const _First1, const void* const _Last1, const void* const _First2, const size_t _Count2) noexcept { - return _Find_seq::_Find_end_impl<_Finding::_Find_traits_4, _Find_seq::_Find_seq_traits_4, uint32_t>( - _First1, _Last1, _First2, _Count2); + return 
_Find_seq::_Find_end_impl<_Finding::_Find_traits_4, _Find_seq::_Find_seq_traits_avx_4, + _Find_seq::_Find_seq_traits_sse_4, uint32_t>(_First1, _Last1, _First2, _Count2); } const void* __stdcall __std_find_end_8( const void* const _First1, const void* const _Last1, const void* const _First2, const size_t _Count2) noexcept { - return _Find_seq::_Find_end_impl<_Finding::_Find_traits_8, _Find_seq::_Find_seq_traits_8, uint64_t>( - _First1, _Last1, _First2, _Count2); + return _Find_seq::_Find_end_impl<_Finding::_Find_traits_8, _Find_seq::_Find_seq_traits_avx_8, + _Find_seq::_Find_seq_traits_sse_8, uint64_t>(_First1, _Last1, _First2, _Count2); } } // extern "C" @@ -6803,7 +7027,8 @@ namespace { using _Traits_2_sse = void; #else // ^^^ defined(_M_ARM64EC) / !defined(_M_ARM64EC) vvv struct _Traits_avx { - using _Vec = __m256i; + using _Guard = _Zeroupper_on_exit; + using _Vec = __m256i; static __m256i _Load(const void* _Src) noexcept { return _mm256_loadu_si256(reinterpret_cast(_Src)); @@ -6823,7 +7048,8 @@ namespace { }; struct _Traits_sse { - using _Vec = __m128i; + using _Guard = char; + using _Vec = __m128i; static __m128i _Load(const void* _Src) noexcept { return _mm_loadu_si128(reinterpret_cast(_Src)); @@ -6952,6 +7178,7 @@ namespace { template bool _Impl(void* const _Dest, const _Elem* const _Src, const size_t _Size_bytes, const size_t _Size_bits, const size_t _Size_chars, const _Elem _Elem0, const _Elem _Elem1) noexcept { + [[maybe_unused]] typename _Traits::_Guard _Guard; // TRANSITION, DevCom-10331414 const auto _Dx0 = _Traits::_Set(_Elem0); const auto _Dx1 = _Traits::_Set(_Elem1); @@ -6968,14 +7195,12 @@ namespace { // Convert characters to bits if (!_Loop<_Traits>(_Src, _Src + _Size_convert, _Dx0, _Dx1, _Out)) { - _Traits::_Exit_vectorized(); // TRANSITION, DevCom-10331414 return false; } // Verify remaining characters, if any if (_Size_convert != _Size_chars && !_Loop<_Traits>(_Src + _Size_convert, _Src + _Size_chars, _Dx0, _Dx1, [](_Traits::_Vec) {})) { - 
_Traits::_Exit_vectorized(); // TRANSITION, DevCom-10331414 return false; } @@ -6984,8 +7209,6 @@ namespace { memset(_Dst_words, 0, _Byte_length(_Dst_words, _Dst_words_end)); } - _Traits::_Exit_vectorized(); // TRANSITION, DevCom-10331414 - return true; } #endif // ^^^ !defined(_M_ARM64EC) ^^^ diff --git a/tests/libcxx/expected_results.txt b/tests/libcxx/expected_results.txt index 3cbfcfa454..bab0724cbf 100644 --- a/tests/libcxx/expected_results.txt +++ b/tests/libcxx/expected_results.txt @@ -370,12 +370,6 @@ std/utilities/meta/meta.unary/meta.unary.prop/is_implicit_lifetime.pass.cpp FAIL # *** MISSING LWG ISSUE RESOLUTIONS *** -# LWG-2503 "multiline option should be added to syntax_option_type" -std/re/re.alg/re.alg.search/no_update_pos.pass.cpp FAIL -std/re/re.const/re.matchflag/match_multiline.pass.cpp FAIL -std/re/re.const/re.matchflag/match_not_eol.pass.cpp FAIL -std/re/re.const/re.synopt/syntax_option_type.pass.cpp FAIL - # LWG-2532 "Satisfying a promise at thread exit" (Open) std/thread/futures/futures.promise/set_exception_at_thread_exit.pass.cpp FAIL std/thread/futures/futures.promise/set_lvalue_at_thread_exit.pass.cpp FAIL diff --git a/tests/std/include/test_regex_support.hpp b/tests/std/include/test_regex_support.hpp index 0cbd643aca..8193d4a2c6 100644 --- a/tests/std/include/test_regex_support.hpp +++ b/tests/std/include/test_regex_support.hpp @@ -181,6 +181,25 @@ class regex_fixture { } } } + + void should_throw(const std::wstring& pattern, const std::regex_constants::error_type expectedCode, + const std::regex_constants::syntax_option_type syntax = std::regex_constants::ECMAScript) { + try { + const std::wregex r(pattern, syntax); + wprintf(LR"(wregex r("%s", 0x%X) succeeded (which is bad).)" + L"\n", + pattern.c_str(), static_cast(syntax)); + fail_regex(); + } catch (const std::regex_error& e) { + if (e.code() != expectedCode) { + wprintf(LR"(wregex r("%s", 0x%X) threw 0x%X; expected 0x%X)" + L"\n", + pattern.c_str(), static_cast(syntax), 
static_cast(e.code()), + static_cast(expectedCode)); + fail_regex(); + } + } + } }; class test_regex { diff --git a/tests/std/test.lst b/tests/std/test.lst index bdc489656f..e0d87d8b8e 100644 --- a/tests/std/test.lst +++ b/tests/std/test.lst @@ -154,6 +154,7 @@ tests\Dev11_1140665_unique_ptr_array_conversions tests\Dev11_1150223_shared_mutex tests\Dev11_1158803_regex_thread_safety tests\Dev11_1180290_filesystem_error_code +tests\GH_000073_regex_multiline_escape_hatch tests\GH_000140_adl_proof_comparison tests\GH_000140_adl_proof_construction tests\GH_000140_adl_proof_views @@ -252,6 +253,7 @@ tests\GH_004609_heterogeneous_cmp_overloads tests\GH_004618_mixed_operator_usage_keeps_statistical_properties tests\GH_004618_normal_distribution_avoids_resets tests\GH_004657_expected_constraints_permissive +tests\GH_004686_vectorization_on_trivial_assignability tests\GH_004845_logical_operator_traits_with_non_bool_constant tests\GH_004929_internal_tag_constructors tests\GH_004930_char_traits_user_specialization @@ -264,6 +266,7 @@ tests\GH_005315_destructor_tombstones tests\GH_005402_string_with_volatile_range tests\GH_005421_vector_algorithms_integer_class_type_iterator tests\GH_005472_do_not_overlap +tests\GH_005553_regex_character_translation tests\LWG2381_num_get_floating_point tests\LWG2597_complex_branch_cut tests\LWG3018_shared_ptr_function diff --git a/tests/std/tests/GH_000073_regex_multiline_escape_hatch/env.lst b/tests/std/tests/GH_000073_regex_multiline_escape_hatch/env.lst new file mode 100644 index 0000000000..19f025bd0e --- /dev/null +++ b/tests/std/tests/GH_000073_regex_multiline_escape_hatch/env.lst @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. 
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +RUNALL_INCLUDE ..\usual_matrix.lst diff --git a/tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp b/tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp new file mode 100644 index 0000000000..31968afa26 --- /dev/null +++ b/tests/std/tests/GH_000073_regex_multiline_escape_hatch/test.cpp @@ -0,0 +1,107 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#define _REGEX_LEGACY_MULTILINE_MODE 1 + +#include +#include +#include +#include + +#include + +using namespace std; +using namespace std::regex_constants; + +regex_fixture g_regexTester; + +void test_VSO_225160_match_bol_flag() { + // Old tests for caret anchor in default multiline mode + for (syntax_option_type syntax : {syntax_option_type{}, ECMAScript, basic, grep, extended, egrep, awk}) { + const test_regex emptyAnchor(&g_regexTester, R"(^)", syntax); + emptyAnchor.should_search_match("", ""); + emptyAnchor.should_search_fail("", match_not_bol); + emptyAnchor.should_search_match("\n", ""); + emptyAnchor.should_search_match("\n", "", match_not_bol); + + const test_regex beginCd(&g_regexTester, R"(^cd)", syntax); + beginCd.should_search_match("ab\ncdefg", "cd"); + beginCd.should_search_match("ab\ncdefg", "cd", match_not_bol); + + beginCd.should_search_match("cdefg", "cd"); + beginCd.should_search_fail("cdefg", match_not_bol); + beginCd.should_search_match("\ncdefg", "cd"); + beginCd.should_search_match("\ncdefg", "cd", match_not_bol); + + beginCd.should_search_fail("ab\nxcdefg"); + beginCd.should_search_fail("ab\nxcdefg", match_not_bol); + } +} + +void test_VSO_225160_match_eol_flag() { + // Old tests for dollar anchor in default multiline mode + for (syntax_option_type syntax : {syntax_option_type{}, ECMAScript, basic, grep, extended, egrep, awk}) { + const test_regex emptyAnchor(&g_regexTester, R"($)", syntax); + emptyAnchor.should_search_match("", ""); + 
emptyAnchor.should_search_fail("", match_not_eol); + emptyAnchor.should_search_match("\n", ""); + emptyAnchor.should_search_match("\n", "", match_not_eol); + + const test_regex cdEnd(&g_regexTester, R"(cd$)", syntax); + cdEnd.should_search_match("abcd\nefg", "cd"); + cdEnd.should_search_match("abcd\nefg", "cd", match_not_eol); + + cdEnd.should_search_match("abcd", "cd"); + cdEnd.should_search_fail("abcd", match_not_eol); + cdEnd.should_search_match("abcd\n", "cd"); + cdEnd.should_search_match("abcd\n", "cd", match_not_eol); + + cdEnd.should_search_fail("abcdx\nefg"); + cdEnd.should_search_fail("abcdx\nefg", match_not_eol); + } +} + +void test_gh_73() { + for (syntax_option_type syntax : {syntax_option_type{}, ECMAScript, basic, grep, extended, egrep, awk}) { + { + test_regex a_anchored_on_both_sides(&g_regexTester, "^a$", syntax); + a_anchored_on_both_sides.should_search_match("a", "a"); + a_anchored_on_both_sides.should_search_match("b\na", "a"); + a_anchored_on_both_sides.should_search_match("a\nb", "a"); + a_anchored_on_both_sides.should_search_fail("a\nb", match_not_bol); + a_anchored_on_both_sides.should_search_fail("b\na", match_not_eol); + } + + { + test_regex a_anchored_front(&g_regexTester, "^a", syntax); + a_anchored_front.should_search_match("a", "a"); + a_anchored_front.should_search_match("a\n", "a"); + a_anchored_front.should_search_match("a\nb", "a"); + a_anchored_front.should_search_match("b\na", "a"); + a_anchored_front.should_search_match("\na", "a"); + a_anchored_front.should_search_fail("a", match_not_bol); + a_anchored_front.should_search_match("\na", "a", match_not_bol); + a_anchored_front.should_search_match("b\na", "a", match_not_bol); + } + + { + test_regex a_anchored_back(&g_regexTester, "a$", syntax); + a_anchored_back.should_search_match("a", "a"); + a_anchored_back.should_search_match("\na", "a"); + a_anchored_back.should_search_match("b\na", "a"); + a_anchored_back.should_search_match("a\nb", "a"); + 
a_anchored_back.should_search_match("a\n", "a"); + a_anchored_back.should_search_fail("a", match_not_eol); + a_anchored_back.should_search_match("a\n", "a", match_not_eol); + a_anchored_back.should_search_match("a\nb", "a", match_not_eol); + } + } +} + +int main() { + test_VSO_225160_match_bol_flag(); + test_VSO_225160_match_eol_flag(); + test_gh_73(); + + return g_regexTester.result(); +} diff --git a/tests/std/tests/GH_000431_equal_memcmp_is_safe/test.compile.pass.cpp b/tests/std/tests/GH_000431_equal_memcmp_is_safe/test.compile.pass.cpp index 0d19218dbf..5dabfa6499 100644 --- a/tests/std/tests/GH_000431_equal_memcmp_is_safe/test.compile.pass.cpp +++ b/tests/std/tests/GH_000431_equal_memcmp_is_safe/test.compile.pass.cpp @@ -12,6 +12,12 @@ #include #include +#ifdef __clang__ +constexpr bool magic = true; +#else +constexpr bool magic = false; +#endif + using namespace std; #define STATIC_ASSERT(...) static_assert(__VA_ARGS__, #__VA_ARGS__) @@ -137,6 +143,22 @@ struct StatefulDerived2 : EmptyBase, StatefulBase {}; struct StatefulPrivatelyDerived2 : private EmptyBase, private StatefulBase {}; +#if _HAS_CXX20 +struct DefaultComparison { + int i; + + bool operator==(const DefaultComparison&) const noexcept = default; +}; + +struct DefaultComparisonOddSize { + int i; + int j; + int k; + + bool operator==(const DefaultComparisonOddSize&) const noexcept = default; +}; +#endif // _HAS_CXX20 + #ifdef __cpp_lib_is_pointer_interconvertible STATIC_ASSERT(is_pointer_interconvertible_base_of_v); STATIC_ASSERT(is_pointer_interconvertible_base_of_v); @@ -485,21 +507,34 @@ STATIC_ASSERT(test_equal_memcmp_is_safe_for_types, void (*)(int), void*>()); STATIC_ASSERT(test_equal_memcmp_is_safe_for_types, void*, void (*)(int)>()); -// Don't allow member object pointers -STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); -STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); +// Don't allow member object pointers, unless magic happens 
+STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); +STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); -// Don't allow member function pointers -STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); -STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); +// Don't allow member function pointers, unless magic happens +STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); +STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); // Don't allow user-defined types STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); +#if _HAS_CXX20 +// Don't allow user-defined types with default comparison, unless magic happens +STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); +STATIC_ASSERT(test_equal_memcmp_is_safe_for_types()); +// The only difference between _Equal_memcmp_is_safe and _Vector_alg_in_search_is_safe is how the magic works +STATIC_ASSERT(_Equal_memcmp_is_safe> == magic); +STATIC_ASSERT(_Equal_memcmp_is_safe> == magic); +#if _USE_STD_VECTOR_ALGORITHMS +STATIC_ASSERT(_Vector_alg_in_search_is_safe> == magic); +STATIC_ASSERT(!_Vector_alg_in_search_is_safe>); +#endif // _USE_STD_VECTOR_ALGORITHMS +#endif // _HAS_CXX20 + // Test _Std_char_traits_eq STATIC_ASSERT(test_equal_memcmp_is_safe_for_pred>()); STATIC_ASSERT(test_equal_memcmp_is_safe_for_pred>()); diff --git a/tests/std/tests/GH_001103_countl_zero_correctness/test.cpp b/tests/std/tests/GH_001103_countl_zero_correctness/test.cpp index b2465871e1..d9b156c5f8 100644 --- a/tests/std/tests/GH_001103_countl_zero_correctness/test.cpp +++ b/tests/std/tests/GH_001103_countl_zero_correctness/test.cpp @@ -12,7 +12,7 @@ using namespace std; int main() { // This test is applicable only to x86 and x64 platforms -#if defined(_M_IX86) || defined(_M_X64) +#if (defined(_M_IX86) && !defined(_M_HYBRID_X86_ARM64)) || 
(defined(_M_X64) && !defined(_M_ARM64EC)) assert(_Countl_zero_bsr(static_cast(0x00)) == 8); assert(_Countl_zero_bsr(static_cast(0x13)) == 3); assert(_Countl_zero_bsr(static_cast(0x83)) == 0); @@ -67,5 +67,5 @@ int main() { assert(_Countr_zero_bsf(static_cast(0x8000'0000'0000'0002)) == 1); assert(_Countr_zero_bsf(static_cast(0x8000'0000'0000'0000)) == 63); assert(_Countr_zero_bsf(static_cast(0xF000'0000'0000'0008)) == 3); -#endif // ^^^ defined(_M_IX86) || defined(_M_X64) ^^^ +#endif // ^^^ (defined(_M_IX86) && !defined(_M_HYBRID_X86_ARM64)) || (defined(_M_X64) && !defined(_M_ARM64EC)) ^^^ } diff --git a/tests/std/tests/GH_002206_unreserved_names/test.compile.pass.cpp b/tests/std/tests/GH_002206_unreserved_names/test.compile.pass.cpp index 302200fc3e..781f42bb38 100644 --- a/tests/std/tests/GH_002206_unreserved_names/test.compile.pass.cpp +++ b/tests/std/tests/GH_002206_unreserved_names/test.compile.pass.cpp @@ -3,6 +3,7 @@ #define ISA_AVAILABILITY delete #define error_parse delete +#define error_syntax delete #define nsec delete #define sec delete #define xtime delete diff --git a/tests/std/tests/GH_004686_vectorization_on_trivial_assignability/env.lst b/tests/std/tests/GH_004686_vectorization_on_trivial_assignability/env.lst new file mode 100644 index 0000000000..19f025bd0e --- /dev/null +++ b/tests/std/tests/GH_004686_vectorization_on_trivial_assignability/env.lst @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +RUNALL_INCLUDE ..\usual_matrix.lst diff --git a/tests/std/tests/GH_004686_vectorization_on_trivial_assignability/test.cpp b/tests/std/tests/GH_004686_vectorization_on_trivial_assignability/test.cpp new file mode 100644 index 0000000000..370291f437 --- /dev/null +++ b/tests/std/tests/GH_004686_vectorization_on_trivial_assignability/test.cpp @@ -0,0 +1,256 @@ +// Copyright (c) Microsoft Corporation. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include + +#if _HAS_CXX20 +#define CONSTEXPR20 constexpr +#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv +#define CONSTEXPR20 inline +#endif // ^^^ !_HAS_CXX20 ^^^ + +using namespace std; + +struct Cat {}; +struct Leopard : Cat { + int spots_; + + Leopard() = default; + Leopard(const Leopard&) = default; + Leopard(Leopard&&) = default; + Leopard& operator=(Leopard&) && = delete; + using Cat::operator=; +}; + +constexpr pair expected_results[]{{5, 6}, {3, 4}, {1, 2}}; + +CONSTEXPR20 void test_reverse_copy() { + { + pair src[] = {{1, 2}, {3, 4}, {5, 6}}; + pair dst[] = {{3, 1}, {4, 1}, {5, 9}}; + pair srcref[] = { + {src[0].first, src[0].second}, {src[1].first, src[1].second}, {src[2].first, src[2].second}}; + pair dstref[] = { + {dst[0].first, dst[0].second}, {dst[1].first, dst[1].second}, {dst[2].first, dst[2].second}}; + + reverse_copy(begin(srcref), end(srcref), dstref); + assert(equal(begin(dst), end(dst), begin(expected_results), end(expected_results))); + } +#if _HAS_CXX20 + { + pair src[] = {{1, 2}, {3, 4}, {5, 6}}; + pair dst[] = {{3, 1}, {4, 1}, {5, 9}}; + pair srcref[] = { + {src[0].first, src[0].second}, {src[1].first, src[1].second}, {src[2].first, src[2].second}}; + pair dstref[] = { + {dst[0].first, dst[0].second}, {dst[1].first, dst[1].second}, {dst[2].first, dst[2].second}}; + + ranges::reverse_copy(srcref, dstref); + assert(ranges::equal(dst, expected_results)); + } +#endif // _HAS_CXX20 +#if _HAS_CXX23 + { + pair src[] = {{1, 2}, {3, 4}, {5, 6}}; + pair dst[] = {{3, 1}, {4, 1}, {5, 9}}; + pair srcref[] = {src[0], src[1], src[2]}; + pair dstref[] = {dst[0], dst[1], dst[2]}; + + reverse_copy(begin(srcref), end(srcref), dstref); + assert(equal(begin(dst), end(dst), begin(expected_results), end(expected_results))); + } + { + pair src[] = {{1, 2}, {3, 4}, {5, 6}}; + pair dst[] = {{3, 1}, {4, 1}, {5, 9}}; + pair srcref[] = {src[0], src[1], src[2]}; + pair dstref[] = 
{dst[0], dst[1], dst[2]}; + + ranges::reverse_copy(srcref, dstref); + assert(ranges::equal(dst, expected_results)); + } +#endif // _HAS_CXX23 +} + +constexpr Leopard make_leopard(const int n) noexcept { + Leopard result{}; + result.spots_ = n; + return result; +} + +CONSTEXPR20 void test_copy_move_leopards() { + constexpr Leopard expected_leopards[]{ + make_leopard(3), make_leopard(1), make_leopard(4), make_leopard(1), make_leopard(5), make_leopard(9)}; + constexpr Leopard zero_leopards[6]{}; + auto equal_leopard = [](const Leopard& lhs, const Leopard& rhs) { return lhs.spots_ == rhs.spots_; }; + { + Leopard dst[]{ + make_leopard(3), make_leopard(1), make_leopard(4), make_leopard(1), make_leopard(5), make_leopard(9)}; + copy(begin(zero_leopards), end(zero_leopards), begin(dst)); + assert(equal(begin(dst), end(dst), begin(expected_leopards), end(expected_leopards), equal_leopard)); + } + { + Leopard dst[]{ + make_leopard(3), make_leopard(1), make_leopard(4), make_leopard(1), make_leopard(5), make_leopard(9)}; + copy_n(begin(zero_leopards), size(zero_leopards), begin(dst)); + assert(equal(begin(dst), end(dst), begin(expected_leopards), end(expected_leopards), equal_leopard)); + } + { + Leopard dst[]{ + make_leopard(3), make_leopard(1), make_leopard(4), make_leopard(1), make_leopard(5), make_leopard(9)}; + copy_backward(begin(zero_leopards), end(zero_leopards), end(dst)); + assert(equal(begin(dst), end(dst), begin(expected_leopards), end(expected_leopards), equal_leopard)); + } + { + Leopard dst[]{ + make_leopard(3), make_leopard(1), make_leopard(4), make_leopard(1), make_leopard(5), make_leopard(9)}; + move(begin(zero_leopards), end(zero_leopards), begin(dst)); + assert(equal(begin(dst), end(dst), begin(expected_leopards), end(expected_leopards), equal_leopard)); + } + { + Leopard dst[]{ + make_leopard(3), make_leopard(1), make_leopard(4), make_leopard(1), make_leopard(5), make_leopard(9)}; + move_backward(begin(zero_leopards), end(zero_leopards), end(dst)); + 
assert(equal(begin(dst), end(dst), begin(expected_leopards), end(expected_leopards), equal_leopard)); + } +#if _HAS_CXX20 + { + Leopard dst[]{ + make_leopard(3), make_leopard(1), make_leopard(4), make_leopard(1), make_leopard(5), make_leopard(9)}; + ranges::copy(zero_leopards, dst); + assert(ranges::equal(dst, expected_leopards, equal_leopard)); + } + { + Leopard dst[]{ + make_leopard(3), make_leopard(1), make_leopard(4), make_leopard(1), make_leopard(5), make_leopard(9)}; + ranges::copy_n(ranges::begin(zero_leopards), ranges::distance(zero_leopards), dst); + assert(ranges::equal(dst, expected_leopards, equal_leopard)); + } + { + Leopard dst[]{ + make_leopard(3), make_leopard(1), make_leopard(4), make_leopard(1), make_leopard(5), make_leopard(9)}; + ranges::copy_backward(zero_leopards, ranges::end(dst)); + assert(ranges::equal(dst, expected_leopards, equal_leopard)); + } + { + Leopard dst[]{ + make_leopard(3), make_leopard(1), make_leopard(4), make_leopard(1), make_leopard(5), make_leopard(9)}; + ranges::move(zero_leopards, dst); + assert(ranges::equal(dst, expected_leopards, equal_leopard)); + } + { + Leopard dst[]{ + make_leopard(3), make_leopard(1), make_leopard(4), make_leopard(1), make_leopard(5), make_leopard(9)}; + ranges::move_backward(zero_leopards, ranges::end(dst)); + assert(ranges::equal(dst, expected_leopards, equal_leopard)); + } +#endif // _HAS_CXX20 +} + +// Pedantically, all of MSVC, Clang, and EDG are currently wrong on this, see LLVM-37038. +// However, if compilers get corrected, the assignment operators of `DerivedLeopard` and `LeopardHouse` will be trivial +// but no-op, and the library side can't correctly conclude that assignments for them shouldn't be vectorized. +// As a result, we keep this as a regression test. 
+CONSTEXPR20 void test_llvm_37038() { + struct DerivedLeopard : Leopard {}; + static_assert(is_trivially_move_assignable_v, ""); + + auto make_derived_leopard = [](int n) { + DerivedLeopard ret{}; + ret.spots_ = n; + return ret; + }; + + auto equal_derived = [](const DerivedLeopard& lhs, const DerivedLeopard& rhs) { return lhs.spots_ == rhs.spots_; }; + + { + DerivedLeopard src[]{ + make_derived_leopard(1), make_derived_leopard(7), make_derived_leopard(2), make_derived_leopard(9)}; + DerivedLeopard dst[4]{}; + move(begin(src), end(src), dst); + assert(equal(begin(dst), end(dst), begin(src), end(src), equal_derived)); + } + { + DerivedLeopard src[]{ + make_derived_leopard(1), make_derived_leopard(7), make_derived_leopard(2), make_derived_leopard(9)}; + DerivedLeopard dst[4]{}; + move_backward(begin(src), end(src), end(dst)); + assert(equal(begin(dst), end(dst), begin(src), end(src), equal_derived)); + } +#if _HAS_CXX20 + { + DerivedLeopard src[]{ + make_derived_leopard(1), make_derived_leopard(7), make_derived_leopard(2), make_derived_leopard(9)}; + DerivedLeopard dst[4]{}; + ranges::move(src, dst); + assert(ranges::equal(src, dst, equal_derived)); + } + { + DerivedLeopard src[]{ + make_derived_leopard(1), make_derived_leopard(7), make_derived_leopard(2), make_derived_leopard(9)}; + DerivedLeopard dst[4]{}; + ranges::move_backward(src, ranges::end(dst)); + assert(ranges::equal(src, dst, equal_derived)); + } +#endif // _HAS_CXX20 + + struct LeopardHouse { + Leopard bigcat_; + }; + static_assert(is_trivially_move_assignable_v, ""); + + auto make_leopard_house = [](int n) { + LeopardHouse ret{}; + ret.bigcat_.spots_ = n; + return ret; + }; + + auto equal_house = [](const LeopardHouse& lhs, const LeopardHouse& rhs) { + return lhs.bigcat_.spots_ == rhs.bigcat_.spots_; + }; + + { + LeopardHouse src[]{make_leopard_house(1), make_leopard_house(7), make_leopard_house(2), make_leopard_house(9)}; + LeopardHouse dst[4]{}; + move(begin(src), end(src), dst); + 
assert(equal(begin(dst), end(dst), begin(src), end(src), equal_house)); + } + { + LeopardHouse src[]{make_leopard_house(1), make_leopard_house(7), make_leopard_house(2), make_leopard_house(9)}; + LeopardHouse dst[4]{}; + move_backward(begin(src), end(src), end(dst)); + assert(equal(begin(dst), end(dst), begin(src), end(src), equal_house)); + } +#if _HAS_CXX20 + { + LeopardHouse src[]{make_leopard_house(1), make_leopard_house(7), make_leopard_house(2), make_leopard_house(9)}; + LeopardHouse dst[4]{}; + ranges::move(src, dst); + assert(ranges::equal(src, dst, equal_house)); + } + { + LeopardHouse src[]{make_leopard_house(1), make_leopard_house(7), make_leopard_house(2), make_leopard_house(9)}; + LeopardHouse dst[4]{}; + ranges::move_backward(src, ranges::end(dst)); + assert(ranges::equal(src, dst, equal_house)); + } +#endif // _HAS_CXX20 +} + +CONSTEXPR20 bool test() { + test_reverse_copy(); + test_copy_move_leopards(); + test_llvm_37038(); + + return true; +} + +#if _HAS_CXX20 +static_assert(test()); +#endif // _HAS_CXX20 + +int main() { + test(); +} diff --git a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp index 7747befd70..7e66ca640a 100644 --- a/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp +++ b/tests/std/tests/GH_005244_regex_escape_sequences/test.cpp @@ -25,9 +25,7 @@ class test_regex_traits { using char_class_type = typename rx_traits::char_class_type; // TRANSITION, GH-995 - using _Uelem = typename rx_traits::_Uelem; - static constexpr auto _Ch_upper = rx_traits::_Ch_upper; - static constexpr auto _Ch_alpha = rx_traits::_Ch_alpha; + using _Uelem = typename rx_traits::_Uelem; test_regex_traits() = default; @@ -193,6 +191,11 @@ void test_gh_5244_atomescape_ecmascript() { g_regexTester.should_not_match("ca", R"(\ca)", ECMAScript); g_regexTester.should_throw(R"(\c0)", error_escape, ECMAScript); g_regexTester.should_throw(R"(\c)", error_escape, ECMAScript); + 
g_regexTester.should_throw(R"(\c@)", error_escape, ECMAScript); + g_regexTester.should_throw(R"(\c[)", error_escape, ECMAScript); + g_regexTester.should_throw(R"(\c`)", error_escape, ECMAScript); + g_regexTester.should_throw(R"(\c{)", error_escape, ECMAScript); + g_regexTester.should_throw(L"\\c\u00C0", error_escape, ECMAScript); // U+00C0 LATIN CAPITAL LETTER A WITH GRAVE // AtomEscape :: CharacterEscape :: HexEscapeSequence g_regexTester.should_match("\x00"s, R"(\x00)", ECMAScript); @@ -632,22 +635,15 @@ void test_gh_5244_classescape_posix_not_awk(syntax_option_type option) { check_classescape_noescape("?", option); check_classescape_noescape("|", option); - // TRANSITION, GH-5379 - if (option & (extended | egrep)) { - check_classescape_noescape("(", option); - check_classescape_noescape(")", option); - check_classescape_noescape("{", option); - } + check_classescape_noescape("(", option); + check_classescape_noescape(")", option); + check_classescape_noescape("{", option); // closing characters that are not considered special g_regexTester.should_match("\\]", R"([\]])", option); g_regexTester.should_not_match("]", R"([\]])", option); g_regexTester.should_not_match("\\", R"([\]])", option); - - // TRANSITION, GH-5379 - if (option & (extended | egrep)) { - check_classescape_noescape("}", option); - } + check_classescape_noescape("}", option); // awk escape sequences check_classescape_noescape("a", option); @@ -882,8 +878,28 @@ void test_gh_5244() { test_gh_5244_classescape_awk(); } +void test_gh_5379() { + // GH-5379: Backslashes in character classes are sometimes not matched in basic regular expressions + + // Correct handling of these backslashes at the beginning of a character class is already covered by GH-5244 tests. + // The following tests check that backslashes are handled correctly immediately after a bracketed character class. 
+ for (syntax_option_type syntax : {basic, grep}) { + g_regexTester.should_throw(R"([a]\b)", error_escape, syntax); + g_regexTester.should_match("a[b]", R"([a]\[b])", syntax); + g_regexTester.should_match("a", R"(\([a]\))", syntax); + g_regexTester.should_match("ab", R"([a]\(b\))", syntax); + g_regexTester.should_match("a", R"([a]\{1\})", syntax); + g_regexTester.should_throw(R"([a]\})", error_brace, syntax); + + // also check handling of identity escape "\]", + // which is supported as an extension following more recent POSIX standards + g_regexTester.should_match("a]", R"([a]\])", syntax); + } +} + int main() { test_gh_5244(); + test_gh_5379(); return g_regexTester.result(); } diff --git a/tests/std/tests/GH_005553_regex_character_translation/env.lst b/tests/std/tests/GH_005553_regex_character_translation/env.lst new file mode 100644 index 0000000000..19f025bd0e --- /dev/null +++ b/tests/std/tests/GH_005553_regex_character_translation/env.lst @@ -0,0 +1,4 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +RUNALL_INCLUDE ..\usual_matrix.lst diff --git a/tests/std/tests/GH_005553_regex_character_translation/test.cpp b/tests/std/tests/GH_005553_regex_character_translation/test.cpp new file mode 100644 index 0000000000..33c5521ad4 --- /dev/null +++ b/tests/std/tests/GH_005553_regex_character_translation/test.cpp @@ -0,0 +1,251 @@ +// Copyright (c) Microsoft Corporation. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include + +#include + +using namespace std; +using namespace std::regex_constants; + +regex_fixture g_regexTester; + +template +class nonidempotent_translate_regex_traits : private regex_traits { +private: + using rx_traits = regex_traits; + +public: + using char_type = typename rx_traits::char_type; + using string_type = typename rx_traits::string_type; + using locale_type = typename rx_traits::locale_type; + using char_class_type = typename rx_traits::char_class_type; + using uchar_type = make_unsigned_t; + + // TRANSITION, GH-995 + using _Uelem = typename rx_traits::_Uelem; + + nonidempotent_translate_regex_traits() = default; + + using rx_traits::length; + + charT translate(const charT c) const { + return static_cast(static_cast(rx_traits::translate(c)) / 2U); + } + + + charT translate_nocase(const charT c) const { + return static_cast(static_cast(rx_traits::translate_nocase(c)) / 2U); + } + + using rx_traits::getloc; + using rx_traits::imbue; + using rx_traits::isctype; + using rx_traits::lookup_classname; + using rx_traits::lookup_collatename; + using rx_traits::transform; + using rx_traits::transform_primary; + using rx_traits::value; +}; + +template +void check_match(const wstring& subject, const wstring& pattern, const Rx& re, match_flag_type flags = match_default, + bool matches = true) { + if (regex_match(subject, re, flags) != matches) { + wprintf(LR"(Expected regex_match("%s", regex("%s", 0x%X)) to be %s.)", subject.c_str(), pattern.c_str(), + static_cast(re.flags()), matches ? 
L"true" : L"false"); + g_regexTester.fail_regex(); + } +} + +template +void check_no_match( + const wstring& subject, const wstring& pattern, const Rx& re, match_flag_type flags = match_default) { + check_match(subject, pattern, re, flags, false); +} + +template +void check_search_match(const wstring& subject, const wstring& expected, const wstring& pattern, const Rx& re, + match_flag_type flags = match_default) { + wsmatch match; + const bool search_result = regex_search(subject, match, re, flags); + if (!search_result || match[0] != expected) { + wprintf(LR"(Expected regex_search("%s", regex("%s", 0x%X), 0x%X) to find "%s", )", subject.c_str(), + pattern.c_str(), static_cast(re.flags()), static_cast(flags), expected.c_str()); + if (search_result) { + wprintf(LR"(but it matched "%s")" + L"\n", + match.str().c_str()); + } else { + puts("but it failed to match"); + } + g_regexTester.fail_regex(); + } +} + +template +void check_search_fail( + const wstring& subject, const wstring& pattern, const Rx& re, match_flag_type flags = match_default) { + wsmatch match; + const bool search_result = regex_search(subject, match, re, flags); + if (search_result) { + wprintf(LR"(Expected regex_search("%s", regex("%s", 0x%X), 0x%X) to not match, but it found "%s")" + L"\n", + subject.c_str(), pattern.c_str(), static_cast(re.flags()), static_cast(flags), + match.str().c_str()); + g_regexTester.fail_regex(); + } +} + +void test_gh_5553() { + // GH-5553 ``: Correct character translation in `icase` and `collate` mode + { + wstring pattern = L"g"; + basic_regex> charcompare_icase_pattern{pattern, icase}; + check_match(L"f", pattern, charcompare_icase_pattern); + check_match(L"F", pattern, charcompare_icase_pattern); + check_match(L"g", pattern, charcompare_icase_pattern); + check_match(L"G", pattern, charcompare_icase_pattern); + check_no_match(L"e", pattern, charcompare_icase_pattern); + check_no_match(L"E", pattern, charcompare_icase_pattern); + check_no_match(L"h", pattern, 
charcompare_icase_pattern); + check_no_match(L"H", pattern, charcompare_icase_pattern); + + check_search_match(L"abcdefghijklmnopqrstuvwxyz", L"f", pattern, charcompare_icase_pattern); + check_search_match(L"ABCDEFGHIJKLMNOPQRSTUVWXYZ", L"F", pattern, charcompare_icase_pattern); + check_search_match(L"zyxwvutsrqponmlkjihgfedcba", L"g", pattern, charcompare_icase_pattern); + check_search_match(L"ZYXWVUTSRQPONMLKJIHGFEDCBA", L"G", pattern, charcompare_icase_pattern); + check_search_fail(L"zyxwvutsrqponmlkjihedcba", pattern, charcompare_icase_pattern); + check_search_fail(L"ABCDEHIJKLMNOPQRSTUVWXYZ", pattern, charcompare_icase_pattern); + } + + { + wstring pattern = L"g"; + basic_regex> charcompare_collate_pattern{ + pattern, regex_constants::collate}; + check_match(L"f", pattern, charcompare_collate_pattern); + check_no_match(L"F", pattern, charcompare_collate_pattern); + check_match(L"g", pattern, charcompare_collate_pattern); + check_no_match(L"G", pattern, charcompare_collate_pattern); + check_no_match(L"e", pattern, charcompare_collate_pattern); + check_no_match(L"E", pattern, charcompare_collate_pattern); + check_no_match(L"h", pattern, charcompare_collate_pattern); + check_no_match(L"H", pattern, charcompare_collate_pattern); + + check_search_match(L"abcdefghijklmnopqrstuvwxyz", L"f", pattern, charcompare_collate_pattern); + check_search_fail(L"ABCDEFGHIJKLMNOPQRSTUVWXYZ", pattern, charcompare_collate_pattern); + check_search_match(L"zyxwvutsrqponmlkjihgfedcba", L"g", pattern, charcompare_collate_pattern); + check_search_fail(L"ZYXWVUTSRQPONMLKJIHGFEDCBA", pattern, charcompare_collate_pattern); + check_search_fail(L"zyxwvutsrqponmlkjihedcba", pattern, charcompare_collate_pattern); + } + + { + wstring pattern = L"[g]"; + basic_regex> charclasscompare_icase_pattern{ + pattern, icase}; + check_match(L"f", pattern, charclasscompare_icase_pattern); + check_match(L"F", pattern, charclasscompare_icase_pattern); + check_match(L"g", pattern, 
charclasscompare_icase_pattern); + check_match(L"G", pattern, charclasscompare_icase_pattern); + check_no_match(L"e", pattern, charclasscompare_icase_pattern); + check_no_match(L"E", pattern, charclasscompare_icase_pattern); + check_no_match(L"h", pattern, charclasscompare_icase_pattern); + check_no_match(L"H", pattern, charclasscompare_icase_pattern); + } + + { + wstring pattern = L"[g]"; + basic_regex> charclasscompare_collate_pattern{ + pattern, regex_constants::collate}; + check_match(L"f", pattern, charclasscompare_collate_pattern); + check_no_match(L"F", pattern, charclasscompare_collate_pattern); + check_match(L"g", pattern, charclasscompare_collate_pattern); + check_no_match(L"G", pattern, charclasscompare_collate_pattern); + check_no_match(L"e", pattern, charclasscompare_collate_pattern); + check_no_match(L"E", pattern, charclasscompare_collate_pattern); + check_no_match(L"h", pattern, charclasscompare_collate_pattern); + check_no_match(L"H", pattern, charclasscompare_collate_pattern); + } + + { + wstring pattern = L"[g-i]"; + basic_regex> charrangecompare_icase_pattern{ + pattern, icase}; + check_match(L"f", pattern, charrangecompare_icase_pattern); + check_match(L"F", pattern, charrangecompare_icase_pattern); + check_match(L"g", pattern, charrangecompare_icase_pattern); + check_match(L"G", pattern, charrangecompare_icase_pattern); + check_match(L"i", pattern, charrangecompare_icase_pattern); + check_match(L"I", pattern, charrangecompare_icase_pattern); + check_no_match(L"e", pattern, charrangecompare_icase_pattern); + check_no_match(L"E", pattern, charrangecompare_icase_pattern); + check_no_match(L"j", pattern, charrangecompare_icase_pattern); + check_no_match(L"J", pattern, charrangecompare_icase_pattern); + } + + { + wstring pattern = L"[g-i]"; + basic_regex> charrangecompare_collate_pattern{ + pattern, regex_constants::collate}; + check_match(L"f", pattern, charrangecompare_collate_pattern); + check_no_match(L"F", pattern, 
charrangecompare_collate_pattern); + check_match(L"g", pattern, charrangecompare_collate_pattern); + check_no_match(L"G", pattern, charrangecompare_collate_pattern); + check_match(L"i", pattern, charrangecompare_collate_pattern); + check_no_match(L"I", pattern, charrangecompare_collate_pattern); + check_no_match(L"e", pattern, charrangecompare_collate_pattern); + check_no_match(L"E", pattern, charrangecompare_collate_pattern); + check_no_match(L"j", pattern, charrangecompare_collate_pattern); + check_no_match(L"J", pattern, charrangecompare_collate_pattern); + } + + { + wstring pattern = L"[\u022d-\u022f]"; // U+022D LATIN SMALL LETTER O WITH TILDE AND MACRON + // U+022F LATIN SMALL LETTER O WITH DOT ABOVE + + basic_regex> small_unicode_charrange_pattern{ + pattern, regex_constants::icase}; + // U+022C LATIN CAPITAL LETTER O WITH TILDE AND MACRON + check_match(L"\u022c", pattern, small_unicode_charrange_pattern); + // U+022D LATIN SMALL LETTER O WITH TILDE AND MACRON + check_match(L"\u022d", pattern, small_unicode_charrange_pattern); + // U+022E LATIN CAPITAL LETTER O WITH DOT ABOVE + check_match(L"\u022e", pattern, small_unicode_charrange_pattern); + // U+022F LATIN SMALL LETTER O WITH DOT ABOVE + check_match(L"\u022f", pattern, small_unicode_charrange_pattern); + // U+022B LATIN SMALL LETTER O WITH DIAERESIS AND MACRON + check_no_match(L"\u022b", pattern, small_unicode_charrange_pattern); + // U+0230 LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON + check_no_match(L"\u0230", pattern, small_unicode_charrange_pattern); + } + + { + wstring pattern = L"[\u022b-\u0230]"; // U+022B LATIN SMALL LETTER O WITH DIAERESIS AND MACRON + // U+0230 LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON + + basic_regex> big_unicode_charrange_pattern{ + pattern, regex_constants::icase}; + // U+022A LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON + check_match(L"\u022a", pattern, big_unicode_charrange_pattern); + // U+022B LATIN SMALL LETTER O WITH DIAERESIS AND MACRON + 
check_match(L"\u022b", pattern, big_unicode_charrange_pattern); + // U+0230 LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON + check_match(L"\u0230", pattern, big_unicode_charrange_pattern); + // U+0231 LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON + check_match(L"\u0231", pattern, big_unicode_charrange_pattern); + // U+0229 LATIN SMALL LETTER E WITH CEDILLA + check_no_match(L"\u0229", pattern, big_unicode_charrange_pattern); + // U+0232 LATIN CAPITAL LETTER Y WITH MACRON + check_no_match(L"\u0232", pattern, big_unicode_charrange_pattern); + } +} + +int main() { + test_gh_5553(); + + return g_regexTester.result(); +} diff --git a/tests/std/tests/P0355R7_calendars_and_time_zones_time_zones/test.cpp b/tests/std/tests/P0355R7_calendars_and_time_zones_time_zones/test.cpp index dc509bea95..fdfa456263 100644 --- a/tests/std/tests/P0355R7_calendars_and_time_zones_time_zones/test.cpp +++ b/tests/std/tests/P0355R7_calendars_and_time_zones_time_zones/test.cpp @@ -393,6 +393,15 @@ void timezone_precision_test() { } } +void timezone_sorted_vectors_test() { + // N5008 [time.zone.db.tzdb]/1: "Each vector in a tzdb object is sorted to enable fast lookup." 
+ const auto& my_tzdb = get_tzdb(); + + assert(ranges::is_sorted(my_tzdb.zones)); + assert(ranges::is_sorted(my_tzdb.links)); + assert(ranges::is_sorted(my_tzdb.leap_seconds)); +} + void test() { timezone_tzdb_list_test(); timezone_version_test(); @@ -401,6 +410,7 @@ void test() { timezone_to_local_test(); timezone_local_info_test(); timezone_precision_test(); + timezone_sorted_vectors_test(); } int main() { diff --git a/tests/std/tests/P0466R5_layout_compatibility_and_pointer_interconvertibility_traits/test.cpp b/tests/std/tests/P0466R5_layout_compatibility_and_pointer_interconvertibility_traits/test.cpp index 1500983e01..2f8223c3ce 100644 --- a/tests/std/tests/P0466R5_layout_compatibility_and_pointer_interconvertibility_traits/test.cpp +++ b/tests/std/tests/P0466R5_layout_compatibility_and_pointer_interconvertibility_traits/test.cpp @@ -199,9 +199,7 @@ constexpr bool test() { ASSERT(!is_corresponding_member(&S5::v1, &S6::v2)); ASSERT(!is_corresponding_member(&S5::v2, &S6::v1)); ASSERT(!is_corresponding_member(&S5::v3, &S6::v3)); -#ifndef _M_CEE // TRANSITION, VSO-2417635 ASSERT(!is_corresponding_member(&NS::v1, &NS::w1)); -#endif // ^^^ no workaround ^^^ ASSERT(!is_corresponding_member(&S7::f1, &S7::f1)); ASSERT(!is_corresponding_member(static_cast(nullptr), static_cast(nullptr))); ASSERT(!is_corresponding_member(&S1::v1, static_cast(nullptr))); @@ -236,9 +234,7 @@ constexpr bool test() { ASSERT(is_pointer_interconvertible_with_class(&U::v2)); ASSERT(!is_pointer_interconvertible_with_class(&NS::a)); -#ifndef _M_CEE // TRANSITION, VSO-2417635 ASSERT(!is_pointer_interconvertible_with_class(&NS::b)); -#endif // ^^^ no workaround ^^^ ASSERT(!is_pointer_interconvertible_with_class(&C::f1)); ASSERT(!is_pointer_interconvertible_with_class(static_cast(nullptr))); } diff --git a/tests/std/tests/P0896R4_P1614R2_comparisons/test.cpp b/tests/std/tests/P0896R4_P1614R2_comparisons/test.cpp index 9582b548a2..baef549e32 100644 --- 
a/tests/std/tests/P0896R4_P1614R2_comparisons/test.cpp +++ b/tests/std/tests/P0896R4_P1614R2_comparisons/test.cpp @@ -425,13 +425,8 @@ constexpr void ordering_test_cases() { derived const some_deriveds[2] = {}; test_strongly_ordered(&some_deriveds[0], &some_deriveds[1]); -#if !defined(__clang__) && !defined(__EDG__) // TRANSITION, VSO-1168721 - if (!std::is_constant_evaluated()) -#endif // ^^^ workaround ^^^ - { - test_strongly_ordered(static_cast(&some_deriveds[0]), &some_deriveds[1]); - test_strongly_ordered(&some_deriveds[0], static_cast(&some_deriveds[1])); - } + test_strongly_ordered(static_cast(&some_deriveds[0]), &some_deriveds[1]); + test_strongly_ordered(&some_deriveds[0], static_cast(&some_deriveds[1])); if (!std::is_constant_evaluated()) { test_strongly_ordered(&some_ints[0], static_cast(&some_ints[1])); diff --git a/tests/std/tests/P1208R6_source_location/header.h b/tests/std/tests/P1208R6_source_location/header.h index 36eb6bfcce..8fe4f8d046 100644 --- a/tests/std/tests/P1208R6_source_location/header.h +++ b/tests/std/tests/P1208R6_source_location/header.h @@ -18,11 +18,11 @@ constexpr void header_test() { assert(x.column() == 37); #endif // ^^^ C1XX ^^^ #if _USE_DETAILED_FUNCTION_NAME_IN_SOURCE_LOCATION -#ifdef __EDG__ - assert(x.function_name() == "void header_test()"sv); -#else // ^^^ EDG / Other vvv +#ifdef __EDG__ // TRANSITION, EDG is changing to match C1XX's output + assert(x.function_name() == "void __cdecl header_test(void)"sv || x.function_name() == "void header_test()"sv); +#else // ^^^ workaround / no workaround vvv assert(x.function_name() == "void __cdecl header_test(void)"sv); -#endif // ^^^ Other ^^^ +#endif // ^^^ no workaround ^^^ #else // ^^^ detailed / basic vvv assert(x.function_name() == "header_test"sv); #endif // ^^^ basic ^^^ diff --git a/tests/std/tests/P1208R6_source_location/test.cpp b/tests/std/tests/P1208R6_source_location/test.cpp index e578bcd620..6ed3f67d98 100644 --- a/tests/std/tests/P1208R6_source_location/test.cpp 
+++ b/tests/std/tests/P1208R6_source_location/test.cpp @@ -68,11 +68,11 @@ constexpr void local_test() { assert(x.column() == 37); #endif // ^^^ C1XX ^^^ #if _USE_DETAILED_FUNCTION_NAME_IN_SOURCE_LOCATION -#ifdef __EDG__ - assert(x.function_name() == "void local_test()"sv); -#else // ^^^ EDG / Other vvv +#ifdef __EDG__ // TRANSITION, EDG is changing to match C1XX's output + assert(x.function_name() == "void __cdecl local_test(void)"sv || x.function_name() == "void local_test()"sv); +#else // ^^^ workaround / no workaround vvv assert(x.function_name() == "void __cdecl local_test(void)"sv); -#endif // ^^^ Other ^^^ +#endif // ^^^ no workaround ^^^ #else // ^^^ detailed / basic vvv assert(x.function_name() == "local_test"sv); #endif // ^^^ basic ^^^ @@ -84,11 +84,11 @@ constexpr void argument_test( assert(x.line() == line); assert(x.column() == column); #if _USE_DETAILED_FUNCTION_NAME_IN_SOURCE_LOCATION -#ifdef __EDG__ - assert(x.function_name() == "bool test()"sv); -#else // ^^^ EDG / Other vvv +#ifdef __EDG__ // TRANSITION, EDG is changing to match C1XX's output + assert(x.function_name() == "bool __cdecl test(void)"sv || x.function_name() == "bool test()"sv); +#else // ^^^ workaround / no workaround vvv assert(x.function_name() == "bool __cdecl test(void)"sv); -#endif // ^^^ Other ^^^ +#endif // ^^^ no workaround ^^^ #else // ^^^ detailed / basic vvv assert(x.function_name() == "test"sv); #endif // ^^^ basic ^^^ @@ -110,11 +110,12 @@ constexpr void sloc_constructor_test() { } else #endif // ^^^ workaround ^^^ { -#ifdef __EDG__ - assert(x.loc.function_name() == "void sloc_constructor_test()"sv); -#else // ^^^ EDG / Other vvv +#ifdef __EDG__ // TRANSITION, EDG is changing to match C1XX's output + assert(x.loc.function_name() == "void __cdecl sloc_constructor_test(void)"sv + || x.loc.function_name() == "void sloc_constructor_test()"sv); +#else // ^^^ workaround / no workaround vvv assert(x.loc.function_name() == "void __cdecl sloc_constructor_test(void)"sv); -#endif 
// ^^^ Other ^^^ +#endif // ^^^ no workaround ^^^ } #else // ^^^ detailed / basic vvv #if !defined(__clang__) && !defined(__EDG__) // TRANSITION, VSO-1285783 @@ -140,11 +141,11 @@ constexpr void different_constructor_test() { assert(x.loc.column() == 5); #endif // ^^^ C1XX ^^^ #if _USE_DETAILED_FUNCTION_NAME_IN_SOURCE_LOCATION -#ifdef __EDG__ - assert(x.loc.function_name() == "s::s(int)"sv); -#else // ^^^ EDG / Other vvv +#ifdef __EDG__ // TRANSITION, EDG is changing to almost match C1XX's output + assert(x.loc.function_name() == "__cdecl s::s(int)"sv || x.loc.function_name() == "s::s(int)"sv); +#else // ^^^ workaround / no workaround vvv assert(x.loc.function_name() == THISCALL_OR_CDECL " s::s(int)"sv); -#endif // ^^^ Other ^^^ +#endif // ^^^ no workaround ^^^ #else // ^^^ detailed / basic vvv assert(x.loc.function_name() == "s"sv); #endif // ^^^ basic ^^^ @@ -166,11 +167,12 @@ constexpr void sub_member_test() { } else #endif // ^^^ workaround ^^^ { -#ifdef __EDG__ - assert(s.x.loc.function_name() == "void sub_member_test()"sv); -#else // ^^^ EDG / Other vvv +#ifdef __EDG__ // TRANSITION, EDG is changing to match C1XX's output + assert(s.x.loc.function_name() == "void __cdecl sub_member_test(void)"sv + || s.x.loc.function_name() == "void sub_member_test()"sv); +#else // ^^^ workaround / no workaround vvv assert(s.x.loc.function_name() == "void __cdecl sub_member_test(void)"sv); -#endif // ^^^ Other ^^^ +#endif // ^^^ no workaround ^^^ } #else // ^^^ detailed / basic vvv #if !defined(__clang__) && !defined(__EDG__) // TRANSITION, VSO-1285783 @@ -194,11 +196,12 @@ constexpr void sub_member_test() { assert(s_i.x.loc.column() == 5); #endif // ^^^ C1XX ^^^ #if _USE_DETAILED_FUNCTION_NAME_IN_SOURCE_LOCATION -#ifdef __EDG__ - assert(s_i.x.loc.function_name() == "s2::s2(int)"sv); -#else // ^^^ EDG / Other vvv +#ifdef __EDG__ // TRANSITION, EDG is changing to match C1XX's output + assert(s_i.x.loc.function_name() == THISCALL_OR_CDECL " s2::s2(int)"sv + || 
s_i.x.loc.function_name() == "s2::s2(int)"sv); +#else // ^^^ workaround / no workaround vvv assert(s_i.x.loc.function_name() == THISCALL_OR_CDECL " s2::s2(int)"sv); -#endif // ^^^ Other ^^^ +#endif // ^^^ no workaround ^^^ #else // ^^^ detailed / basic vvv assert(s_i.x.loc.function_name() == "s2"sv); #endif // ^^^ basic ^^^ @@ -223,11 +226,11 @@ constexpr void lambda_test() { assert(x2.column() == 50); #endif // ^^^ C1XX ^^^ #if _USE_DETAILED_FUNCTION_NAME_IN_SOURCE_LOCATION -#ifdef __EDG__ - assert(x1.function_name() == "void lambda_test()"sv); -#else // ^^^ EDG / Other vvv +#ifdef __EDG__ // TRANSITION, EDG is changing to match C1XX's output + assert(x1.function_name() == "void __cdecl lambda_test(void)"sv || x1.function_name() == "void lambda_test()"sv); +#else // ^^^ workaround / no workaround vvv assert(x1.function_name() == "void __cdecl lambda_test(void)"sv); -#endif // ^^^ Other ^^^ +#endif // ^^^ no workaround ^^^ #else // ^^^ detailed / basic vvv assert(x1.function_name() == "lambda_test"sv); #endif // ^^^ basic ^^^ @@ -241,7 +244,9 @@ constexpr void lambda_test() { #elif defined(__clang__) // ^^^ basic / detailed Clang vvv assert(fun2 == "auto " THISCALL_OR_CDECL " lambda_test()::(anonymous class)::operator()(void) const"sv); #elif defined(__EDG__) // ^^^ detailed Clang / detailed __EDG__ vvv - assert(fun2 == "lambda []()->auto::operator()()->auto"sv); + // TRANSITION, EDG is changing to resemble C1XX's output somewhat more closely + assert(fun2 == THISCALL_OR_CDECL " lambda [](void)->auto::operator()(void)->auto"sv + || fun2 == "lambda []()->auto::operator()()->auto"sv); #else // ^^^ detailed __EDG__ / detailed C1XX vvv assert(fun2.starts_with("struct std::source_location " THISCALL_OR_CDECL " lambda_test::()"sv); + // TRANSITION, EDG is changing to almost match C1XX's output + assert(x1.function_name() == "std::source_location __cdecl function_template(void)"sv + || x1.function_name() == "std::source_location function_template()"sv); #else // ^^^ 
detailed __EDG__ / detailed C1XX vvv assert(x1.function_name() == "struct std::source_location __cdecl function_template(void)"sv); #endif // ^^^ detailed C1XX ^^^ @@ -284,7 +291,9 @@ constexpr void function_template_test() { #elif defined(__clang__) // ^^^ basic / detailed Clang vvv assert(x2.function_name() == "source_location __cdecl function_template(void) [T = int]"sv); #elif defined(__EDG__) // ^^^ detailed Clang / detailed __EDG__ vvv - assert(x2.function_name() == "std::source_location function_template()"sv); + // TRANSITION, EDG is changing to almost match C1XX's output + assert(x2.function_name() == "std::source_location __cdecl function_template(void)"sv + || x2.function_name() == "std::source_location function_template()"sv); #else // ^^^ detailed __EDG__ / detailed C1XX vvv assert(x2.function_name() == "struct std::source_location __cdecl function_template(void)"sv); #endif // ^^^ detailed C1XX ^^^ diff --git a/tests/std/tests/P1502R1_standard_library_header_units/custom_format.py b/tests/std/tests/P1502R1_standard_library_header_units/custom_format.py index bcb09e3ec3..6a2e45ade1 100644 --- a/tests/std/tests/P1502R1_standard_library_header_units/custom_format.py +++ b/tests/std/tests/P1502R1_standard_library_header_units/custom_format.py @@ -124,6 +124,8 @@ def getBuildSteps(self, test, litConfig, shared): if noisyProgress: print('Creating library...') cmd = ['lib.exe', '/nologo', f'/out:{libFilename}', *objFilenames] + if litConfig.target_arch.casefold() == 'arm64ec'.casefold(): + cmd.append('/machine:arm64ec') yield TestStep(cmd, shared.execDir, shared.env, False) if compileTestCppWithEdg: diff --git a/tests/std/tests/P1502R1_standard_library_header_units/custombuild.pl b/tests/std/tests/P1502R1_standard_library_header_units/custombuild.pl index 458338d1c4..6f622a7edd 100644 --- a/tests/std/tests/P1502R1_standard_library_header_units/custombuild.pl +++ b/tests/std/tests/P1502R1_standard_library_header_units/custombuild.pl @@ -173,6 +173,7 @@ () # 
For convenience, create a library file containing all of the object files that were produced. my $libFilename = "stl_header_units.lib"; Run::ExecuteCommand(join(" ", "lib.exe", "/nologo", "/out:$libFilename", @objFilenames)); + # TRANSITION, when we test ARM64EC internally, add "/machine:arm64ec" here to match custom_format.py. Run::ExecuteCL(join(" ", "test.cpp", "/Fe$cwd.exe", @consumeBuiltHeaderUnits, $libFilename)); } diff --git a/tests/std/tests/VSO_0000000_regex_interface/test.cpp b/tests/std/tests/VSO_0000000_regex_interface/test.cpp index c057696eb3..c05c4aa417 100644 --- a/tests/std/tests/VSO_0000000_regex_interface/test.cpp +++ b/tests/std/tests/VSO_0000000_regex_interface/test.cpp @@ -387,9 +387,9 @@ void test_VSO_180466_regex_search_missing_Unchecked_call() { } void test_VSO_226914_match_prev_avail() { - // N.B. assumes our nonstandard multiline behavior. See also: LWG-2343, LWG-2503 + // test exercises multiline mode const char bol_haystack[] = {'\n', 'a'}; - const regex bol_anchor(R"(^a)"); + const regex bol_anchor(R"(^a)", regex_constants::multiline); assert(regex_match(bol_haystack + 1, end(bol_haystack), bol_anchor)); assert(!regex_match(bol_haystack + 1, end(bol_haystack), bol_anchor, match_not_bol)); assert(regex_match(bol_haystack + 1, end(bol_haystack), bol_anchor, match_prev_avail)); diff --git a/tests/std/tests/VSO_0000000_regex_use/test.cpp b/tests/std/tests/VSO_0000000_regex_use/test.cpp index 7160c9007f..a1e41b8442 100644 --- a/tests/std/tests/VSO_0000000_regex_use/test.cpp +++ b/tests/std/tests/VSO_0000000_regex_use/test.cpp @@ -234,8 +234,8 @@ void test_VSO_167760_nested_quantifiers_should_not_infinite_loop() { void test_DDB_153116_replacements() { g_regexTester.should_replace_to("abc def def ghi", "^", "X", format_default, "Xabc def def ghi"); g_regexTester.should_replace_to("abc def def ghi", "$", "X", format_default, "abc def def ghiX"); - g_regexTester.should_replace_to("abc def def ghi", "\\b", "X", format_default, "XabcX XdefX 
XdefX XghiX"); - g_regexTester.should_replace_to("abc def def ghi", "\\B", "X", format_default, "aXbXc dXeXf dXeXf gXhXi"); + g_regexTester.should_replace_to("abc def def ghi", "\\b", "X", format_default, "XabcX XdefX XdefX XghiX"); + g_regexTester.should_replace_to("abc def def ghi", "\\B", "X", format_default, "aXbXc X dXeXf dXeXf X gXhXi"); g_regexTester.should_replace_to("abc def def ghi", "(?=ef)", "X", format_default, "abc dXef dXef ghi"); g_regexTester.should_replace_to("abc def def ghi", "(?!ef)", "X", format_default, "XaXbXcX XdeXfX XdeXfX XgXhXiX"); } @@ -307,8 +307,8 @@ void test_dev10_897466_regex_should_support_more_than_31_capture_groups() { } void test_regex_should_throw_for_lookbehind() { - g_regexTester.should_throw(R"((?<=abc))", error_syntax); - g_regexTester.should_throw(R"((?: Incorrect behavior for capture groups + // GH-731: : Incorrect behavior for capture groups // GH-996: regex_search behaves incorrectly when the regex contains R"(\[)" // Several bugs were fixed in ECMAScript (depth-first) and POSIX (leftmost-longest) matching rules. 
@@ -1533,7 +1676,7 @@ void test_gh_5362_grep() { { const test_regex middle_nl_with_dollar(&g_regexTester, "a$\nb$", grep); middle_nl_with_dollar.should_search_match("a$\nb", "b"); - middle_nl_with_dollar.should_search_match("a\nb", "a"); + middle_nl_with_dollar.should_search_match("a\nb", "b"); middle_nl_with_dollar.should_search_match("ba", "a"); middle_nl_with_dollar.should_search_match("a", "a"); middle_nl_with_dollar.should_search_match("b", "b"); @@ -1913,16 +2056,28 @@ void test_gh_5509() { } { - test_regex anchored_string_plus_regex(&g_regexTester, "((?:^aw)+)"); - anchored_string_plus_regex.should_search_match_capture_groups( + test_regex anchored_string_plus_regex_multi(&g_regexTester, "((?:^aw)+)", multiline); + anchored_string_plus_regex_multi.should_search_match_capture_groups( "blwerofa\nawaweraf", "aw", match_default, {{9, 11}}); + anchored_string_plus_regex_multi.should_search_fail("blwerof\naerwaf"); + } + + { + test_regex anchored_string_plus_regex(&g_regexTester, "((?:^aw)+)"); + anchored_string_plus_regex.should_search_fail("blwerofa\nawaweraf"); anchored_string_plus_regex.should_search_fail("blwerof\naerwaf"); } { - test_regex anchored_string_plus_regex(&g_regexTester, "((?:$\naw)+)"); - anchored_string_plus_regex.should_search_match_capture_groups( + test_regex anchored_string_plus_regex_multi(&g_regexTester, "((?:$\naw)+)", multiline); + anchored_string_plus_regex_multi.should_search_match_capture_groups( "blwerofa\nawaweraf", "\naw", match_default, {{8, 11}}); + anchored_string_plus_regex_multi.should_search_fail("blwerof\naerwaf"); + } + + { + test_regex anchored_string_plus_regex(&g_regexTester, "((?:$\naw)+)"); + anchored_string_plus_regex.should_search_fail("blwerofa\nawaweraf"); anchored_string_plus_regex.should_search_fail("blwerof\naerwaf"); } @@ -1937,6 +2092,17 @@ void test_gh_5509() { } } +void test_gh_5576() { + // GH-5576 sped up searches for regexes that start with assertions + // by extending the skip heuristic in the matcher. 
+ // We test here that the skip heuristic is correct + // for positive and negative lookahead assertions. + g_regexTester.should_replace_to("AbGweEfFllLLlffflElF", "(?=[[:lower:]][[:upper:]])[fFlL]{2}", R"(X$&)", + match_default, "AbGweEXfFlXlLLlffflEXlF"); + g_regexTester.should_replace_to("AbGweEfFllLLlffflElF", "(?![[:upper:]]|[[:lower:]]{2})[fFlL]{2}", R"(X$&)", + match_default, "AbGweEXfFlXlLLlffflEXlF"); +} + int main() { test_dev10_449367_case_insensitivity_should_work(); test_dev11_462743_regex_collate_should_not_disable_regex_icase(); @@ -1964,6 +2130,7 @@ int main() { test_VSO_225160_match_eol_flag(); test_VSO_226914_word_boundaries(); test_construction_from_nullptr_and_zero(); + test_gh_73(); test_gh_731(); test_gh_992(); test_gh_993(); @@ -1985,6 +2152,7 @@ int main() { test_gh_5377(); test_gh_5490(); test_gh_5509(); + test_gh_5576(); return g_regexTester.result(); } diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 1232bde41a..ed1ca6a12c 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -778,17 +778,17 @@ void test_case_rotate( } template -void test_rotate(mt19937_64& gen) { +void test_rotate(mt19937_64& gen, const size_t data_count = dataCount) { vector actual; vector actual_r; vector expected; vector tmp; - actual.reserve(dataCount); - actual_r.reserve(dataCount); - expected.reserve(dataCount); - tmp.reserve(dataCount); + actual.reserve(data_count); + actual_r.reserve(data_count); + expected.reserve(data_count); + tmp.reserve(data_count); test_case_rotate(actual, actual_r, expected, 0, tmp); - for (size_t attempts = 0; attempts < dataCount; ++attempts) { + for (size_t attempts = 0; attempts < data_count; ++attempts) { const T val = static_cast(gen()); // intentionally narrows actual.push_back(val); actual_r.push_back(val); @@ -1241,7 +1241,7 @@ void test_vector_algorithms(mt19937_64& gen) { 
test_reverse_copy(gen); test_reverse_copy(gen); - test_rotate(gen); + test_rotate(gen, 20000); // one real long rotate run, as for smaller arrays some strategies aren't executed test_rotate(gen); test_rotate(gen); test_rotate(gen); diff --git a/tests/tr1/tests/regex2/test.cpp b/tests/tr1/tests/regex2/test.cpp index 83706bb66b..c720774d3c 100644 --- a/tests/tr1/tests/regex2/test.cpp +++ b/tests/tr1/tests/regex2/test.cpp @@ -132,7 +132,7 @@ static const regex_test tests[] = { {__LINE__, T("a$"), T("ba"), "1 1 2", ALL}, {__LINE__, T("a$"), T("ab"), "0", ALL}, - {__LINE__, T("^a$"), T("b\na"), "1 2 3", ALL}, + {__LINE__, T("^a$"), T("b\na"), "0", ALL}, {__LINE__, T("\\b"), T("a"), "1 0 0", ECMA}, {__LINE__, T("\\b"), T(""), "-1", BASIC | GREP | EXTENDED | EGREP}, diff --git a/tests/utils/stl/test/features.py b/tests/utils/stl/test/features.py index f5a13551fc..6510a36af6 100644 --- a/tests/utils/stl/test/features.py +++ b/tests/utils/stl/test/features.py @@ -65,4 +65,7 @@ def getDefaultFeatures(config, litConfig): elif litConfig.target_arch.casefold() == 'arm64'.casefold(): DEFAULT_FEATURES.append(Feature(name='arm64')) + elif litConfig.target_arch.casefold() == 'arm64ec'.casefold(): + DEFAULT_FEATURES.append(Feature(name='arm64ec')) + return DEFAULT_FEATURES diff --git a/tests/utils/stl/test/tests.py b/tests/utils/stl/test/tests.py index ef51b790d4..7cc250e287 100644 --- a/tests/utils/stl/test/tests.py +++ b/tests/utils/stl/test/tests.py @@ -232,11 +232,11 @@ def _handleEnvlst(self, litConfig): self.compileFlags.extend(self.envlstEntry.getEnvVal('PM_CL', '').split()) self.linkFlags.extend(self.envlstEntry.getEnvVal('PM_LINK', '').split()) + targetArch = litConfig.target_arch.casefold() if ('clang'.casefold() in os.path.basename(cxx).casefold()): self._addCustomFeature('clang') self._addCustomFeature('gcc-style-warnings') - targetArch = litConfig.target_arch.casefold() if (targetArch == 'x64'.casefold()): self.compileFlags.append('-m64') elif (targetArch == 
'x86'.casefold()): @@ -245,6 +245,9 @@ def _handleEnvlst(self, litConfig): return Result(UNSUPPORTED, 'clang targeting arm is not supported') elif (targetArch == 'arm64'.casefold()): self.compileFlags.append('--target=arm64-pc-windows-msvc') + elif (targetArch == 'arm64ec'.casefold()): + # TRANSITION, LLVM-116256 (fixed in Clang 20) + return Result(UNSUPPORTED, 'clang targeting arm64ec is not supported') elif ('nvcc'.casefold() in os.path.basename(cxx).casefold()): self._addCustomFeature('nvcc') @@ -253,6 +256,15 @@ def _handleEnvlst(self, litConfig): else: self._addCustomFeature('cl-style-warnings') + if (targetArch == 'arm64ec'.casefold()): + self.compileFlags.append('/arm64EC') + self.linkFlags.append('/machine:arm64ec') + + # TRANSITION, Windows SDK 10.0.26100.3916 emits + # "warning C28301: No annotations for first declaration of 'meow'" + # for various intrinsics when building for ARM64EC. + self.compileFlags.append('/wd28301') + self.cxx = os.path.normpath(cxx) return None diff --git a/tools/validate/validate.cpp b/tools/validate/validate.cpp index 0a7936b50a..55ed9a2d26 100644 --- a/tools/validate/validate.cpp +++ b/tools/validate/validate.cpp @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include @@ -54,15 +55,57 @@ class BinaryFile { FILE* m_file{nullptr}; }; +struct line_and_column { + size_t line = 1; + size_t column = 1; +}; + +struct character_line_column { + unsigned char ch = '?'; + line_and_column lc; +}; + +namespace std { + template <> + struct formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + + template + auto format(const line_and_column& lc, FormatContext& ctx) const { + return format_to(ctx.out(), "{}:{}", lc.line, lc.column); + } + }; + + template <> + struct formatter { + constexpr auto parse(format_parse_context& ctx) { + return ctx.begin(); + } + + template + auto format(const character_line_column& clc, FormatContext& ctx) const { + return format_to(ctx.out(), "0x{:02X} @ 
{}", static_cast(clc.ch), clc.lc); + } + }; +} // namespace std + template -void validation_failure( - bool& any_errors, const filesystem::path& filepath, format_string...> fmt, Args&&... args) { +void validation_failure(bool& any_errors, const filesystem::path& filepath, const line_and_column& lc, + format_string...> fmt, Args&&... args) { any_errors = true; - print(stderr, "##vso[task.logissue type=error;sourcepath={};linenumber=1;columnnumber=1]Validation failed: ", - filepath.string()); + print(stderr, "##vso[task.logissue type=error;sourcepath={};linenumber={};columnnumber={}]Validation failed: ", + filepath.string(), lc.line, lc.column); println(stderr, fmt, forward(args)...); } +template +void validation_failure( + bool& any_errors, const filesystem::path& filepath, format_string...> fmt, Args&&... args) { + validation_failure(any_errors, filepath, {1, 1}, fmt, forward(args)...); +} + enum class TabPolicy : bool { Forbidden, Allowed }; void scan_file( @@ -80,10 +123,18 @@ void scan_file( size_t tab_characters = 0; size_t trailing_whitespace_lines = 0; + constexpr size_t max_error_lines_per_file = 8; + + array overlength_locations{}; + array tab_character_locations{}; + array trailing_whitespace_locations{}; + array disallowed_character_locations{}; + unsigned char prev = '@'; unsigned char previous2 = '@'; unsigned char previous3 = '@'; + size_t lines = 0; size_t columns = 0; for (BinaryFile binary_file{filepath}; binary_file.read_next_block(buffer);) { @@ -94,13 +145,18 @@ void scan_file( } else { has_cr = true; } + ++lines; } else { if (ch == LF) { has_lf = true; + ++lines; } } if (ch == '\t') { + if (tab_characters < max_error_lines_per_file) { + tab_character_locations[tab_characters] = {lines + 1, columns + 1}; + } ++tab_characters; } else if (ch == 0xEF || ch == 0xBB || ch == 0xBF) { // 0xEF, 0xBB, and 0xBF are the UTF-8 BOM characters. 
@@ -109,20 +165,24 @@ void scan_file( } else if (ch != CR && ch != LF && !(ch >= 0x20 && ch <= 0x7E)) { // [0x20, 0x7E] are the printable characters, including the space character. // https://en.wikipedia.org/wiki/ASCII#Printable_characters - ++disallowed_characters; - constexpr size_t MaxErrorsForDisallowedCharacters = 10; - if (disallowed_characters <= MaxErrorsForDisallowedCharacters) { - validation_failure(any_errors, filepath, "file contains disallowed character 0x{:02X}.", - static_cast(ch)); + if (disallowed_characters < max_error_lines_per_file) { + disallowed_character_locations[disallowed_characters] = {ch, {lines + 1, columns + 1}}; } + ++disallowed_characters; } if (ch == CR || ch == LF) { if (prev == ' ' || prev == '\t') { + if (trailing_whitespace_lines < max_error_lines_per_file) { + trailing_whitespace_locations[trailing_whitespace_lines] = {lines + 1, columns + 1}; + } ++trailing_whitespace_lines; } if (columns > max_line_length) { + if (overlength_lines < max_error_lines_per_file) { + overlength_locations[overlength_lines] = {lines + 1, columns + 1}; + } ++overlength_lines; } columns = 0; @@ -135,6 +195,7 @@ void scan_file( if (prev == CR) { // file ends with CR has_cr = true; + ++lines; } if (has_cr) { @@ -164,12 +225,16 @@ void scan_file( } if (tab_policy == TabPolicy::Forbidden && tab_characters != 0) { - validation_failure(any_errors, filepath, "file contains {} tab characters.", tab_characters); + validation_failure(any_errors, filepath, tab_character_locations[0], + "file contains {} tab characters. Lines and columns (up to {}): {}.", tab_characters, + max_error_lines_per_file, tab_character_locations | views::take(tab_characters)); } if (trailing_whitespace_lines != 0) { - validation_failure( - any_errors, filepath, "file contains {} lines with trailing whitespace.", trailing_whitespace_lines); + validation_failure(any_errors, filepath, trailing_whitespace_locations[0], + "file contains {} lines with trailing whitespace. 
Lines and columns (up to {}): {}.", + trailing_whitespace_lines, max_error_lines_per_file, + trailing_whitespace_locations | views::take(trailing_whitespace_lines)); } if (overlength_lines != 0) { @@ -188,10 +253,17 @@ void scan_file( static_assert(ranges::is_sorted(checked_extensions)); if (ranges::binary_search(checked_extensions, filepath.extension().wstring())) { - validation_failure(any_errors, filepath, "file contains {} lines with more than {} columns.", - overlength_lines, max_line_length); + validation_failure(any_errors, filepath, overlength_locations[0], + "file contains {} lines with more than {} columns. Lines and columns (up to {}): {}.", overlength_lines, + max_line_length, max_error_lines_per_file, overlength_locations | views::take(overlength_lines)); } } + + if (disallowed_characters != 0) { + validation_failure(any_errors, filepath, disallowed_character_locations[0].lc, + "file contains {} disallowed characters. Locations (up to {}): {}.", disallowed_characters, + max_error_lines_per_file, disallowed_character_locations | views::take(disallowed_characters)); + } } int main() {