Skip to content

Commit 7dd898f

Browse files
committed
Getting vxsort working on Linux amd64
1 parent 366bff5 commit 7dd898f

11 files changed

+70
-53
lines changed

src/coreclr/dlls/mscoree/coreclr/CMakeLists.txt

+7
Original file line numberDiff line numberDiff line change
@@ -109,8 +109,15 @@ set(CORECLR_LIBRARIES
109109
interop
110110
coreclrminipal
111111
gc_pal
112+
gc_vxsort
112113
)
113114

115+
if(CLR_CMAKE_TARGET_ARCH_AMD64)
116+
list(APPEND CORECLR_LIBRARIES
117+
gc_vxsort
118+
)
119+
endif(CLR_CMAKE_TARGET_ARCH_AMD64)
120+
114121
if(CLR_CMAKE_TARGET_WIN32)
115122
list(APPEND CORECLR_LIBRARIES
116123
${STATIC_MT_CRT_LIB}

src/coreclr/gc/CMakeLists.txt

+5-16
Original file line numberDiff line numberDiff line change
@@ -36,20 +36,9 @@ else()
3636
windows/Native.rc)
3737
endif(CLR_CMAKE_HOST_UNIX)
3838

39-
if (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
40-
set (GC_SOURCES
41-
${GC_SOURCES}
42-
vxsort/isa_detection.cpp
43-
vxsort/do_vxsort_avx2.cpp
44-
vxsort/do_vxsort_avx512.cpp
45-
vxsort/machine_traits.avx2.cpp
46-
vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.cpp
47-
vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.cpp
48-
vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.cpp
49-
vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.cpp
50-
vxsort/smallsort/avx2_load_mask_tables.cpp
51-
)
52-
endif (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
39+
if (CLR_CMAKE_TARGET_ARCH_AMD64)
40+
add_subdirectory(vxsort)
41+
endif (CLR_CMAKE_TARGET_ARCH_AMD64)
5342

5443
if (CLR_CMAKE_TARGET_WIN32)
5544
set(GC_HEADERS
@@ -87,7 +76,7 @@ if (CLR_CMAKE_TARGET_WIN32)
8776
handletablepriv.h
8877
objecthandle.h
8978
softwarewritewatch.h
90-
vxsort/do_vxsort.h)
79+
)
9180
endif(CLR_CMAKE_TARGET_WIN32)
9281

9382
if(CLR_CMAKE_HOST_WIN32)
@@ -98,7 +87,7 @@ if(CLR_CMAKE_HOST_WIN32)
9887
advapi32.lib)
9988
endif(CLR_CMAKE_HOST_WIN32)
10089

101-
set (GC_LINK_LIBRARIES ${GC_LINK_LIBRARIES} gc_pal)
90+
set (GC_LINK_LIBRARIES ${GC_LINK_LIBRARIES} gc_pal gc_vxsort)
10291

10392
list(APPEND GC_SOURCES ${GC_HEADERS})
10493

src/coreclr/gc/gc.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818

1919
#include "gcpriv.h"
2020

21-
#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
21+
#if defined(TARGET_AMD64)
2222
#define USE_VXSORT
2323
#else
2424
#define USE_INTROSORT

src/coreclr/gc/gcsvr.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
#define SERVER_GC 1
2222

23-
#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
23+
#if defined(TARGET_AMD64)
2424
#include "vxsort/do_vxsort.h"
2525
#endif
2626

src/coreclr/gc/gcwks.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
#undef SERVER_GC
2121
#endif
2222

23-
#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
23+
#if defined(TARGET_AMD64)
2424
#include "vxsort/do_vxsort.h"
2525
#endif
2626

src/coreclr/gc/sample/CMakeLists.txt

-15
Original file line numberDiff line numberDiff line change
@@ -24,21 +24,6 @@ set(SOURCES
2424
../softwarewritewatch.cpp
2525
)
2626

27-
if (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
28-
set ( SOURCES
29-
${SOURCES}
30-
../vxsort/isa_detection.cpp
31-
../vxsort/do_vxsort_avx2.cpp
32-
../vxsort/do_vxsort_avx512.cpp
33-
../vxsort/machine_traits.avx2.cpp
34-
../vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.cpp
35-
../vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.cpp
36-
../vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.cpp
37-
../vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.cpp
38-
../vxsort/smallsort/avx2_load_mask_tables.cpp
39-
)
40-
endif (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
41-
4227
if(CLR_CMAKE_TARGET_WIN32)
4328
set (GC_LINK_LIBRARIES
4429
${STATIC_MT_CRT_LIB}

src/coreclr/gc/vxsort/CMakeLists.txt

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
set(CMAKE_INCLUDE_CURRENT_DIR ON)
2+
include_directories("../env")
3+
4+
if(CLR_CMAKE_HOST_UNIX)
5+
set_source_files_properties(isa_detection.cpp PROPERTIES COMPILE_FLAGS -mavx2)
6+
set_source_files_properties(do_vxsort_avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
7+
set_source_files_properties(do_vxsort_avx512.cpp PROPERTIES COMPILE_FLAGS -mavx2)
8+
set_source_files_properties(machine_traits.avx2.cpp PROPERTIES COMPILE_FLAGS -mavx2)
9+
set_source_files_properties(smallsort/bitonic_sort.AVX2.int64_t.generated.cpp PROPERTIES COMPILE_FLAGS -mavx2)
10+
set_source_files_properties(smallsort/bitonic_sort.AVX2.int32_t.generated.cpp PROPERTIES COMPILE_FLAGS -mavx2)
11+
set_source_files_properties(smallsort/bitonic_sort.AVX512.int64_t.generated.cpp PROPERTIES COMPILE_FLAGS -mavx2)
12+
set_source_files_properties(smallsort/bitonic_sort.AVX512.int32_t.generated.cpp PROPERTIES COMPILE_FLAGS -mavx2)
13+
set_source_files_properties(smallsort/avx2_load_mask_tables.cpp PROPERTIES COMPILE_FLAGS -mavx2)
14+
endif(CLR_CMAKE_HOST_UNIX)
15+
16+
set (VXSORT_SOURCES
17+
isa_detection.cpp
18+
do_vxsort_avx2.cpp
19+
do_vxsort_avx512.cpp
20+
machine_traits.avx2.cpp
21+
smallsort/bitonic_sort.AVX2.int64_t.generated.cpp
22+
smallsort/bitonic_sort.AVX2.int32_t.generated.cpp
23+
smallsort/bitonic_sort.AVX512.int64_t.generated.cpp
24+
smallsort/bitonic_sort.AVX512.int32_t.generated.cpp
25+
smallsort/avx2_load_mask_tables.cpp
26+
do_vxsort.h
27+
)
28+
29+
add_library(gc_vxsort STATIC ${VXSORT_SOURCES})

src/coreclr/gc/vxsort/machine_traits.avx2.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#include <immintrin.h>
1414
#include <assert.h>
1515
#include <inttypes.h>
16+
#include <type_traits>
1617
#include "defs.h"
1718
#include "machine_traits.h"
1819

@@ -123,8 +124,7 @@ class vxsort_machine_traits<int64_t, AVX2> {
123124

124125
template <int Shift>
125126
static constexpr bool can_pack(T span) {
126-
const auto PACK_LIMIT = (((TU) std::numeric_limits<uint32_t>::Max() + 1)) << Shift;
127-
return ((TU) span) < PACK_LIMIT;
127+
return ((TU) span) < ((((TU) std::numeric_limits<uint32_t>::Max() + 1)) << Shift);
128128
}
129129

130130
static INLINE TV load_vec(TV* p) { return _mm256_lddqu_si256(p); }

src/coreclr/gc/vxsort/machine_traits.avx512.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "vxsort_targets_enable_avx512.h"
1212

1313
#include <immintrin.h>
14+
#include <type_traits>
1415
#include "defs.h"
1516
#include "machine_traits.h"
1617

@@ -92,8 +93,7 @@ class vxsort_machine_traits<int64_t, AVX512> {
9293

9394
template <int Shift>
9495
static constexpr bool can_pack(T span) {
95-
const auto PACK_LIMIT = (((TU) std::numeric_limits<uint32_t>::Max() + 1)) << Shift;
96-
return ((TU) span) < PACK_LIMIT;
96+
return ((TU) span) < ((((TU) std::numeric_limits<uint32_t>::Max() + 1)) << Shift);
9797
}
9898

9999
static INLINE TV load_vec(TV* p) { return _mm512_loadu_si512(p); }

src/coreclr/gc/vxsort/vxsort.h

+22
Original file line numberDiff line numberDiff line change
@@ -458,31 +458,53 @@ class vxsort {
458458

459459
switch (InnerUnroll) {
460460
case 12: d12 = MT::load_vec(nextPtr + InnerUnroll - 12);
461+
[[clang::fallthrough]];
461462
case 11: d11 = MT::load_vec(nextPtr + InnerUnroll - 11);
463+
[[clang::fallthrough]];
462464
case 10: d10 = MT::load_vec(nextPtr + InnerUnroll - 10);
465+
[[clang::fallthrough]];
463466
case 9: d09 = MT::load_vec(nextPtr + InnerUnroll - 9);
467+
[[clang::fallthrough]];
464468
case 8: d08 = MT::load_vec(nextPtr + InnerUnroll - 8);
469+
[[clang::fallthrough]];
465470
case 7: d07 = MT::load_vec(nextPtr + InnerUnroll - 7);
471+
[[clang::fallthrough]];
466472
case 6: d06 = MT::load_vec(nextPtr + InnerUnroll - 6);
473+
[[clang::fallthrough]];
467474
case 5: d05 = MT::load_vec(nextPtr + InnerUnroll - 5);
475+
[[clang::fallthrough]];
468476
case 4: d04 = MT::load_vec(nextPtr + InnerUnroll - 4);
477+
[[clang::fallthrough]];
469478
case 3: d03 = MT::load_vec(nextPtr + InnerUnroll - 3);
479+
[[clang::fallthrough]];
470480
case 2: d02 = MT::load_vec(nextPtr + InnerUnroll - 2);
481+
[[clang::fallthrough]];
471482
case 1: d01 = MT::load_vec(nextPtr + InnerUnroll - 1);
472483
}
473484

474485
switch (InnerUnroll) {
475486
case 12: partition_block(d12, P, writeLeft, writeRight);
487+
[[clang::fallthrough]];
476488
case 11: partition_block(d11, P, writeLeft, writeRight);
489+
[[clang::fallthrough]];
477490
case 10: partition_block(d10, P, writeLeft, writeRight);
491+
[[clang::fallthrough]];
478492
case 9: partition_block(d09, P, writeLeft, writeRight);
493+
[[clang::fallthrough]];
479494
case 8: partition_block(d08, P, writeLeft, writeRight);
495+
[[clang::fallthrough]];
480496
case 7: partition_block(d07, P, writeLeft, writeRight);
497+
[[clang::fallthrough]];
481498
case 6: partition_block(d06, P, writeLeft, writeRight);
499+
[[clang::fallthrough]];
482500
case 5: partition_block(d05, P, writeLeft, writeRight);
501+
[[clang::fallthrough]];
483502
case 4: partition_block(d04, P, writeLeft, writeRight);
503+
[[clang::fallthrough]];
484504
case 3: partition_block(d03, P, writeLeft, writeRight);
505+
[[clang::fallthrough]];
485506
case 2: partition_block(d02, P, writeLeft, writeRight);
507+
[[clang::fallthrough]];
486508
case 1: partition_block(d01, P, writeLeft, writeRight);
487509
}
488510
}

src/coreclr/vm/CMakeLists.txt

-15
Original file line numberDiff line numberDiff line change
@@ -490,21 +490,6 @@ set(GC_SOURCES_WKS
490490
../gc/softwarewritewatch.cpp
491491
../gc/handletablecache.cpp)
492492

493-
if (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
494-
set ( GC_SOURCES_WKS
495-
${GC_SOURCES_WKS}
496-
../gc/vxsort/isa_detection.cpp
497-
../gc/vxsort/do_vxsort_avx2.cpp
498-
../gc/vxsort/do_vxsort_avx512.cpp
499-
../gc/vxsort/machine_traits.avx2.cpp
500-
../gc/vxsort/smallsort/bitonic_sort.AVX2.int64_t.generated.cpp
501-
../gc/vxsort/smallsort/bitonic_sort.AVX2.int32_t.generated.cpp
502-
../gc/vxsort/smallsort/bitonic_sort.AVX512.int64_t.generated.cpp
503-
../gc/vxsort/smallsort/bitonic_sort.AVX512.int32_t.generated.cpp
504-
../gc/vxsort/smallsort/avx2_load_mask_tables.cpp
505-
)
506-
endif (CLR_CMAKE_TARGET_ARCH_AMD64 AND CLR_CMAKE_TARGET_WIN32)
507-
508493
set(GC_HEADERS_WKS
509494
${GC_HEADERS_DAC_AND_WKS_COMMON}
510495
../gc/gceventstatus.h

0 commit comments

Comments
 (0)