-
Notifications
You must be signed in to change notification settings - Fork 173
/
adler32_impl.h
136 lines (126 loc) · 4.9 KB
/
adler32_impl.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
/*
* x86/adler32_impl.h - x86 implementations of Adler-32 checksum algorithm
*
* Copyright 2016 Eric Biggers
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef LIB_X86_ADLER32_IMPL_H
#define LIB_X86_ADLER32_IMPL_H
#include "cpu_features.h"
/*
* SSE2 and AVX2 implementations. Used on older CPUs.
*
* Each implementation is produced by defining a set of parameter macros
* (SUFFIX, ATTRIBUTES, VL, USE_VNNI, USE_AVX512) and then including
* adler32_template.h. The same parameter names are defined again for the
* next implementation without an #undef in this file, so the template
* presumably #undefs them at its end -- confirm in adler32_template.h.
*
* Each function name is also defined as a macro that expands to itself, so
* that arch_select_adler32_func() can test with #ifdef whether that
* implementation was compiled in.
*/
#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
/* SSE2 variant; VL 16 presumably means 16-byte (128-bit) vectors. */
# define adler32_x86_sse2 adler32_x86_sse2
# define SUFFIX _sse2
# define ATTRIBUTES _target_attribute("sse2")
# define VL 16
# define USE_VNNI 0
# define USE_AVX512 0
# include "adler32_template.h"
/* AVX2 variant; VL 32 presumably means 32-byte (256-bit) vectors. */
# define adler32_x86_avx2 adler32_x86_avx2
# define SUFFIX _avx2
# define ATTRIBUTES _target_attribute("avx2")
# define VL 32
# define USE_VNNI 0
# define USE_AVX512 0
# include "adler32_template.h"
#endif
/*
* AVX-VNNI implementation. This is used on CPUs that have AVX2 and AVX-VNNI
* but don't have AVX-512, for example Intel Alder Lake.
*
* Unusually for a new CPU feature, gcc added support for the AVX-VNNI
* intrinsics (in gcc 11.1) slightly before binutils added support for
* assembling AVX-VNNI instructions (in binutils 2.36). Distros can reasonably
* have gcc 11 with binutils 2.35. Because of this issue, we check for gcc 12
* instead of gcc 11. (libdeflate supports direct compilation without a
* configure step, so checking the binutils version is not always an option.)
*
* Even when the compiler version check passes, this implementation is left
* out if LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI is defined.
*/
#if (GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)) && \
!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI)
/*
* Parameters for adler32_template.h. The function name is defined as a macro
* expanding to itself so that later code can detect with #ifdef that this
* implementation exists. USE_VNNI is 1 and USE_AVX512 is 0 here; these
* presumably select code paths inside the template -- confirm there.
*/
# define adler32_x86_avx2_vnni adler32_x86_avx2_vnni
# define SUFFIX _avx2_vnni
# define ATTRIBUTES _target_attribute("avx2,avxvnni")
# define VL 32
# define USE_VNNI 1
# define USE_AVX512 0
# include "adler32_template.h"
#endif
/*
* AVX512VNNI implementations. Both variants below share one gate: a
* sufficiently new compiler, and
* LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI not being defined.
*/
#if (GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \
!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI)
/*
* AVX512VNNI implementation using 256-bit vectors. This is very similar to the
* AVX-VNNI implementation but takes advantage of masking and more registers.
* This is used on CPUs that support AVX-512 but where using 512-bit vectors
* causes downclocking. This should also be the optimal implementation on CPUs
* that support AVX10/256 but not AVX10/512.
*
* Unlike the 512-bit variant, the target attributes here include avx512vl.
*/
# define adler32_x86_avx512_vl256_vnni adler32_x86_avx512_vl256_vnni
# define SUFFIX _avx512_vl256_vnni
# define ATTRIBUTES _target_attribute("avx512bw,avx512vl,avx512vnni")
# define VL 32
# define USE_VNNI 1
# define USE_AVX512 1
# include "adler32_template.h"
/*
* AVX512VNNI implementation using 512-bit vectors. This is used on CPUs that
* have a good AVX-512 implementation including AVX512VNNI. This should also be
* the optimal implementation on CPUs that support AVX10/512.
*
* The parameter macros are defined again here without an #undef in this file;
* presumably adler32_template.h #undefs them after use -- confirm there.
*/
# define adler32_x86_avx512_vl512_vnni adler32_x86_avx512_vl512_vnni
# define SUFFIX _avx512_vl512_vnni
# define ATTRIBUTES _target_attribute("avx512bw,avx512vnni")
# define VL 64
# define USE_VNNI 1
# define USE_AVX512 1
# include "adler32_template.h"
#endif
/*
 * Pick the x86 Adler-32 implementation for the CPU we are running on.
 *
 * Candidates are tried in a fixed order: 512-bit AVX512VNNI, 256-bit
 * AVX512VNNI, AVX-VNNI, AVX2, and finally SSE2.  A candidate is considered only
 * when it was compiled in (its name is defined as a macro) and the CPU feature
 * bits satisfy its requirements.  The first match is returned; NULL is returned
 * if none of the candidates applies.
 */
static inline adler32_func_t
arch_select_adler32_func(void)
{
	/*
	 * MAYBE_UNUSED: when none of the candidates below is compiled in,
	 * nothing reads this variable.
	 */
	const u32 cpu MAYBE_UNUSED = get_x86_cpu_features();

#if defined(adler32_x86_avx512_vl512_vnni)
	/* The 512-bit variant additionally requires the ZMM feature bit. */
	if ((cpu & X86_CPU_FEATURE_ZMM) &&
	    HAVE_AVX512BW(cpu) && HAVE_AVX512VNNI(cpu)) {
		return adler32_x86_avx512_vl512_vnni;
	}
#endif

#if defined(adler32_x86_avx512_vl256_vnni)
	if (HAVE_AVX512BW(cpu) && HAVE_AVX512VL(cpu) &&
	    HAVE_AVX512VNNI(cpu)) {
		return adler32_x86_avx512_vl256_vnni;
	}
#endif

#if defined(adler32_x86_avx2_vnni)
	if (HAVE_AVX2(cpu) && HAVE_AVXVNNI(cpu)) {
		return adler32_x86_avx2_vnni;
	}
#endif

#if defined(adler32_x86_avx2)
	if (HAVE_AVX2(cpu)) {
		return adler32_x86_avx2;
	}
#endif

#if defined(adler32_x86_sse2)
	if (HAVE_SSE2(cpu)) {
		return adler32_x86_sse2;
	}
#endif

	/* No implementation from this file is usable. */
	return NULL;
}
#define arch_select_adler32_func arch_select_adler32_func
#endif /* LIB_X86_ADLER32_IMPL_H */