lib/x86/adler32_impl.h

/*
 * x86/adler32_impl.h - x86 implementations of Adler-32 checksum algorithm
 *
 * Copyright 2016 Eric Biggers
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

#ifndef LIB_X86_ADLER32_IMPL_H
#define LIB_X86_ADLER32_IMPL_H

#include "cpu_features.h"

/* SSE2 and AVX2 implementations.  Used on older CPUs. */
#if defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER)
#  define adler32_x86_sse2	adler32_x86_sse2
#  define SUFFIX			   _sse2
#  define ATTRIBUTES		_target_attribute("sse2")
#  define VL			16
#  define USE_VNNI		0
#  define USE_AVX512		0
#  include "adler32_template.h"

#  define adler32_x86_avx2	adler32_x86_avx2
#  define SUFFIX			   _avx2
#  define ATTRIBUTES		_target_attribute("avx2")
#  define VL			32
#  define USE_VNNI		0
#  define USE_AVX512		0
#  include "adler32_template.h"
#endif

/*
 * AVX-VNNI implementation.  This is used on CPUs that have AVX2 and AVX-VNNI
 * but don't have AVX-512, for example Intel Alder Lake.
 *
 * Unusually for a new CPU feature, gcc added support for the AVX-VNNI
 * intrinsics (in gcc 11.1) slightly before binutils added support for
 * assembling AVX-VNNI instructions (in binutils 2.36).  Distros can reasonably
 * have gcc 11 with binutils 2.35.  Because of this issue, we check for gcc 12
 * instead of gcc 11.  (libdeflate supports direct compilation without a
 * configure step, so checking the binutils version is not always an option.)
 */
#if (GCC_PREREQ(12, 1) || CLANG_PREREQ(12, 0, 13000000) || MSVC_PREREQ(1930)) && \
	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX_VNNI)
#  define adler32_x86_avx2_vnni	adler32_x86_avx2_vnni
#  define SUFFIX			   _avx2_vnni
#  define ATTRIBUTES		_target_attribute("avx2,avxvnni")
#  define VL			32
#  define USE_VNNI		1
#  define USE_AVX512		0
#  include "adler32_template.h"
#endif

#if (GCC_PREREQ(8, 1) || CLANG_PREREQ(6, 0, 10000000) || MSVC_PREREQ(1920)) && \
	!defined(LIBDEFLATE_ASSEMBLER_DOES_NOT_SUPPORT_AVX512VNNI)
/*
 * AVX512VNNI implementation using 256-bit vectors.  This is very similar to the
 * AVX-VNNI implementation but takes advantage of masking and more registers.
 * This is used on CPUs that support AVX-512 but where using 512-bit vectors
 * causes downclocking.  This should also be the optimal implementation on CPUs
 * that support AVX10/256 but not AVX10/512.
 */
#  define adler32_x86_avx512_vl256_vnni	adler32_x86_avx512_vl256_vnni
#  define SUFFIX				   _avx512_vl256_vnni
#  define ATTRIBUTES		_target_attribute("avx512bw,avx512vl,avx512vnni")
#  define VL			32
#  define USE_VNNI		1
#  define USE_AVX512		1
#  include "adler32_template.h"

/*
 * AVX512VNNI implementation using 512-bit vectors.  This is used on CPUs that
 * have a good AVX-512 implementation including AVX512VNNI.  This should also be
 * the optimal implementation on CPUs that support AVX10/512.
 */
#  define adler32_x86_avx512_vl512_vnni	adler32_x86_avx512_vl512_vnni
#  define SUFFIX				   _avx512_vl512_vnni
#  define ATTRIBUTES		_target_attribute("avx512bw,avx512vnni")
#  define VL			64
#  define USE_VNNI		1
#  define USE_AVX512		1
#  include "adler32_template.h"
#endif

static inline adler32_func_t
arch_select_adler32_func(void)
{
	const u32 features MAYBE_UNUSED = get_x86_cpu_features();

#ifdef adler32_x86_avx512_vl512_vnni
	if ((features & X86_CPU_FEATURE_ZMM) &&
	    HAVE_AVX512BW(features) && HAVE_AVX512VNNI(features))
		return adler32_x86_avx512_vl512_vnni;
#endif
#ifdef adler32_x86_avx512_vl256_vnni
	if (HAVE_AVX512BW(features) && HAVE_AVX512VL(features) &&
	    HAVE_AVX512VNNI(features))
		return adler32_x86_avx512_vl256_vnni;
#endif
#ifdef adler32_x86_avx2_vnni
	if (HAVE_AVX2(features) && HAVE_AVXVNNI(features))
		return adler32_x86_avx2_vnni;
#endif
#ifdef adler32_x86_avx2
	if (HAVE_AVX2(features))
		return adler32_x86_avx2;
#endif
#ifdef adler32_x86_sse2
	if (HAVE_SSE2(features))
		return adler32_x86_sse2;
#endif
	return NULL;
}
#define arch_select_adler32_func	arch_select_adler32_func

#endif /* LIB_X86_ADLER32_IMPL_H */