Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 22 additions & 31 deletions clang/lib/Headers/avx512bwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1083,48 +1083,39 @@ _mm512_maskz_mulhrs_epi16(__mmask32 __U, __m512i __A, __m512i __B)
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mulhi_epi16(__m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_pmulhw512((__v32hi) __A, (__v32hi) __B);
_mm512_mulhi_epi16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_pmulhw512((__v32hi)__A, (__v32hi)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A,
__m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_mulhi_epi16(__A, __B),
(__v32hi)__W);
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512(
(__mmask32)__U, (__v32hi)_mm512_mulhi_epi16(__A, __B), (__v32hi)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_mulhi_epi16(__A, __B),
(__v32hi)_mm512_setzero_si512());
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512(
(__mmask32)__U, (__v32hi)_mm512_mulhi_epi16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mulhi_epu16(__m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_pmulhuw512((__v32hu) __A, (__v32hu) __B);
_mm512_mulhi_epu16(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_pmulhuw512((__v32hu)__A, (__v32hu)__B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_mulhi_epu16(__A, __B),
(__v32hi)__W);
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512(
(__mmask32)__U, (__v32hi)_mm512_mulhi_epu16(__A, __B), (__v32hi)__W);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
_mm512_maskz_mulhi_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
{
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
(__v32hi)_mm512_mulhi_epu16(__A, __B),
(__v32hi)_mm512_setzero_si512());
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectw_512(
(__mmask32)__U, (__v32hi)_mm512_mulhi_epu16(__A, __B),
(__v32hi)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS512
Expand Down
65 changes: 35 additions & 30 deletions clang/lib/Headers/avx512vlbwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,14 @@
__target__("avx512vl,avx512bw,no-evex512"), \
__min_vector_width__(256)))

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#else
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#endif

/* Integer compare */

#define _mm_cmp_epi8_mask(a, b, p) \
Expand Down Expand Up @@ -1592,67 +1600,62 @@ _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
(__v16hi)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_mulhi_epu16(__A, __B),
(__v8hi)__W);
return (__m128i)__builtin_ia32_selectw_128(
(__mmask8)__U, (__v8hi)_mm_mulhi_epu16(__A, __B), (__v8hi)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_mulhi_epu16(__A, __B),
(__v8hi)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_mulhi_epu16(__A, __B),
(__v16hi)__W);
return (__m256i)__builtin_ia32_selectw_256(
(__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B), (__v16hi)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_mulhi_epu16(__A, __B),
(__v16hi)_mm256_setzero_si256());
return (__m256i)__builtin_ia32_selectw_256(
(__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B),
(__v16hi)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_mulhi_epi16(__A, __B),
(__v8hi)__W);
return (__m128i)__builtin_ia32_selectw_128(
(__mmask8)__U, (__v8hi)_mm_mulhi_epi16(__A, __B), (__v8hi)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
(__v8hi)_mm_mulhi_epi16(__A, __B),
(__v8hi)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_mulhi_epi16(__A, __B),
(__v16hi)__W);
return (__m256i)__builtin_ia32_selectw_256(
(__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B), (__v16hi)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
(__v16hi)_mm256_mulhi_epi16(__A, __B),
(__v16hi)_mm256_setzero_si256());
return (__m256i)__builtin_ia32_selectw_256(
(__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B),
(__v16hi)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
(__v16qi)_mm_unpackhi_epi8(__A, __B),
(__v16qi)__W);
return (__m128i)__builtin_ia32_selectb_128(
(__mmask16)__U, (__v16qi)_mm_unpackhi_epi8(__A, __B), (__v16qi)__W);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128
Expand Down Expand Up @@ -3163,5 +3166,7 @@ _mm256_mask_reduce_min_epu8(__mmask32 __M, __m256i __V) {

#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256
#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR

#endif /* __AVX512VLBWINTRIN_H */
12 changes: 8 additions & 4 deletions clang/test/CodeGen/X86/avx512bw-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1332,7 +1332,7 @@ __m512i test_mm512_mulhrs_epi16(__m512i __A, __m512i __B) {
// CHECK: @llvm.x86.avx512.pmul.hr.sw.512
return _mm512_mulhrs_epi16(__A,__B);
}
__m512i test_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
__m512i test_mm512_mask_mulhrs_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_mulhrs_epi16
// CHECK: @llvm.x86.avx512.pmul.hr.sw.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
Expand All @@ -1351,19 +1351,21 @@ __m512i test_mm512_mulhi_epi16(__m512i __A, __m512i __B) {
}
TEST_CONSTEXPR(match_v32hi(_mm512_mulhi_epi16((__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, 0, 0, -1, -1, -1, -1, -1));

__m512i test_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
__m512i test_mm512_mask_mulhi_epi16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_mulhi_epi16
// CHECK: @llvm.x86.avx512.pmulh.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_mulhi_epi16(__W,__U,__A,__B);
}
TEST_CONSTEXPR(match_v32hi(_mm512_mask_mulhi_epi16(_mm512_set1_epi16(1), 0xF00FF00F, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), -1, 0, 0, -1, 1, 1, 1, 1, 1, 1, 1, 1, -1, 0, 0, -1, -1, 0, 0, -1, 1, 1, 1, 1, 1, 1, 1, 1, -1, -1, -1, -1));

__m512i test_mm512_maskz_mulhi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_mulhi_epi16
// CHECK: @llvm.x86.avx512.pmulh.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_mulhi_epi16(__U,__A,__B);
}
TEST_CONSTEXPR(match_v32hi(_mm512_maskz_mulhi_epi16(0x0FF00FF0, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 0, 0, 0, 0, -1, 0, 0, -1, -1, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0, -1, -1, 0, 0, -1, 0, 0, 0, 0));

__m512i test_mm512_mulhi_epu16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mulhi_epu16
Expand All @@ -1372,26 +1374,28 @@ __m512i test_mm512_mulhi_epu16(__m512i __A, __m512i __B) {
}
TEST_CONSTEXPR(match_v32hi(_mm512_mulhi_epu16((__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 0, -64, 0, 57, 4, -60, 0, 49, 8, -56, 0, 41, 12, -52, 0, 33, 16, -48, 0, 25, 20, -44, 0, 17, 24, -40, 0, 9, 28, 5, 30, 1));

__m512i test_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
__m512i test_mm512_mask_mulhi_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_mulhi_epu16
// CHECK: @llvm.x86.avx512.pmulhu.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_mulhi_epu16(__W,__U,__A,__B);
}
TEST_CONSTEXPR(match_v32hi(_mm512_mask_mulhi_epu16(_mm512_set1_epi16(1), 0x0FF00FF0, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 1, 1, 1, 1, 4, -60, 0, 49, 8, -56, 0, 41, 1, 1, 1, 1, 1, 1, 1, 1, 20, -44, 0, 17, 24, -40, 0, 9, 1, 1, 1, 1));

__m512i test_mm512_maskz_mulhi_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_mulhi_epu16
// CHECK: @llvm.x86.avx512.pmulhu.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_mulhi_epu16(__U,__A,__B);
}
TEST_CONSTEXPR(match_v32hi(_mm512_maskz_mulhi_epu16(0xF00FF00F, (__m512i)(__v32hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16, +17, -18, +19, -20, +21, -22, +23, -24, +25, -26, +27, -28, +29, -30, +31, -32}, (__m512i)(__v32hi){-64, -62, +60, +58, -56, -54, +52, +50, -48, -46, +44, +42, -40, -38, +36, +34, -32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 0, -64, 0, 57, 0, 0, 0, 0, 0, 0, 0, 0, 12, -52, 0, 33, 16, -48, 0, 25, 0, 0, 0, 0, 0, 0, 0, 0, 28, 5, 30, 1));

__m512i test_mm512_maddubs_epi16(__m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_maddubs_epi16
// CHECK: @llvm.x86.avx512.pmaddubs.w.512
return _mm512_maddubs_epi16(__X,__Y);
}
__m512i test_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, __m512i __Y) {
__m512i test_mm512_mask_maddubs_epi16(__m512i __W, __mmask32 __U, __m512i __X, __m512i __Y) {
// CHECK-LABEL: test_mm512_mask_maddubs_epi16
// CHECK: @llvm.x86.avx512.pmaddubs.w.512
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
Expand Down
9 changes: 9 additions & 0 deletions clang/test/CodeGen/X86/avx512vlbw-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx10.1-512 -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s

#include <immintrin.h>
#include "builtin_test_helpers.h"

__mmask32 test_mm256_cmpeq_epi8_mask(__m256i __a, __m256i __b) {
// CHECK-LABEL: test_mm256_cmpeq_epi8_mask
Expand Down Expand Up @@ -1862,55 +1863,63 @@ __m128i test_mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_mulhi_epu16(__W, __U, __A, __B);
}
TEST_CONSTEXPR(match_v8hi(_mm_mask_mulhi_epu16(_mm_set1_epi16(1), 0x3C, (__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), 1, 1, 0, 9, 4, 5, 1, 1));

__m128i test_mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_mulhi_epu16
// CHECK: @llvm.x86.sse2.pmulhu.w
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_mulhi_epu16(__U, __A, __B);
}
TEST_CONSTEXPR(match_v8hi(_mm_maskz_mulhi_epu16(0x87, (__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), 0, -16, 0, 0, 0, 0, 0, 1));

__m256i test_mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_mulhi_epu16
// CHECK: @llvm.x86.avx2.pmulhu.w
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_mulhi_epu16(__W, __U, __A, __B);
}
TEST_CONSTEXPR(match_v16hi(_mm256_mask_mulhi_epu16(_mm256_set1_epi16(1), 0xF00F, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 0, -32, 0, 25, 1, 1, 1, 1, 1, 1, 1, 1, 12, 5, 14, 1));

__m256i test_mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_mulhi_epu16
// CHECK: @llvm.x86.avx2.pmulhu.w
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_mulhi_epu16(__U, __A, __B);
}
TEST_CONSTEXPR(match_v16hi(_mm256_maskz_mulhi_epu16(0x0FF0, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 0, 0, 0, 0, 4, -28, 0, 17, 8, -24, 0, 9, 0, 0, 0, 0));

__m128i test_mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_mulhi_epi16
// CHECK: @llvm.x86.sse2.pmulh.w
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_mask_mulhi_epi16(__W, __U, __A, __B);
}
TEST_CONSTEXPR(match_v8hi(_mm_mask_mulhi_epu16(_mm_set1_epi16(1), 0x78, (__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), 1, 1, 1, 9, 4, 5, 6, 1));

__m128i test_mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_maskz_mulhi_epi16
// CHECK: @llvm.x86.sse2.pmulh.w
// CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}}
return _mm_maskz_mulhi_epi16(__U, __A, __B);
}
TEST_CONSTEXPR(match_v8hi(_mm_maskz_mulhi_epi16(0xC3, (__m128i)(__v8hi){+1, -2, +3, -4, +5, -6, +7, -8}, (__m128i)(__v8hi){-16, -14, +12, +10, -8, +6, -4, +2}), -1, 0, 0, 0, 0, 0, -1, -1));

__m256i test_mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_mask_mulhi_epi16
// CHECK: @llvm.x86.avx2.pmulh.w
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_mask_mulhi_epi16(__W, __U, __A, __B);
}
TEST_CONSTEXPR(match_v16hi(_mm256_mask_mulhi_epi16(_mm256_set1_epi16(1), 0x0FF0, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), 1, 1, 1, 1, -1, 0, 0, -1, -1, 0, 0, -1, 1, 1, 1, 1));

__m256i test_mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
// CHECK-LABEL: test_mm256_maskz_mulhi_epi16
// CHECK: @llvm.x86.avx2.pmulh.w
// CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}}
return _mm256_maskz_mulhi_epi16(__U, __A, __B);
}
TEST_CONSTEXPR(match_v16hi(_mm256_maskz_mulhi_epi16(0xF00F, (__m256i)(__v16hi){+1, -2, +3, -4, +5, -6, +7, -8, +9, -10, +11, -12, +13, -14, +15, -16}, (__m256i)(__v16hi){-32, -30, +28, +26, -24, -22, +20, +18, -16, -14, +12, +10, -8, +6, -4, +2}), -1, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1));

__m128i test_mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
// CHECK-LABEL: test_mm_mask_unpackhi_epi8
Expand Down