diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a34f109ba21ce..8d4208a887cb2 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1075,6 +1075,25 @@ X86 Support ^^^^^^^^^^^ - Remove knl/knm specific ISA supports: AVX512PF, AVX512ER, PREFETCHWT1 +- Support has been removed for the AMD "3DNow!" instruction-set. + Neither modern AMD CPUs, nor any Intel CPUs implement these + instructions, and they were never widely used. + + * The options ``-m3dnow`` and ``-m3dnowa`` are no longer honored, and will emit a warning if used. + * The macros ``__3dNOW__`` and ``__3dNOW_A__`` are no longer ever set by the compiler. + * The header ```` is deprecated, and emits a warning if included. + * The 3dNow intrinsic functions have been removed: ``_m_femms``, + ``_m_pavgusb``, ``_m_pf2id``, ``_m_pfacc``, ``_m_pfadd``, + ``_m_pfcmpeq``, ``_m_pfcmpge``, ``_m_pfcmpgt``, ``_m_pfmax``, + ``_m_pfmin``, ``_m_pfmul``, ``_m_pfrcp``, ``_m_pfrcpit1``, + ``_m_pfrcpit2``, ``_m_pfrsqrt``, ``_m_pfrsqrtit1``, ``_m_pfsub``, + ``_m_pfsubr``, ``_m_pi2fd``, ``_m_pmulhrw``, ``_m_pf2iw``, + ``_m_pfnacc``, ``_m_pfpnacc``, ``_m_pi2fw``, ``_m_pswapdsf``, + ``_m_pswapdsi``. + * The compiler builtins corresponding to each of the above + intrinsics have also been removed (``__builtin_ia32_femms``, and so on). + * "3DNow!" instructions remain supported in assembly code, including + inside inline-assembly. Arm and AArch64 Support ^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def index 7074479786b97..a85e7918f4d7e 100644 --- a/clang/include/clang/Basic/BuiltinsX86.def +++ b/clang/include/clang/Basic/BuiltinsX86.def @@ -37,36 +37,6 @@ TARGET_BUILTIN(__builtin_ia32_undef512, "V8d", "ncV:512:", "") TARGET_BUILTIN(__builtin_ia32_readeflags_u32, "Ui", "n", "") TARGET_BUILTIN(__builtin_ia32_writeeflags_u32, "vUi", "n", "") -// 3DNow! -// -TARGET_BUILTIN(__builtin_ia32_femms, "v", "n", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pavgusb, "V8cV8cV8c", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pf2id, "V2iV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfacc, "V2fV2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfadd, "V2fV2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfcmpeq, "V2iV2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfcmpge, "V2iV2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfcmpgt, "V2iV2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfmax, "V2fV2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfmin, "V2fV2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfmul, "V2fV2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrcp, "V2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrcpit1, "V2fV2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrcpit2, "V2fV2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrsqrt, "V2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrsqit1, "V2fV2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfsub, "V2fV2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfsubr, "V2fV2fV2f", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pi2fd, "V2fV2i", "ncV:64:", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pmulhrw, "V4sV4sV4s", "ncV:64:", "3dnow") -// 3DNow! Extensions (3dnowa). -TARGET_BUILTIN(__builtin_ia32_pf2iw, "V2iV2f", "ncV:64:", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pfnacc, "V2fV2fV2f", "ncV:64:", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pfpnacc, "V2fV2fV2f", "ncV:64:", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pi2fw, "V2fV2i", "ncV:64:", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pswapdsf, "V2fV2f", "ncV:64:", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pswapdsi, "V2iV2i", "ncV:64:", "3dnowa") - // MMX // // All MMX instructions will be generated via builtins. Any MMX vector diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index be177dc38bcf1..e44f1d53b097a 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -6127,10 +6127,6 @@ def mno_80387 : Flag<["-"], "mno-80387">, Alias; def mno_fp_ret_in_387 : Flag<["-"], "mno-fp-ret-in-387">, Alias; def mmmx : Flag<["-"], "mmmx">, Group; def mno_mmx : Flag<["-"], "mno-mmx">, Group; -def m3dnow : Flag<["-"], "m3dnow">, Group; -def mno_3dnow : Flag<["-"], "mno-3dnow">, Group; -def m3dnowa : Flag<["-"], "m3dnowa">, Group; -def mno_3dnowa : Flag<["-"], "mno-3dnowa">, Group; def mamx_bf16 : Flag<["-"], "mamx-bf16">, Group; def mno_amx_bf16 : Flag<["-"], "mno-amx-bf16">, Group; def mamx_complex : Flag<["-"], "mamx-complex">, Group; @@ -6364,6 +6360,12 @@ def mvevpu : Flag<["-"], "mvevpu">, Group, def mno_vevpu : Flag<["-"], "mno-vevpu">, Group; } // let Flags = [TargetSpecific] +// Unsupported X86 feature flags (triggers a warning) +def m3dnow : Flag<["-"], "m3dnow">; +def mno_3dnow : Flag<["-"], "mno-3dnow">; +def m3dnowa : Flag<["-"], "m3dnowa">; +def mno_3dnowa : Flag<["-"], "mno-3dnowa">; + // These are legacy user-facing driver-level option spellings. They are always // aliases for options that are spelled using the more common Unix / GNU flag // style of double-dash and equals-joined flags. diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 1f6fc842ddd95..121a2c2d795fe 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -258,7 +258,9 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, if (Feature[0] != '+') continue; - if (Feature == "+aes") { + if (Feature == "+mmx") { + HasMMX = true; + } else if (Feature == "+aes") { HasAES = true; } else if (Feature == "+vaes") { HasVAES = true; @@ -487,13 +489,6 @@ bool X86TargetInfo::handleTargetFeatures(std::vector &Features, // for bfloat16 arithmetic operations in the front-end. HasBFloat16 = SSELevel >= SSE2; - MMX3DNowEnum ThreeDNowLevel = llvm::StringSwitch(Feature) - .Case("+3dnowa", AMD3DNowAthlon) - .Case("+3dnow", AMD3DNow) - .Case("+mmx", MMX) - .Default(NoMMX3DNow); - MMX3DNowLevel = std::max(MMX3DNowLevel, ThreeDNowLevel); - XOPEnum XLevel = llvm::StringSwitch(Feature) .Case("+xop", XOP) .Case("+fma4", FMA4) @@ -1031,18 +1026,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, } // Each case falls through to the previous one here. - switch (MMX3DNowLevel) { - case AMD3DNowAthlon: - Builder.defineMacro("__3dNOW_A__"); - [[fallthrough]]; - case AMD3DNow: - Builder.defineMacro("__3dNOW__"); - [[fallthrough]]; - case MMX: + if (HasMMX) { Builder.defineMacro("__MMX__"); - [[fallthrough]]; - case NoMMX3DNow: - break; } if (CPU >= CK_i486 || CPU == CK_None) { @@ -1061,8 +1046,6 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts, bool X86TargetInfo::isValidFeatureName(StringRef Name) const { return llvm::StringSwitch(Name) - .Case("3dnow", true) - .Case("3dnowa", true) .Case("adx", true) .Case("aes", true) .Case("amx-bf16", true) @@ -1232,9 +1215,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const { .Case("widekl", HasWIDEKL) .Case("lwp", HasLWP) .Case("lzcnt", HasLZCNT) - .Case("mm3dnow", MMX3DNowLevel >= AMD3DNow) - .Case("mm3dnowa", MMX3DNowLevel >= AMD3DNowAthlon) - .Case("mmx", MMX3DNowLevel >= MMX) + .Case("mmx", HasMMX) .Case("movbe", HasMOVBE) .Case("movdiri", HasMOVDIRI) .Case("movdir64b", HasMOVDIR64B) diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index a70711f4ae2bb..cdec41afd1a4b 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -67,12 +67,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { AVX2, AVX512F } SSELevel = NoSSE; - enum MMX3DNowEnum { - NoMMX3DNow, - MMX, - AMD3DNow, - AMD3DNowAthlon - } MMX3DNowLevel = NoMMX3DNow; + bool HasMMX = false; enum XOPEnum { NoXOP, SSE4A, FMA4, XOP } XOPLevel = NoXOP; enum AddrSpace { ptr32_sptr = 270, ptr32_uptr = 271, ptr64 = 272 }; @@ -348,8 +343,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { return "avx512"; if (getTriple().getArch() == llvm::Triple::x86_64 && SSELevel >= AVX) return "avx"; - if (getTriple().getArch() == llvm::Triple::x86 && - MMX3DNowLevel == NoMMX3DNow) + if (getTriple().getArch() == llvm::Triple::x86 && !HasMMX) return "no-mmx"; return ""; } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a54fa7bf87aad..f2c913496f2f5 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -15969,14 +15969,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, return Builder.CreateCall(F, {Ops[0]}); } - // 3DNow! - case X86::BI__builtin_ia32_pswapdsf: - case X86::BI__builtin_ia32_pswapdsi: { - llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext()); - Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast"); - llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd); - return Builder.CreateCall(F, Ops, "pswapd"); - } case X86::BI__builtin_ia32_rdrand16_step: case X86::BI__builtin_ia32_rdrand32_step: case X86::BI__builtin_ia32_rdrand64_step: diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index 9fca7864b2546..2f63333b732f6 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -310,4 +310,17 @@ void x86::getX86TargetFeatures(const Driver &D, const llvm::Triple &Triple, Features.push_back("+prefer-no-scatter"); if (Args.hasArg(options::OPT_mapx_inline_asm_use_gpr32)) Features.push_back("+inline-asm-use-gpr32"); + + // Warn for removed 3dnow support + if (const Arg *A = + Args.getLastArg(options::OPT_m3dnowa, options::OPT_mno_3dnowa, + options::OPT_mno_3dnow)) { + if (A->getOption().matches(options::OPT_m3dnowa)) + D.Diag(diag::warn_drv_clang_unsupported) << A->getAsString(Args); + } + if (const Arg *A = + Args.getLastArg(options::OPT_m3dnow, options::OPT_mno_3dnow)) { + if (A->getOption().matches(options::OPT_m3dnow)) + D.Diag(diag::warn_drv_clang_unsupported) << A->getAsString(Args); + } } diff --git a/clang/lib/Headers/mm3dnow.h b/clang/lib/Headers/mm3dnow.h index 22ab13aa33409..afffba3a9c75e 100644 --- a/clang/lib/Headers/mm3dnow.h +++ b/clang/lib/Headers/mm3dnow.h @@ -7,151 +7,16 @@ *===-----------------------------------------------------------------------=== */ +// 3dNow intrinsics are no longer supported. + #ifndef _MM3DNOW_H_INCLUDED #define _MM3DNOW_H_INCLUDED +#ifndef _CLANG_DISABLE_CRT_DEPRECATION_WARNINGS +#warning "The header is deprecated, and 3dNow! intrinsics are unsupported. For other intrinsics, include , instead." +#endif + #include #include -typedef float __v2sf __attribute__((__vector_size__(8))); - -/* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"), __min_vector_width__(64))) - -static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("3dnow"))) -_m_femms(void) { - __builtin_ia32_femms(); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pavgusb(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pavgusb((__v8qi)__m1, (__v8qi)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pf2id(__m64 __m) { - return (__m64)__builtin_ia32_pf2id((__v2sf)__m); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfacc(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfacc((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfadd(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfadd((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfcmpeq(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfcmpeq((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfcmpge(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfcmpge((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfcmpgt(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfcmpgt((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfmax(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfmax((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfmin(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfmin((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfmul(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfmul((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfrcp(__m64 __m) { - return (__m64)__builtin_ia32_pfrcp((__v2sf)__m); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfrcpit1(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfrcpit1((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfrcpit2(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfrcpit2((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfrsqrt(__m64 __m) { - return (__m64)__builtin_ia32_pfrsqrt((__v2sf)__m); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfrsqrtit1(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfrsqit1((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfsub(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfsub((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfsubr(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfsubr((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pi2fd(__m64 __m) { - return (__m64)__builtin_ia32_pi2fd((__v2si)__m); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pmulhrw(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pmulhrw((__v4hi)__m1, (__v4hi)__m2); -} - -/* Handle the 3dnowa instructions here. */ -#undef __DEFAULT_FN_ATTRS -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"), __min_vector_width__(64))) - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pf2iw(__m64 __m) { - return (__m64)__builtin_ia32_pf2iw((__v2sf)__m); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfnacc(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfnacc((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pfpnacc(__m64 __m1, __m64 __m2) { - return (__m64)__builtin_ia32_pfpnacc((__v2sf)__m1, (__v2sf)__m2); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pi2fw(__m64 __m) { - return (__m64)__builtin_ia32_pi2fw((__v2si)__m); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pswapdsf(__m64 __m) { - return (__m64)__builtin_ia32_pswapdsf((__v2sf)__m); -} - -static __inline__ __m64 __DEFAULT_FN_ATTRS -_m_pswapdsi(__m64 __m) { - return (__m64)__builtin_ia32_pswapdsi((__v2si)__m); -} - -#undef __DEFAULT_FN_ATTRS - #endif diff --git a/clang/lib/Headers/x86intrin.h b/clang/lib/Headers/x86intrin.h index c20bfbb8fe46e..f42e9e580f883 100644 --- a/clang/lib/Headers/x86intrin.h +++ b/clang/lib/Headers/x86intrin.h @@ -14,10 +14,6 @@ #include -#if !defined(__SCE__) || __has_feature(modules) || defined(__3dNOW__) -#include -#endif - #if !defined(__SCE__) || __has_feature(modules) || defined(__PRFCHW__) #include #endif diff --git a/clang/test/CodeGen/X86/3dnow-builtins.c b/clang/test/CodeGen/X86/3dnow-builtins.c deleted file mode 100644 index af754b71555c4..0000000000000 --- a/clang/test/CodeGen/X86/3dnow-builtins.c +++ /dev/null @@ -1,181 +0,0 @@ -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-unknown-unknown -target-feature +3dnowa -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=GCC -check-prefix=CHECK -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-scei-ps4 -target-feature +3dnowa -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=PS4 -check-prefix=CHECK -// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-sie-ps5 -target-feature +3dnowa -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=PS4 -check-prefix=CHECK - - -#include - -__m64 test_m_pavgusb(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pavgusb - // GCC-LABEL: define{{.*}} double @test_m_pavgusb - // CHECK: @llvm.x86.3dnow.pavgusb - return _m_pavgusb(m1, m2); -} - -__m64 test_m_pf2id(__m64 m) { - // PS4-LABEL: define{{.*}} i64 @test_m_pf2id - // GCC-LABEL: define{{.*}} double @test_m_pf2id - // CHECK: @llvm.x86.3dnow.pf2id - return _m_pf2id(m); -} - -__m64 test_m_pfacc(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfacc - // GCC-LABEL: define{{.*}} double @test_m_pfacc - // CHECK: @llvm.x86.3dnow.pfacc - return _m_pfacc(m1, m2); -} - -__m64 test_m_pfadd(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfadd - // GCC-LABEL: define{{.*}} double @test_m_pfadd - // CHECK: @llvm.x86.3dnow.pfadd - return _m_pfadd(m1, m2); -} - -__m64 test_m_pfcmpeq(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfcmpeq - // GCC-LABEL: define{{.*}} double @test_m_pfcmpeq - // CHECK: @llvm.x86.3dnow.pfcmpeq - return _m_pfcmpeq(m1, m2); -} - -__m64 test_m_pfcmpge(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfcmpge - // GCC-LABEL: define{{.*}} double @test_m_pfcmpge - // CHECK: @llvm.x86.3dnow.pfcmpge - return _m_pfcmpge(m1, m2); -} - -__m64 test_m_pfcmpgt(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfcmpgt - // GCC-LABEL: define{{.*}} double @test_m_pfcmpgt - // CHECK: @llvm.x86.3dnow.pfcmpgt - return _m_pfcmpgt(m1, m2); -} - -__m64 test_m_pfmax(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfmax - // GCC-LABEL: define{{.*}} double @test_m_pfmax - // CHECK: @llvm.x86.3dnow.pfmax - return _m_pfmax(m1, m2); -} - -__m64 test_m_pfmin(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfmin - // GCC-LABEL: define{{.*}} double @test_m_pfmin - // CHECK: @llvm.x86.3dnow.pfmin - return _m_pfmin(m1, m2); -} - -__m64 test_m_pfmul(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfmul - // GCC-LABEL: define{{.*}} double @test_m_pfmul - // CHECK: @llvm.x86.3dnow.pfmul - return _m_pfmul(m1, m2); -} - -__m64 test_m_pfrcp(__m64 m) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfrcp - // GCC-LABEL: define{{.*}} double @test_m_pfrcp - // CHECK: @llvm.x86.3dnow.pfrcp - return _m_pfrcp(m); -} - -__m64 test_m_pfrcpit1(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfrcpit1 - // GCC-LABEL: define{{.*}} double @test_m_pfrcpit1 - // CHECK: @llvm.x86.3dnow.pfrcpit1 - return _m_pfrcpit1(m1, m2); -} - -__m64 test_m_pfrcpit2(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfrcpit2 - // GCC-LABEL: define{{.*}} double @test_m_pfrcpit2 - // CHECK: @llvm.x86.3dnow.pfrcpit2 - return _m_pfrcpit2(m1, m2); -} - -__m64 test_m_pfrsqrt(__m64 m) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfrsqrt - // GCC-LABEL: define{{.*}} double @test_m_pfrsqrt - // CHECK: @llvm.x86.3dnow.pfrsqrt - return _m_pfrsqrt(m); -} - -__m64 test_m_pfrsqrtit1(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfrsqrtit1 - // GCC-LABEL: define{{.*}} double @test_m_pfrsqrtit1 - // CHECK: @llvm.x86.3dnow.pfrsqit1 - return _m_pfrsqrtit1(m1, m2); -} - -__m64 test_m_pfsub(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfsub - // GCC-LABEL: define{{.*}} double @test_m_pfsub - // CHECK: @llvm.x86.3dnow.pfsub - return _m_pfsub(m1, m2); -} - -__m64 test_m_pfsubr(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfsubr - // GCC-LABEL: define{{.*}} double @test_m_pfsubr - // CHECK: @llvm.x86.3dnow.pfsubr - return _m_pfsubr(m1, m2); -} - -__m64 test_m_pi2fd(__m64 m) { - // PS4-LABEL: define{{.*}} i64 @test_m_pi2fd - // GCC-LABEL: define{{.*}} double @test_m_pi2fd - // CHECK: @llvm.x86.3dnow.pi2fd - return _m_pi2fd(m); -} - -__m64 test_m_pmulhrw(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pmulhrw - // GCC-LABEL: define{{.*}} double @test_m_pmulhrw - // CHECK: @llvm.x86.3dnow.pmulhrw - return _m_pmulhrw(m1, m2); -} - -__m64 test_m_pf2iw(__m64 m) { - // PS4-LABEL: define{{.*}} i64 @test_m_pf2iw - // GCC-LABEL: define{{.*}} double @test_m_pf2iw - // CHECK: @llvm.x86.3dnowa.pf2iw - return _m_pf2iw(m); -} - -__m64 test_m_pfnacc(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfnacc - // GCC-LABEL: define{{.*}} double @test_m_pfnacc - // CHECK: @llvm.x86.3dnowa.pfnacc - return _m_pfnacc(m1, m2); -} - -__m64 test_m_pfpnacc(__m64 m1, __m64 m2) { - // PS4-LABEL: define{{.*}} i64 @test_m_pfpnacc - // GCC-LABEL: define{{.*}} double @test_m_pfpnacc - // CHECK: @llvm.x86.3dnowa.pfpnacc - return _m_pfpnacc(m1, m2); -} - -__m64 test_m_pi2fw(__m64 m) { - // PS4-LABEL: define{{.*}} i64 @test_m_pi2fw - // GCC-LABEL: define{{.*}} double @test_m_pi2fw - // CHECK: @llvm.x86.3dnowa.pi2fw - return _m_pi2fw(m); -} - -__m64 test_m_pswapdsf(__m64 m) { - // PS4-LABEL: define{{.*}} i64 @test_m_pswapdsf - // GCC-LABEL: define{{.*}} double @test_m_pswapdsf - // CHECK: @llvm.x86.3dnowa.pswapd - return _m_pswapdsf(m); -} - -__m64 test_m_pswapdsi(__m64 m) { - // PS4-LABEL: define{{.*}} i64 @test_m_pswapdsi - // GCC-LABEL: define{{.*}} double @test_m_pswapdsi - // CHECK: @llvm.x86.3dnowa.pswapd - return _m_pswapdsi(m); -} diff --git a/clang/test/CodeGen/builtins-x86.c b/clang/test/CodeGen/builtins-x86.c index e0f220dbeafcc..de31a4db5b0c1 100644 --- a/clang/test/CodeGen/builtins-x86.c +++ b/clang/test/CodeGen/builtins-x86.c @@ -3,7 +3,6 @@ // RUN: %clang_cc1 -DUSE_64 -DOPENCL -x cl -cl-std=CL2.0 -triple x86_64-unknown-unknown -target-feature +fxsr -target-feature +avx -target-feature +xsaveopt -target-feature +xsaves -target-feature +xsavec -target-feature +mwaitx -target-feature +clzero -target-feature +shstk -target-feature +wbnoinvd -target-feature +cldemote -emit-llvm -o %t %s #ifdef USE_ALL -#define USE_3DNOW #define USE_64 #define USE_SSE4 #endif @@ -96,9 +95,6 @@ void f0(void) { V4s tmp_V4s; V2i tmp_V2i; V1LLi tmp_V1LLi; -#ifdef USE_3DNOW - V2f tmp_V2f; -#endif // 128-bit V16c tmp_V16c; @@ -513,33 +509,7 @@ void f0(void) { __builtin_ia32_maskstorepd256(tmp_V4dp, tmp_V4LLi, tmp_V4d); __builtin_ia32_maskstoreps256(tmp_V8fp, tmp_V8i, tmp_V8f); -#ifdef USE_3DNOW - tmp_V8c = __builtin_ia32_pavgusb(tmp_V8c, tmp_V8c); - tmp_V2i = __builtin_ia32_pf2id(tmp_V2f); - tmp_V2f = __builtin_ia32_pfacc(tmp_V2f, tmp_V2f); - tmp_V2f = __builtin_ia32_pfadd(tmp_V2f, tmp_V2f); - tmp_V2i = __builtin_ia32_pfcmpeq(tmp_V2f, tmp_V2f); - tmp_V2i = __builtin_ia32_pfcmpge(tmp_V2f, tmp_V2f); - tmp_V2i = __builtin_ia32_pfcmpgt(tmp_V2f, tmp_V2f); - tmp_V2f = __builtin_ia32_pfmax(tmp_V2f, tmp_V2f); - tmp_V2f = __builtin_ia32_pfmin(tmp_V2f, tmp_V2f); - tmp_V2f = __builtin_ia32_pfmul(tmp_V2f, tmp_V2f); - tmp_V2f = __builtin_ia32_pfrcp(tmp_V2f); - tmp_V2f = __builtin_ia32_pfrcpit1(tmp_V2f, tmp_V2f); - tmp_V2f = __builtin_ia32_pfrcpit2(tmp_V2f, tmp_V2f); - tmp_V2f = __builtin_ia32_pfrsqrt(tmp_V2f); - tmp_V2f = __builtin_ia32_pfrsqit1(tmp_V2f, tmp_V2f); - tmp_V2f = __builtin_ia32_pfsub(tmp_V2f, tmp_V2f); - tmp_V2f = __builtin_ia32_pfsubr(tmp_V2f, tmp_V2f); - tmp_V2f = __builtin_ia32_pi2fd(tmp_V2i); - tmp_V4s = __builtin_ia32_pmulhrw(tmp_V4s, tmp_V4s); - tmp_V2i = __builtin_ia32_pf2iw(tmp_V2f); - tmp_V2f = __builtin_ia32_pfnacc(tmp_V2f, tmp_V2f); - tmp_V2f = __builtin_ia32_pfpnacc(tmp_V2f, tmp_V2f); - tmp_V2f = __builtin_ia32_pi2fw(tmp_V2i); - tmp_V2f = __builtin_ia32_pswapdsf(tmp_V2f); - tmp_V2i = __builtin_ia32_pswapdsi(tmp_V2i); - +#if USE_ALL tmp_V4i = __builtin_ia32_sha1rnds4(tmp_V4i, tmp_V4i, imm_i_0_4); tmp_V4i = __builtin_ia32_sha1nexte(tmp_V4i, tmp_V4i); tmp_V4i = __builtin_ia32_sha1msg1(tmp_V4i, tmp_V4i); diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c index 6773571556bd4..ba61457102a57 100644 --- a/clang/test/Driver/x86-target-features.c +++ b/clang/test/Driver/x86-target-features.c @@ -8,8 +8,13 @@ // RUN: %clang --target=i386 -march=i386 -mmmx -m3dnow -m3dnowa %s -### 2>&1 | FileCheck -check-prefix=MMX %s // RUN: %clang --target=i386 -march=i386 -mno-mmx -mno-3dnow -mno-3dnowa %s -### 2>&1 | FileCheck -check-prefix=NO-MMX %s -// MMX: "-target-feature" "+mmx" "-target-feature" "+3dnow" "-target-feature" "+3dnowa" -// NO-MMX: "-target-feature" "-mmx" "-target-feature" "-3dnow" "-target-feature" "-3dnowa" +// MMX: warning: the clang compiler does not support '-m3dnowa' +// MMX: warning: the clang compiler does not support '-m3dnow' +// MMX-NOT: "3dnow" +// MMX: "-target-feature" "+mmx" +// MMX-NOT: "3dnow" +// NO-MMX-NOT: warning +// NO-MMX: "-target-feature" "-mmx" // RUN: %clang --target=i386 -march=i386 -msse -msse2 -msse3 -mssse3 -msse4a -msse4.1 -msse4.2 %s -### 2>&1 | FileCheck -check-prefix=SSE %s // RUN: %clang --target=i386 -march=i386 -mno-sse -mno-sse2 -mno-sse3 -mno-ssse3 -mno-sse4a -mno-sse4.1 -mno-sse4.2 %s -### 2>&1 | FileCheck -check-prefix=NO-SSE %s diff --git a/clang/test/Headers/mm3dnow.c b/clang/test/Headers/mm3dnow.c index 255483cb9b836..a9b6dd88f8034 100644 --- a/clang/test/Headers/mm3dnow.c +++ b/clang/test/Headers/mm3dnow.c @@ -1,16 +1,21 @@ // RUN: %clang_cc1 -fsyntax-only -ffreestanding %s -verify +// RUN: %clang_cc1 -fsyntax-only -D_CLANG_DISABLE_CRT_DEPRECATION_WARNINGS -ffreestanding %s -verify // RUN: %clang_cc1 -fsyntax-only -ffreestanding -x c++ %s -verify -// expected-no-diagnostics #if defined(i386) || defined(__x86_64__) +#ifndef _CLANG_DISABLE_CRT_DEPRECATION_WARNINGS +// expected-warning@mm3dnow.h:*{{The header is deprecated}} +#else +// expected-no-diagnostics +#endif + #include -int __attribute__((__target__(("3dnow")))) foo(int a) { - _m_femms(); +int foo(void *x) { + _m_prefetch(x); + _m_prefetchw(x); return 4; } - -__m64 __attribute__((__target__(("3dnowa")))) bar(__m64 a) { - return _m_pf2iw(a); -} +#else +// expected-no-diagnostics #endif diff --git a/clang/test/Preprocessor/predefined-arch-macros.c b/clang/test/Preprocessor/predefined-arch-macros.c index f0a2ef851287f..6f470d85ca563 100644 --- a/clang/test/Preprocessor/predefined-arch-macros.c +++ b/clang/test/Preprocessor/predefined-arch-macros.c @@ -99,7 +99,6 @@ // RUN: %clang -march=winchip2 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_WINCHIP2_M32 -// CHECK_WINCHIP2_M32: #define __3dNOW__ 1 // CHECK_WINCHIP2_M32: #define __MMX__ 1 // CHECK_WINCHIP2_M32: #define __i386 1 // CHECK_WINCHIP2_M32: #define __i386__ 1 @@ -115,7 +114,6 @@ // RUN: %clang -march=c3 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_C3_M32 -// CHECK_C3_M32: #define __3dNOW__ 1 // CHECK_C3_M32: #define __MMX__ 1 // CHECK_C3_M32: #define __i386 1 // CHECK_C3_M32: #define __i386__ 1 @@ -2707,8 +2705,6 @@ // RUN: %clang -march=geode -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_GEODE_M32 -// CHECK_GEODE_M32: #define __3dNOW_A__ 1 -// CHECK_GEODE_M32: #define __3dNOW__ 1 // CHECK_GEODE_M32: #define __MMX__ 1 // CHECK_GEODE_M32: #define __geode 1 // CHECK_GEODE_M32: #define __geode__ 1 @@ -2739,7 +2735,6 @@ // RUN: %clang -march=k6-2 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K6_2_M32 -// CHECK_K6_2_M32: #define __3dNOW__ 1 // CHECK_K6_2_M32: #define __MMX__ 1 // CHECK_K6_2_M32: #define __i386 1 // CHECK_K6_2_M32: #define __i386__ 1 @@ -2757,7 +2752,6 @@ // RUN: %clang -march=k6-3 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K6_3_M32 -// CHECK_K6_3_M32: #define __3dNOW__ 1 // CHECK_K6_3_M32: #define __MMX__ 1 // CHECK_K6_3_M32: #define __i386 1 // CHECK_K6_3_M32: #define __i386__ 1 @@ -2775,8 +2769,6 @@ // RUN: %clang -march=athlon -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_M32 -// CHECK_ATHLON_M32: #define __3dNOW_A__ 1 -// CHECK_ATHLON_M32: #define __3dNOW__ 1 // CHECK_ATHLON_M32: #define __MMX__ 1 // CHECK_ATHLON_M32: #define __athlon 1 // CHECK_ATHLON_M32: #define __athlon__ 1 @@ -2792,8 +2784,6 @@ // RUN: %clang -march=athlon-tbird -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_TBIRD_M32 -// CHECK_ATHLON_TBIRD_M32: #define __3dNOW_A__ 1 -// CHECK_ATHLON_TBIRD_M32: #define __3dNOW__ 1 // CHECK_ATHLON_TBIRD_M32: #define __MMX__ 1 // CHECK_ATHLON_TBIRD_M32: #define __athlon 1 // CHECK_ATHLON_TBIRD_M32: #define __athlon__ 1 @@ -2809,8 +2799,6 @@ // RUN: %clang -march=athlon-4 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_4_M32 -// CHECK_ATHLON_4_M32: #define __3dNOW_A__ 1 -// CHECK_ATHLON_4_M32: #define __3dNOW__ 1 // CHECK_ATHLON_4_M32: #define __MMX__ 1 // CHECK_ATHLON_4_M32: #define __SSE__ 1 // CHECK_ATHLON_4_M32: #define __athlon 1 @@ -2829,8 +2817,6 @@ // RUN: %clang -march=athlon-xp -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_XP_M32 -// CHECK_ATHLON_XP_M32: #define __3dNOW_A__ 1 -// CHECK_ATHLON_XP_M32: #define __3dNOW__ 1 // CHECK_ATHLON_XP_M32: #define __MMX__ 1 // CHECK_ATHLON_XP_M32: #define __SSE__ 1 // CHECK_ATHLON_XP_M32: #define __athlon 1 @@ -2849,8 +2835,6 @@ // RUN: %clang -march=athlon-mp -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_MP_M32 -// CHECK_ATHLON_MP_M32: #define __3dNOW_A__ 1 -// CHECK_ATHLON_MP_M32: #define __3dNOW__ 1 // CHECK_ATHLON_MP_M32: #define __MMX__ 1 // CHECK_ATHLON_MP_M32: #define __SSE__ 1 // CHECK_ATHLON_MP_M32: #define __athlon 1 @@ -2881,8 +2865,6 @@ // RUN: %clang -march=k8 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K8_M32 -// CHECK_K8_M32: #define __3dNOW_A__ 1 -// CHECK_K8_M32: #define __3dNOW__ 1 // CHECK_K8_M32: #define __MMX__ 1 // CHECK_K8_M32: #define __SSE2__ 1 // CHECK_K8_M32: #define __SSE__ 1 @@ -2896,8 +2878,6 @@ // RUN: %clang -march=k8 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K8_M64 -// CHECK_K8_M64: #define __3dNOW_A__ 1 -// CHECK_K8_M64: #define __3dNOW__ 1 // CHECK_K8_M64: #define __MMX__ 1 // CHECK_K8_M64: #define __SSE2_MATH__ 1 // CHECK_K8_M64: #define __SSE2__ 1 @@ -2914,8 +2894,6 @@ // RUN: %clang -march=k8-sse3 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K8_SSE3_M32 -// CHECK_K8_SSE3_M32: #define __3dNOW_A__ 1 -// CHECK_K8_SSE3_M32: #define __3dNOW__ 1 // CHECK_K8_SSE3_M32: #define __MMX__ 1 // CHECK_K8_SSE3_M32: #define __SSE2__ 1 // CHECK_K8_SSE3_M32: #define __SSE3__ 1 @@ -2930,8 +2908,6 @@ // RUN: %clang -march=k8-sse3 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_K8_SSE3_M64 -// CHECK_K8_SSE3_M64: #define __3dNOW_A__ 1 -// CHECK_K8_SSE3_M64: #define __3dNOW__ 1 // CHECK_K8_SSE3_M64: #define __MMX__ 1 // CHECK_K8_SSE3_M64: #define __SSE2_MATH__ 1 // CHECK_K8_SSE3_M64: #define __SSE2__ 1 @@ -2949,8 +2925,6 @@ // RUN: %clang -march=opteron -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_OPTERON_M32 -// CHECK_OPTERON_M32: #define __3dNOW_A__ 1 -// CHECK_OPTERON_M32: #define __3dNOW__ 1 // CHECK_OPTERON_M32: #define __MMX__ 1 // CHECK_OPTERON_M32: #define __SSE2__ 1 // CHECK_OPTERON_M32: #define __SSE__ 1 @@ -2964,8 +2938,6 @@ // RUN: %clang -march=opteron -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_OPTERON_M64 -// CHECK_OPTERON_M64: #define __3dNOW_A__ 1 -// CHECK_OPTERON_M64: #define __3dNOW__ 1 // CHECK_OPTERON_M64: #define __MMX__ 1 // CHECK_OPTERON_M64: #define __SSE2_MATH__ 1 // CHECK_OPTERON_M64: #define __SSE2__ 1 @@ -2982,8 +2954,6 @@ // RUN: %clang -march=opteron-sse3 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_OPTERON_SSE3_M32 -// CHECK_OPTERON_SSE3_M32: #define __3dNOW_A__ 1 -// CHECK_OPTERON_SSE3_M32: #define __3dNOW__ 1 // CHECK_OPTERON_SSE3_M32: #define __MMX__ 1 // CHECK_OPTERON_SSE3_M32: #define __SSE2__ 1 // CHECK_OPTERON_SSE3_M32: #define __SSE3__ 1 @@ -2998,8 +2968,6 @@ // RUN: %clang -march=opteron-sse3 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_OPTERON_SSE3_M64 -// CHECK_OPTERON_SSE3_M64: #define __3dNOW_A__ 1 -// CHECK_OPTERON_SSE3_M64: #define __3dNOW__ 1 // CHECK_OPTERON_SSE3_M64: #define __MMX__ 1 // CHECK_OPTERON_SSE3_M64: #define __SSE2_MATH__ 1 // CHECK_OPTERON_SSE3_M64: #define __SSE2__ 1 @@ -3017,8 +2985,6 @@ // RUN: %clang -march=athlon64 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON64_M32 -// CHECK_ATHLON64_M32: #define __3dNOW_A__ 1 -// CHECK_ATHLON64_M32: #define __3dNOW__ 1 // CHECK_ATHLON64_M32: #define __MMX__ 1 // CHECK_ATHLON64_M32: #define __SSE2__ 1 // CHECK_ATHLON64_M32: #define __SSE__ 1 @@ -3032,8 +2998,6 @@ // RUN: %clang -march=athlon64 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON64_M64 -// CHECK_ATHLON64_M64: #define __3dNOW_A__ 1 -// CHECK_ATHLON64_M64: #define __3dNOW__ 1 // CHECK_ATHLON64_M64: #define __MMX__ 1 // CHECK_ATHLON64_M64: #define __SSE2_MATH__ 1 // CHECK_ATHLON64_M64: #define __SSE2__ 1 @@ -3050,8 +3014,6 @@ // RUN: %clang -march=athlon64-sse3 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON64_SSE3_M32 -// CHECK_ATHLON64_SSE3_M32: #define __3dNOW_A__ 1 -// CHECK_ATHLON64_SSE3_M32: #define __3dNOW__ 1 // CHECK_ATHLON64_SSE3_M32: #define __MMX__ 1 // CHECK_ATHLON64_SSE3_M32: #define __SSE2__ 1 // CHECK_ATHLON64_SSE3_M32: #define __SSE3__ 1 @@ -3066,8 +3028,6 @@ // RUN: %clang -march=athlon64-sse3 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON64_SSE3_M64 -// CHECK_ATHLON64_SSE3_M64: #define __3dNOW_A__ 1 -// CHECK_ATHLON64_SSE3_M64: #define __3dNOW__ 1 // CHECK_ATHLON64_SSE3_M64: #define __MMX__ 1 // CHECK_ATHLON64_SSE3_M64: #define __SSE2_MATH__ 1 // CHECK_ATHLON64_SSE3_M64: #define __SSE2__ 1 @@ -3085,8 +3045,6 @@ // RUN: %clang -march=athlon-fx -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_FX_M32 -// CHECK_ATHLON_FX_M32: #define __3dNOW_A__ 1 -// CHECK_ATHLON_FX_M32: #define __3dNOW__ 1 // CHECK_ATHLON_FX_M32: #define __MMX__ 1 // CHECK_ATHLON_FX_M32: #define __SSE2__ 1 // CHECK_ATHLON_FX_M32: #define __SSE__ 1 @@ -3100,8 +3058,6 @@ // RUN: %clang -march=athlon-fx -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ATHLON_FX_M64 -// CHECK_ATHLON_FX_M64: #define __3dNOW_A__ 1 -// CHECK_ATHLON_FX_M64: #define __3dNOW__ 1 // CHECK_ATHLON_FX_M64: #define __MMX__ 1 // CHECK_ATHLON_FX_M64: #define __SSE2_MATH__ 1 // CHECK_ATHLON_FX_M64: #define __SSE2__ 1 @@ -3118,8 +3074,6 @@ // RUN: %clang -march=amdfam10 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_AMDFAM10_M32 -// CHECK_AMDFAM10_M32: #define __3dNOW_A__ 1 -// CHECK_AMDFAM10_M32: #define __3dNOW__ 1 // CHECK_AMDFAM10_M32: #define __LAHF_SAHF__ 1 // CHECK_AMDFAM10_M32: #define __LZCNT__ 1 // CHECK_AMDFAM10_M32: #define __MMX__ 1 @@ -3141,8 +3095,6 @@ // RUN: %clang -march=amdfam10 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_AMDFAM10_M64 -// CHECK_AMDFAM10_M64: #define __3dNOW_A__ 1 -// CHECK_AMDFAM10_M64: #define __3dNOW__ 1 // CHECK_AMDFAM10_M64: #define __GCC_HAVE_SYNC_COMPARE_AND_SWAP_16 1 // CHECK_AMDFAM10_M64: #define __LAHF_SAHF__ 1 // CHECK_AMDFAM10_M64: #define __LZCNT__ 1 @@ -3167,8 +3119,6 @@ // RUN: %clang -march=btver1 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BTVER1_M32 -// CHECK_BTVER1_M32-NOT: #define __3dNOW_A__ 1 -// CHECK_BTVER1_M32-NOT: #define __3dNOW__ 1 // CHECK_BTVER1_M32: #define __LAHF_SAHF__ 1 // CHECK_BTVER1_M32: #define __LZCNT__ 1 // CHECK_BTVER1_M32: #define __MMX__ 1 @@ -3190,8 +3140,6 @@ // RUN: %clang -march=btver1 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BTVER1_M64 -// CHECK_BTVER1_M64-NOT: #define __3dNOW_A__ 1 -// CHECK_BTVER1_M64-NOT: #define __3dNOW__ 1 // CHECK_BTVER1_M64: #define __LAHF_SAHF__ 1 // CHECK_BTVER1_M64: #define __LZCNT__ 1 // CHECK_BTVER1_M64: #define __MMX__ 1 @@ -3215,8 +3163,6 @@ // RUN: %clang -march=btver2 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BTVER2_M32 -// CHECK_BTVER2_M32-NOT: #define __3dNOW_A__ 1 -// CHECK_BTVER2_M32-NOT: #define __3dNOW__ 1 // CHECK_BTVER2_M32: #define __AES__ 1 // CHECK_BTVER2_M32: #define __AVX__ 1 // CHECK_BTVER2_M32: #define __BMI__ 1 @@ -3245,8 +3191,6 @@ // RUN: %clang -march=btver2 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BTVER2_M64 -// CHECK_BTVER2_M64-NOT: #define __3dNOW_A__ 1 -// CHECK_BTVER2_M64-NOT: #define __3dNOW__ 1 // CHECK_BTVER2_M64: #define __AES__ 1 // CHECK_BTVER2_M64: #define __AVX__ 1 // CHECK_BTVER2_M64: #define __BMI__ 1 @@ -3277,8 +3221,6 @@ // RUN: %clang -march=bdver1 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER1_M32 -// CHECK_BDVER1_M32-NOT: #define __3dNOW_A__ 1 -// CHECK_BDVER1_M32-NOT: #define __3dNOW__ 1 // CHECK_BDVER1_M32: #define __AES__ 1 // CHECK_BDVER1_M32: #define __AVX__ 1 // CHECK_BDVER1_M32: #define __FMA4__ 1 @@ -3308,8 +3250,6 @@ // RUN: %clang -march=bdver1 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER1_M64 -// CHECK_BDVER1_M64-NOT: #define __3dNOW_A__ 1 -// CHECK_BDVER1_M64-NOT: #define __3dNOW__ 1 // CHECK_BDVER1_M64: #define __AES__ 1 // CHECK_BDVER1_M64: #define __AVX__ 1 // CHECK_BDVER1_M64: #define __FMA4__ 1 @@ -3341,8 +3281,6 @@ // RUN: %clang -march=bdver2 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER2_M32 -// CHECK_BDVER2_M32-NOT: #define __3dNOW_A__ 1 -// CHECK_BDVER2_M32-NOT: #define __3dNOW__ 1 // CHECK_BDVER2_M32: #define __AES__ 1 // CHECK_BDVER2_M32: #define __AVX__ 1 // CHECK_BDVER2_M32: #define __BMI__ 1 @@ -3376,8 +3314,6 @@ // RUN: %clang -march=bdver2 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER2_M64 -// CHECK_BDVER2_M64-NOT: #define __3dNOW_A__ 1 -// CHECK_BDVER2_M64-NOT: #define __3dNOW__ 1 // CHECK_BDVER2_M64: #define __AES__ 1 // CHECK_BDVER2_M64: #define __AVX__ 1 // CHECK_BDVER2_M64: #define __BMI__ 1 @@ -3413,8 +3349,6 @@ // RUN: %clang -march=bdver3 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER3_M32 -// CHECK_BDVER3_M32-NOT: #define __3dNOW_A__ 1 -// CHECK_BDVER3_M32-NOT: #define __3dNOW__ 1 // CHECK_BDVER3_M32: #define __AES__ 1 // CHECK_BDVER3_M32: #define __AVX__ 1 // CHECK_BDVER3_M32: #define __BMI__ 1 @@ -3450,8 +3384,6 @@ // RUN: %clang -march=bdver3 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER3_M64 -// CHECK_BDVER3_M64-NOT: #define __3dNOW_A__ 1 -// CHECK_BDVER3_M64-NOT: #define __3dNOW__ 1 // CHECK_BDVER3_M64: #define __AES__ 1 // CHECK_BDVER3_M64: #define __AVX__ 1 // CHECK_BDVER3_M64: #define __BMI__ 1 @@ -3489,8 +3421,6 @@ // RUN: %clang -march=bdver4 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER4_M32 -// CHECK_BDVER4_M32-NOT: #define __3dNOW_A__ 1 -// CHECK_BDVER4_M32-NOT: #define __3dNOW__ 1 // CHECK_BDVER4_M32: #define __AES__ 1 // CHECK_BDVER4_M32: #define __AVX2__ 1 // CHECK_BDVER4_M32: #define __AVX__ 1 @@ -3529,8 +3459,6 @@ // RUN: %clang -march=bdver4 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_BDVER4_M64 -// CHECK_BDVER4_M64-NOT: #define __3dNOW_A__ 1 -// CHECK_BDVER4_M64-NOT: #define __3dNOW__ 1 // CHECK_BDVER4_M64: #define __AES__ 1 // CHECK_BDVER4_M64: #define __AVX2__ 1 // CHECK_BDVER4_M64: #define __AVX__ 1 @@ -3571,8 +3499,6 @@ // RUN: %clang -march=znver1 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER1_M32 -// CHECK_ZNVER1_M32-NOT: #define __3dNOW_A__ 1 -// CHECK_ZNVER1_M32-NOT: #define __3dNOW__ 1 // CHECK_ZNVER1_M32: #define __ADX__ 1 // CHECK_ZNVER1_M32: #define __AES__ 1 // CHECK_ZNVER1_M32: #define __AVX2__ 1 @@ -3618,8 +3544,6 @@ // RUN: %clang -march=znver1 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER1_M64 -// CHECK_ZNVER1_M64-NOT: #define __3dNOW_A__ 1 -// CHECK_ZNVER1_M64-NOT: #define __3dNOW__ 1 // CHECK_ZNVER1_M64: #define __ADX__ 1 // CHECK_ZNVER1_M64: #define __AES__ 1 // CHECK_ZNVER1_M64: #define __AVX2__ 1 @@ -3668,8 +3592,6 @@ // RUN: %clang -march=znver2 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER2_M32 -// CHECK_ZNVER2_M32-NOT: #define __3dNOW_A__ 1 -// CHECK_ZNVER2_M32-NOT: #define __3dNOW__ 1 // CHECK_ZNVER2_M32: #define __ADX__ 1 // CHECK_ZNVER2_M32: #define __AES__ 1 // CHECK_ZNVER2_M32: #define __AVX2__ 1 @@ -3719,8 +3641,6 @@ // RUN: %clang -march=znver2 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER2_M64 -// CHECK_ZNVER2_M64-NOT: #define __3dNOW_A__ 1 -// CHECK_ZNVER2_M64-NOT: #define __3dNOW__ 1 // CHECK_ZNVER2_M64: #define __ADX__ 1 // CHECK_ZNVER2_M64: #define __AES__ 1 // CHECK_ZNVER2_M64: #define __AVX2__ 1 @@ -3772,8 +3692,6 @@ // RUN: %clang -march=znver3 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER3_M32 -// CHECK_ZNVER3_M32-NOT: #define __3dNOW_A__ 1 -// CHECK_ZNVER3_M32-NOT: #define __3dNOW__ 1 // CHECK_ZNVER3_M32: #define __ADX__ 1 // CHECK_ZNVER3_M32: #define __AES__ 1 // CHECK_ZNVER3_M32: #define __AVX2__ 1 @@ -3823,8 +3741,6 @@ // RUN: %clang -march=znver3 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER3_M64 -// CHECK_ZNVER3_M64-NOT: #define __3dNOW_A__ 1 -// CHECK_ZNVER3_M64-NOT: #define __3dNOW__ 1 // CHECK_ZNVER3_M64: #define __ADX__ 1 // CHECK_ZNVER3_M64: #define __AES__ 1 // CHECK_ZNVER3_M64: #define __AVX2__ 1 @@ -3878,8 +3794,6 @@ // RUN: %clang -march=znver4 -m32 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER4_M32 -// CHECK_ZNVER4_M32-NOT: #define __3dNOW_A__ 1 -// CHECK_ZNVER4_M32-NOT: #define __3dNOW__ 1 // CHECK_ZNVER4_M32: #define __ADX__ 1 // CHECK_ZNVER4_M32: #define __AES__ 1 // CHECK_ZNVER4_M32: #define __AVX2__ 1 @@ -3944,8 +3858,6 @@ // RUN: %clang -march=znver4 -m64 -E -dM %s -o - 2>&1 \ // RUN: -target i386-unknown-linux \ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ZNVER4_M64 -// CHECK_ZNVER4_M64-NOT: #define __3dNOW_A__ 1 -// CHECK_ZNVER4_M64-NOT: #define __3dNOW__ 1 // CHECK_ZNVER4_M64: #define __ADX__ 1 // CHECK_ZNVER4_M64: #define __AES__ 1 // CHECK_ZNVER4_M64: #define __AVX2__ 1 diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c index 6e9c968273a46..5c0b815c8ae6f 100644 --- a/clang/test/Preprocessor/x86_target_features.c +++ b/clang/test/Preprocessor/x86_target_features.c @@ -348,12 +348,12 @@ // RUN: %clang -target i386-unknown-unknown -march=atom -m3dnow -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=3DNOWPRFCHW %s -// 3DNOWPRFCHW: #define __3dNOW__ 1 +// 3DNOWPRFCHW-NOT: #define __3dNOW__ 1 // 3DNOWPRFCHW-NOT: #define __PRFCHW__ 1 // RUN: %clang -target i386-unknown-unknown -march=atom -mno-prfchw -m3dnow -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=3DNOWNOPRFCHW %s -// 3DNOWNOPRFCHW: #define __3dNOW__ 1 +// 3DNOWNOPRFCHW-NOT: #define __3dNOW__ 1 // 3DNOWNOPRFCHW-NOT: #define __PRFCHW__ 1 // RUN: %clang -target i386-unknown-unknown -march=atom -mprfchw -mno-3dnow -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=NO3DNOWPRFCHW %s diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 55b3b486d705d..086bd5db77da2 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -216,6 +216,9 @@ Changes to the X86 Backend - Removed knl/knm specific ISA intrinsics: AVX512PF, AVX512ER, PREFETCHWT1, while assembly encoding/decoding supports are kept. +- Removed ``3DNow!``-specific ISA intrinsics and codegen support. The ``3dnow`` and ``3dnowa`` target features are no longer supported. The intrinsics ``llvm.x86.3dnow.*``, ``llvm.x86.3dnowa.*``, and ``llvm.x86.mmx.femms`` have been removed. Assembly encoding/decoding for the corresponding instructions remains supported. + + Changes to the OCaml bindings ----------------------------- diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td index aee804047e1b0..adc46f9789ebb 100644 --- a/llvm/include/llvm/IR/IntrinsicsX86.td +++ b/llvm/include/llvm/IR/IntrinsicsX86.td @@ -129,83 +129,6 @@ let TargetPrefix = "x86" in { Intrinsic<[], [llvm_ptr_ty], []>; } -//===----------------------------------------------------------------------===// -// 3DNow! - -let TargetPrefix = "x86" in { - def int_x86_3dnow_pavgusb : ClangBuiltin<"__builtin_ia32_pavgusb">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnow_pf2id : ClangBuiltin<"__builtin_ia32_pf2id">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_3dnow_pfacc : ClangBuiltin<"__builtin_ia32_pfacc">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnow_pfadd : ClangBuiltin<"__builtin_ia32_pfadd">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnow_pfcmpeq : ClangBuiltin<"__builtin_ia32_pfcmpeq">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnow_pfcmpge : ClangBuiltin<"__builtin_ia32_pfcmpge">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnow_pfcmpgt : ClangBuiltin<"__builtin_ia32_pfcmpgt">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnow_pfmax : ClangBuiltin<"__builtin_ia32_pfmax">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnow_pfmin : ClangBuiltin<"__builtin_ia32_pfmin">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnow_pfmul : ClangBuiltin<"__builtin_ia32_pfmul">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnow_pfrcp : ClangBuiltin<"__builtin_ia32_pfrcp">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_3dnow_pfrcpit1 : ClangBuiltin<"__builtin_ia32_pfrcpit1">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnow_pfrcpit2 : ClangBuiltin<"__builtin_ia32_pfrcpit2">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnow_pfrsqrt : ClangBuiltin<"__builtin_ia32_pfrsqrt">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_3dnow_pfrsqit1 : ClangBuiltin<"__builtin_ia32_pfrsqit1">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnow_pfsub : ClangBuiltin<"__builtin_ia32_pfsub">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnow_pfsubr : ClangBuiltin<"__builtin_ia32_pfsubr">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnow_pi2fd : ClangBuiltin<"__builtin_ia32_pi2fd">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_3dnow_pmulhrw : ClangBuiltin<"__builtin_ia32_pmulhrw">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; -} - -//===----------------------------------------------------------------------===// -// 3DNow! extensions - -let TargetPrefix = "x86" in { - def int_x86_3dnowa_pf2iw : ClangBuiltin<"__builtin_ia32_pf2iw">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_3dnowa_pfnacc : ClangBuiltin<"__builtin_ia32_pfnacc">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnowa_pfpnacc : ClangBuiltin<"__builtin_ia32_pfpnacc">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty, llvm_x86mmx_ty], - [IntrNoMem]>; - def int_x86_3dnowa_pi2fw : ClangBuiltin<"__builtin_ia32_pi2fw">, - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; - def int_x86_3dnowa_pswapd : - DefaultAttrsIntrinsic<[llvm_x86mmx_ty], [llvm_x86mmx_ty], [IntrNoMem]>; -} - //===----------------------------------------------------------------------===// // SSE1 @@ -2332,8 +2255,6 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_mmx_emms : ClangBuiltin<"__builtin_ia32_emms">, Intrinsic<[], [], []>; - def int_x86_mmx_femms : ClangBuiltin<"__builtin_ia32_femms">, - Intrinsic<[], [], []>; } // Integer arithmetic ops. diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index 8cb003b838d06..9dafd5e628ca8 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -86,14 +86,8 @@ def FeatureSSE42 : SubtargetFeature<"sse4.2", "X86SSELevel", "SSE42", // The MMX subtarget feature is separate from the rest of the SSE features // because it's important (for odd compatibility reasons) to be able to // turn it off explicitly while allowing SSE+ to be on. -def FeatureMMX : SubtargetFeature<"mmx","X863DNowLevel", "MMX", +def FeatureMMX : SubtargetFeature<"mmx","HasMMX", "true", "Enable MMX instructions">; -def Feature3DNow : SubtargetFeature<"3dnow", "X863DNowLevel", "ThreeDNow", - "Enable 3DNow! instructions", - [FeatureMMX]>; -def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA", - "Enable 3DNow! Athlon instructions", - [Feature3DNow]>; // All x86-64 hardware has SSE2, but we don't mark SSE2 as an implied // feature, because SSE2 can be disabled (e.g. for compiling OS kernels) // without disabling 64-bit mode. Nothing should imply this feature bit. It @@ -1341,7 +1335,6 @@ def ProcessorFeatures { list BarcelonaFeatures = [FeatureX87, FeatureCX8, FeatureSSE4A, - Feature3DNowA, FeatureFXSR, FeatureNOPL, FeatureCX16, @@ -1834,32 +1827,32 @@ def : ProcModel; -def : Proc<"k6-2", [FeatureX87, FeatureCX8, Feature3DNow], +def : Proc<"k6-2", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"k6-3", [FeatureX87, FeatureCX8, Feature3DNow], +def : Proc<"k6-3", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; foreach P = ["athlon", "athlon-tbird"] in { - def : Proc; } foreach P = ["athlon-4", "athlon-xp", "athlon-mp"] in { def : Proc; } foreach P = ["k8", "opteron", "athlon64", "athlon-fx"] in { - def : Proc; } foreach P = ["k8-sse3", "opteron-sse3", "athlon64-sse3"] in { - def : Proc; -def : Proc<"geode", [FeatureX87, FeatureCX8, Feature3DNowA], +def : Proc<"geode", [FeatureX87, FeatureCX8, FeatureMMX, FeaturePRFCHW], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; def : Proc<"winchip-c6", [FeatureX87, FeatureMMX], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"winchip2", [FeatureX87, Feature3DNow], +def : Proc<"winchip2", [FeatureX87, FeatureMMX, FeaturePRFCHW], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; -def : Proc<"c3", [FeatureX87, Feature3DNow], +def : Proc<"c3", [FeatureX87, FeatureMMX, FeaturePRFCHW], [TuningSlowUAMem16, TuningInsertVZEROUPPER]>; def : Proc<"c3-2", [FeatureX87, FeatureCX8, FeatureMMX, FeatureSSE1, FeatureFXSR, FeatureCMOV], diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 1d947ac2346d0..d01cc299f3e18 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -530,7 +530,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, VT, Custom); } - if (Subtarget.hasSSEPrefetch() || Subtarget.hasThreeDNow()) + if (Subtarget.hasSSEPrefetch()) setOperationAction(ISD::PREFETCH , MVT::Other, Custom); setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); diff --git a/llvm/lib/Target/X86/X86Instr3DNow.td b/llvm/lib/Target/X86/X86Instr3DNow.td index 03612de0fad94..13fe7d2ccbe77 100644 --- a/llvm/lib/Target/X86/X86Instr3DNow.td +++ b/llvm/lib/Target/X86/X86Instr3DNow.td @@ -12,7 +12,7 @@ //===----------------------------------------------------------------------===// class I3DNow o, Format F, dag outs, dag ins, string asm, list pat> - : I, Requires<[Has3DNow]> { + : I { } class I3DNow_binop o, Format F, dag ins, string Mnemonic, list pat> @@ -25,66 +25,60 @@ class I3DNow_conv o, Format F, dag ins, string Mnemonic, list pat> : I3DNow, ThreeDNow; -multiclass I3DNow_binop_rm_int opc, string Mn, - X86FoldableSchedWrite sched, bit Commutable = 0, - string Ver = ""> { - let isCommutable = Commutable in - def rr : I3DNow_binop( - !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, VR64:$src2))]>, - Sched<[sched]>; - def rm : I3DNow_binop( - !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src1, - (bitconvert (load_mmx addr:$src2))))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>; +multiclass I3DNow_binop_rm opc, string Mn, + X86FoldableSchedWrite sched, bit Commutable = 0> { + let mayStore=0, hasSideEffects=0 in { + let isCommutable = Commutable, mayLoad=0 in + def rr : I3DNow_binop, Sched<[sched]>; + let mayLoad=1 in + def rm : I3DNow_binop, Sched<[sched.Folded, sched.ReadAfterFold]>; + } } -multiclass I3DNow_conv_rm_int opc, string Mn, - X86FoldableSchedWrite sched, string Ver = ""> { - def rr : I3DNow_conv( - !strconcat("int_x86_3dnow", Ver, "_", Mn)) VR64:$src))]>, - Sched<[sched]>; - def rm : I3DNow_conv( - !strconcat("int_x86_3dnow", Ver, "_", Mn)) - (bitconvert (load_mmx addr:$src))))]>, - Sched<[sched.Folded, sched.ReadAfterFold]>; +multiclass I3DNow_conv_rm opc, string Mn, + X86FoldableSchedWrite sched> { + let mayStore=0, hasSideEffects=0 in { + let mayLoad=0 in + def rr : I3DNow_conv, Sched<[sched]>; + let mayLoad=1 in + def rm : I3DNow_conv, Sched<[sched.Folded, sched.ReadAfterFold]>; + } } -defm PAVGUSB : I3DNow_binop_rm_int<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>; -defm PF2ID : I3DNow_conv_rm_int<0x1D, "pf2id", WriteCvtPS2I>; -defm PFACC : I3DNow_binop_rm_int<0xAE, "pfacc", WriteFAdd>; -defm PFADD : I3DNow_binop_rm_int<0x9E, "pfadd", WriteFAdd, 1>; -defm PFCMPEQ : I3DNow_binop_rm_int<0xB0, "pfcmpeq", WriteFAdd, 1>; -defm PFCMPGE : I3DNow_binop_rm_int<0x90, "pfcmpge", WriteFAdd>; -defm PFCMPGT : I3DNow_binop_rm_int<0xA0, "pfcmpgt", WriteFAdd>; -defm PFMAX : I3DNow_binop_rm_int<0xA4, "pfmax", WriteFAdd>; -defm PFMIN : I3DNow_binop_rm_int<0x94, "pfmin", WriteFAdd>; -defm PFMUL : I3DNow_binop_rm_int<0xB4, "pfmul", WriteFAdd, 1>; -defm PFRCP : I3DNow_conv_rm_int<0x96, "pfrcp", WriteFAdd>; -defm PFRCPIT1 : I3DNow_binop_rm_int<0xA6, "pfrcpit1", WriteFAdd>; -defm PFRCPIT2 : I3DNow_binop_rm_int<0xB6, "pfrcpit2", WriteFAdd>; -defm PFRSQIT1 : I3DNow_binop_rm_int<0xA7, "pfrsqit1", WriteFAdd>; -defm PFRSQRT : I3DNow_conv_rm_int<0x97, "pfrsqrt", WriteFAdd>; -defm PFSUB : I3DNow_binop_rm_int<0x9A, "pfsub", WriteFAdd, 1>; -defm PFSUBR : I3DNow_binop_rm_int<0xAA, "pfsubr", WriteFAdd, 1>; -defm PI2FD : I3DNow_conv_rm_int<0x0D, "pi2fd", WriteCvtI2PS>; -defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>; +defm PAVGUSB : I3DNow_binop_rm<0xBF, "pavgusb", SchedWriteVecALU.MMX, 1>; +defm PF2ID : I3DNow_conv_rm<0x1D, "pf2id", WriteCvtPS2I>; +defm PFACC : I3DNow_binop_rm<0xAE, "pfacc", WriteFAdd>; +defm PFADD : I3DNow_binop_rm<0x9E, "pfadd", WriteFAdd, 1>; +defm PFCMPEQ : I3DNow_binop_rm<0xB0, "pfcmpeq", WriteFAdd, 1>; +defm PFCMPGE : I3DNow_binop_rm<0x90, "pfcmpge", WriteFAdd>; +defm PFCMPGT : I3DNow_binop_rm<0xA0, "pfcmpgt", WriteFAdd>; +defm PFMAX : I3DNow_binop_rm<0xA4, "pfmax", WriteFAdd>; +defm PFMIN : I3DNow_binop_rm<0x94, "pfmin", WriteFAdd>; +defm PFMUL : I3DNow_binop_rm<0xB4, "pfmul", WriteFAdd, 1>; +defm PFRCP : I3DNow_conv_rm<0x96, "pfrcp", WriteFAdd>; +defm PFRCPIT1 : I3DNow_binop_rm<0xA6, "pfrcpit1", WriteFAdd>; +defm PFRCPIT2 : I3DNow_binop_rm<0xB6, "pfrcpit2", WriteFAdd>; +defm PFRSQIT1 : I3DNow_binop_rm<0xA7, "pfrsqit1", WriteFAdd>; +defm PFRSQRT : I3DNow_conv_rm<0x97, "pfrsqrt", WriteFAdd>; +defm PFSUB : I3DNow_binop_rm<0x9A, "pfsub", WriteFAdd, 1>; +defm PFSUBR : I3DNow_binop_rm<0xAA, "pfsubr", WriteFAdd, 1>; +defm PI2FD : I3DNow_conv_rm<0x0D, "pi2fd", WriteCvtI2PS>; +defm PMULHRW : I3DNow_binop_rm<0xB7, "pmulhrw", SchedWriteVecIMul.MMX, 1>; -let SchedRW = [WriteEMMS], - Defs = [MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7, - ST0, ST1, ST2, ST3, ST4, ST5, ST6, ST7] in +let SchedRW = [WriteEMMS], mayLoad=1, mayStore=1, hasSideEffects=1 in def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", - [(int_x86_mmx_femms)]>, TB; + []>, TB; -let SchedRW = [WriteLoad] in { -let Predicates = [Has3DNow, NoSSEPrefetch] in +let SchedRW = [WriteLoad], mayLoad=1, mayStore=1, hasSideEffects=0 in { def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr), "prefetch\t$addr", - [(prefetch addr:$addr, timm, timm, (i32 1))]>, TB; + []>, TB; +// Note: PREFETCHW is the only instruction in this file which is NOT specific to 3DNow! def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr", [(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))]>, TB, Requires<[HasPrefetchW]>; @@ -94,8 +88,8 @@ def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr" } // "3DNowA" instructions -defm PF2IW : I3DNow_conv_rm_int<0x1C, "pf2iw", WriteCvtPS2I, "a">; -defm PI2FW : I3DNow_conv_rm_int<0x0C, "pi2fw", WriteCvtI2PS, "a">; -defm PFNACC : I3DNow_binop_rm_int<0x8A, "pfnacc", WriteFAdd, 0, "a">; -defm PFPNACC : I3DNow_binop_rm_int<0x8E, "pfpnacc", WriteFAdd, 0, "a">; -defm PSWAPD : I3DNow_conv_rm_int<0xBB, "pswapd", SchedWriteShuffle.MMX, "a">; +defm PF2IW : I3DNow_conv_rm<0x1C, "pf2iw", WriteCvtPS2I>; +defm PI2FW : I3DNow_conv_rm<0x0C, "pi2fw", WriteCvtI2PS>; +defm PFNACC : I3DNow_binop_rm<0x8A, "pfnacc", WriteFAdd, 0>; +defm PFPNACC : I3DNow_binop_rm<0x8E, "pfpnacc", WriteFAdd, 0>; +defm PSWAPD : I3DNow_conv_rm<0xBB, "pswapd", SchedWriteShuffle.MMX>; diff --git a/llvm/lib/Target/X86/X86InstrPredicates.td b/llvm/lib/Target/X86/X86InstrPredicates.td index 419ff9e6f5c0f..f6038cf7a94cb 100644 --- a/llvm/lib/Target/X86/X86InstrPredicates.td +++ b/llvm/lib/Target/X86/X86InstrPredicates.td @@ -50,8 +50,6 @@ def HasCMOV : Predicate<"Subtarget->canUseCMOV()">; def NoCMOV : Predicate<"!Subtarget->canUseCMOV()">; def HasNOPL : Predicate<"Subtarget->hasNOPL()">; def HasMMX : Predicate<"Subtarget->hasMMX()">; -def Has3DNow : Predicate<"Subtarget->hasThreeDNow()">; -def Has3DNowA : Predicate<"Subtarget->hasThreeDNowA()">; def HasSSE1 : Predicate<"Subtarget->hasSSE1()">; def UseSSE1 : Predicate<"Subtarget->hasSSE1() && !Subtarget->hasAVX()">; def HasSSE2 : Predicate<"Subtarget->hasSSE2()">; @@ -141,7 +139,6 @@ def HasSGX : Predicate<"Subtarget->hasSGX()">; def HasSM3 : Predicate<"Subtarget->hasSM3()">; def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">; def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">; -def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">; def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">; def HasPREFETCHI : Predicate<"Subtarget->hasPREFETCHI()">; def HasPrefetchW : Predicate<"Subtarget->hasPrefetchW()">; diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp index 6c9a94c949590..4e8e04b1112c0 100644 --- a/llvm/lib/Target/X86/X86Subtarget.cpp +++ b/llvm/lib/Target/X86/X86Subtarget.cpp @@ -290,8 +290,7 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef TuneCPU, IsUnalignedMem16Slow = false; LLVM_DEBUG(dbgs() << "Subtarget features: SSELevel " << X86SSELevel - << ", 3DNowLevel " << X863DNowLevel << ", 64bit " - << HasX86_64 << "\n"); + << ", MMX " << HasMMX << ", 64bit " << HasX86_64 << "\n"); if (Is64Bit && !HasX86_64) report_fatal_error("64-bit code requested on a subtarget that doesn't " "support it!"); diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 4532db134fcb4..e3cb9ee8ce190 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -55,10 +55,6 @@ class X86Subtarget final : public X86GenSubtargetInfo { NoSSE, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, AVX, AVX2, AVX512 }; - enum X863DNowEnum { - NoThreeDNow, MMX, ThreeDNow, ThreeDNowA - }; - /// Which PIC style to use PICStyles::Style PICStyle; @@ -67,9 +63,6 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or none supported. X86SSEEnum X86SSELevel = NoSSE; - /// MMX, 3DNow, 3DNow Athlon, or none supported. - X863DNowEnum X863DNowLevel = NoThreeDNow; - #define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \ bool ATTRIBUTE = DEFAULT; #include "X86GenSubtargetInfo.inc" @@ -207,21 +200,16 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool hasAVX2() const { return X86SSELevel >= AVX2; } bool hasAVX512() const { return X86SSELevel >= AVX512; } bool hasInt256() const { return hasAVX2(); } - bool hasMMX() const { return X863DNowLevel >= MMX; } - bool hasThreeDNow() const { return X863DNowLevel >= ThreeDNow; } - bool hasThreeDNowA() const { return X863DNowLevel >= ThreeDNowA; } bool hasAnyFMA() const { return hasFMA() || hasFMA4(); } bool hasPrefetchW() const { // The PREFETCHW instruction was added with 3DNow but later CPUs gave it - // its own CPUID bit as part of deprecating 3DNow. We assume the - // L1 version exists if the L2 version does. - return hasThreeDNow() || hasPRFCHW(); + // its own CPUID bit as part of deprecating 3DNow. + return hasPRFCHW(); } bool hasSSEPrefetch() const { - // We implicitly enable these when we have a write prefix supporting cache - // level OR if we have prfchw, but don't already have a read prefetch from - // 3dnow. - return hasSSE1() || (hasPRFCHW() && !hasThreeDNow()) || hasPREFETCHI(); + // We also implicitly enable these when we have a write prefix supporting + // cache level OR if we have prfchw. + return hasSSE1() || hasPRFCHW() || hasPREFETCHI(); } bool canUseLAHFSAHF() const { return hasLAHFSAHF64() || !is64Bit(); } // These are generic getters that OR together all of the thunk types diff --git a/llvm/test/CodeGen/X86/3dnow-intrinsics.ll b/llvm/test/CodeGen/X86/3dnow-intrinsics.ll deleted file mode 100644 index a82f705b77d84..0000000000000 --- a/llvm/test/CodeGen/X86/3dnow-intrinsics.ll +++ /dev/null @@ -1,896 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+3dnow | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnow | FileCheck %s --check-prefix=X64 - -define <8 x i8> @test_pavgusb(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone { -; X86-LABEL: test_pavgusb: -; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: pavgusb %mm1, %mm0 -; X86-NEXT: movq %mm0, (%eax) -; X86-NEXT: retl $4 -; -; X64-LABEL: test_pavgusb: -; X64: # %bb.0: # %entry -; X64-NEXT: pavgusb %mm1, %mm0 -; X64-NEXT: movq2dq %mm0, %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast x86_mmx %a.coerce to <8 x i8> - %1 = bitcast x86_mmx %b.coerce to <8 x i8> - %2 = bitcast <8 x i8> %0 to x86_mmx - %3 = bitcast <8 x i8> %1 to x86_mmx - %4 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %2, x86_mmx %3) - %5 = bitcast x86_mmx %4 to <8 x i8> - ret <8 x i8> %5 -} - -declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone - -define <2 x i32> @test_pf2id(<2 x float> %a) nounwind readnone { -; X86-LABEL: test_pf2id: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: pf2id %mm1, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pf2id: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm0, %mm0 -; X64-NEXT: pf2id %mm0, %mm0 -; X64-NEXT: movq2dq %mm0, %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = tail call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %0) - %2 = bitcast x86_mmx %1 to <2 x i32> - ret <2 x i32> %2 -} - -declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone - -define <2 x float> @test_pfacc(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfacc: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfacc %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfacc: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfacc %mm0, %mm1 -; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x float> - ret <2 x float> %3 -} - -declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone - -define <2 x float> @test_pfadd(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfadd: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfadd %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfadd: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfadd %mm0, %mm1 -; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x float> - ret <2 x float> %3 -} - -declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone - -define <2 x i32> @test_pfcmpeq(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfcmpeq: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfcmpeq %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfcmpeq: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfcmpeq %mm0, %mm1 -; X64-NEXT: movq2dq %mm1, %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x i32> - ret <2 x i32> %3 -} - -declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone - -define <2 x i32> @test_pfcmpge(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfcmpge: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfcmpge %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfcmpge: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfcmpge %mm0, %mm1 -; X64-NEXT: movq2dq %mm1, %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x i32> - ret <2 x i32> %3 -} - -declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone - -define <2 x i32> @test_pfcmpgt(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfcmpgt: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfcmpgt %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfcmpgt: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfcmpgt %mm0, %mm1 -; X64-NEXT: movq2dq %mm1, %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x i32> - ret <2 x i32> %3 -} - -declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone - -define <2 x float> @test_pfmax(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfmax: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfmax %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfmax: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfmax %mm0, %mm1 -; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x float> - ret <2 x float> %3 -} - -declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone - -define <2 x float> @test_pfmin(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfmin: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfmin %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfmin: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfmin %mm0, %mm1 -; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x float> - ret <2 x float> %3 -} - -declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone - -define <2 x float> @test_pfmul(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfmul: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfmul %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfmul: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfmul %mm0, %mm1 -; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x float> - ret <2 x float> %3 -} - -declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone - -define <2 x float> @test_pfrcp(<2 x float> %a) nounwind readnone { -; X86-LABEL: test_pfrcp: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: pfrcp %mm1, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfrcp: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm0, %mm0 -; X64-NEXT: pfrcp %mm0, %mm0 -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = tail call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %0) - %2 = bitcast x86_mmx %1 to <2 x float> - ret <2 x float> %2 -} - -declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone - -define <2 x float> @test_pfrcpit1(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfrcpit1: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfrcpit1 %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfrcpit1: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfrcpit1 %mm0, %mm1 -; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x float> - ret <2 x float> %3 -} - -declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone - -define <2 x float> @test_pfrcpit2(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfrcpit2: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfrcpit2 %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfrcpit2: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfrcpit2 %mm0, %mm1 -; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x float> - ret <2 x float> %3 -} - -declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone - -define <2 x float> @test_pfrsqrt(<2 x float> %a) nounwind readnone { -; X86-LABEL: test_pfrsqrt: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: pfrsqrt %mm1, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfrsqrt: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm0, %mm0 -; X64-NEXT: pfrsqrt %mm0, %mm0 -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = tail call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %0) - %2 = bitcast x86_mmx %1 to <2 x float> - ret <2 x float> %2 -} - -declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone - -define <2 x float> @test_pfrsqit1(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfrsqit1: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfrsqit1 %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfrsqit1: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfrsqit1 %mm0, %mm1 -; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x float> - ret <2 x float> %3 -} - -declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone - -define <2 x float> @test_pfsub(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfsub: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfsub %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfsub: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfsub %mm0, %mm1 -; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x float> - ret <2 x float> %3 -} - -declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone - -define <2 x float> @test_pfsubr(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfsubr: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfsubr %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfsubr: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfsubr %mm0, %mm1 -; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x float> - ret <2 x float> %3 -} - -declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone - -define <2 x float> @test_pi2fd(x86_mmx %a.coerce) nounwind readnone { -; X86-LABEL: test_pi2fd: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: pi2fd %mm0, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pi2fd: -; X64: # %bb.0: # %entry -; X64-NEXT: pi2fd %mm0, %mm0 -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast x86_mmx %a.coerce to <2 x i32> - %1 = bitcast <2 x i32> %0 to x86_mmx - %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x float> - ret <2 x float> %3 -} - -declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone - -define <4 x i16> @test_pmulhrw(x86_mmx %a.coerce, x86_mmx %b.coerce) nounwind readnone { -; X86-LABEL: test_pmulhrw: -; X86: # %bb.0: # %entry -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: pmulhrw %mm1, %mm0 -; X86-NEXT: movq %mm0, (%eax) -; X86-NEXT: retl $4 -; -; X64-LABEL: test_pmulhrw: -; X64: # %bb.0: # %entry -; X64-NEXT: pmulhrw %mm1, %mm0 -; X64-NEXT: movq2dq %mm0, %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast x86_mmx %a.coerce to <4 x i16> - %1 = bitcast x86_mmx %b.coerce to <4 x i16> - %2 = bitcast <4 x i16> %0 to x86_mmx - %3 = bitcast <4 x i16> %1 to x86_mmx - %4 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %2, x86_mmx %3) - %5 = bitcast x86_mmx %4 to <4 x i16> - ret <4 x i16> %5 -} - -declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone - -define <2 x i32> @test_pf2iw(<2 x float> %a) nounwind readnone { -; X86-LABEL: test_pf2iw: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: pf2iw %mm1, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pf2iw: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm0, %mm0 -; X64-NEXT: pf2iw %mm0, %mm0 -; X64-NEXT: movq2dq %mm0, %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = tail call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %0) - %2 = bitcast x86_mmx %1 to <2 x i32> - ret <2 x i32> %2 -} - -declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone - -define <2 x float> @test_pfnacc(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfnacc: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfnacc %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfnacc: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfnacc %mm0, %mm1 -; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x float> - ret <2 x float> %3 -} - -declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone - -define <2 x float> @test_pfpnacc(<2 x float> %a, <2 x float> %b) nounwind readnone { -; X86-LABEL: test_pfpnacc: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 20(%ebp), %mm0 -; X86-NEXT: movd 16(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm2 -; X86-NEXT: punpckldq %mm0, %mm2 # mm2 = mm2[0],mm0[0] -; X86-NEXT: pfpnacc %mm1, %mm2 -; X86-NEXT: movq %mm2, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pfpnacc: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm1, %mm0 -; X64-NEXT: movdq2q %xmm0, %mm1 -; X64-NEXT: pfpnacc %mm0, %mm1 -; X64-NEXT: movq %mm1, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = bitcast <2 x float> %b to x86_mmx - %2 = tail call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %0, x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x float> - ret <2 x float> %3 -} - -declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone - -define <2 x float> @test_pi2fw(x86_mmx %a.coerce) nounwind readnone { -; X86-LABEL: test_pi2fw: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: pi2fw %mm0, %mm0 -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pi2fw: -; X64: # %bb.0: # %entry -; X64-NEXT: pi2fw %mm0, %mm0 -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast x86_mmx %a.coerce to <2 x i32> - %1 = bitcast <2 x i32> %0 to x86_mmx - %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1) - %3 = bitcast x86_mmx %2 to <2 x float> - ret <2 x float> %3 -} - -declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone - -define <2 x float> @test_pswapdsf(<2 x float> %a) nounwind readnone { -; X86-LABEL: test_pswapdsf: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: pswapd %mm1, %mm0 # mm0 = mm1[1,0] -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: flds {{[0-9]+}}(%esp) -; X86-NEXT: flds (%esp) -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pswapdsf: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm0, %mm0 -; X64-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0] -; X64-NEXT: movq %mm0, -{{[0-9]+}}(%rsp) -; X64-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x float> %a to x86_mmx - %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0) - %2 = bitcast x86_mmx %1 to <2 x float> - ret <2 x float> %2 -} - -define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone { -; X86-LABEL: test_pswapdsi: -; X86: # %bb.0: # %entry -; X86-NEXT: pushl %ebp -; X86-NEXT: movl %esp, %ebp -; X86-NEXT: andl $-8, %esp -; X86-NEXT: subl $8, %esp -; X86-NEXT: movd 12(%ebp), %mm0 -; X86-NEXT: movd 8(%ebp), %mm1 -; X86-NEXT: punpckldq %mm0, %mm1 # mm1 = mm1[0],mm0[0] -; X86-NEXT: pswapd %mm1, %mm0 # mm0 = mm1[1,0] -; X86-NEXT: movq %mm0, (%esp) -; X86-NEXT: movl (%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %ebp, %esp -; X86-NEXT: popl %ebp -; X86-NEXT: retl -; -; X64-LABEL: test_pswapdsi: -; X64: # %bb.0: # %entry -; X64-NEXT: movdq2q %xmm0, %mm0 -; X64-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0] -; X64-NEXT: movq2dq %mm0, %xmm0 -; X64-NEXT: retq -entry: - %0 = bitcast <2 x i32> %a to x86_mmx - %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0) - %2 = bitcast x86_mmx %1 to <2 x i32> - ret <2 x i32> %2 -} - -declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone diff --git a/llvm/test/CodeGen/X86/commute-3dnow.ll b/llvm/test/CodeGen/X86/commute-3dnow.ll deleted file mode 100644 index dc3910920365d..0000000000000 --- a/llvm/test/CodeGen/X86/commute-3dnow.ll +++ /dev/null @@ -1,270 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X86 -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+mmx,+3dnow | FileCheck %s --check-prefix=X64 - -define void @commute_m_pfadd(ptr%a0, ptr%a1, ptr%a2) nounwind { -; X86-LABEL: commute_m_pfadd: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movq (%edx), %mm0 -; X86-NEXT: pfadd (%eax), %mm0 -; X86-NEXT: pfadd (%ecx), %mm0 -; X86-NEXT: movq %mm0, (%ecx) -; X86-NEXT: retl -; -; X64-LABEL: commute_m_pfadd: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %mm0 -; X64-NEXT: pfadd (%rsi), %mm0 -; X64-NEXT: pfadd (%rdx), %mm0 -; X64-NEXT: movq %mm0, (%rdx) -; X64-NEXT: retq - %1 = load x86_mmx, ptr %a0 - %2 = load x86_mmx, ptr %a1 - %3 = load x86_mmx, ptr %a2 - %4 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %1, x86_mmx %2) - %5 = tail call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %3, x86_mmx %4) - store x86_mmx %5, ptr %a2 - ret void -} -declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) - -define void @commute_m_pfsub(ptr%a0, ptr%a1, ptr%a2) nounwind { -; X86-LABEL: commute_m_pfsub: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movq (%edx), %mm0 -; X86-NEXT: pfsub (%eax), %mm0 -; X86-NEXT: pfsubr (%ecx), %mm0 -; X86-NEXT: movq %mm0, (%ecx) -; X86-NEXT: retl -; -; X64-LABEL: commute_m_pfsub: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %mm0 -; X64-NEXT: pfsub (%rsi), %mm0 -; X64-NEXT: pfsubr (%rdx), %mm0 -; X64-NEXT: movq %mm0, (%rdx) -; X64-NEXT: retq - %1 = load x86_mmx, ptr %a0 - %2 = load x86_mmx, ptr %a1 - %3 = load x86_mmx, ptr %a2 - %4 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %1, x86_mmx %2) - %5 = tail call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %3, x86_mmx %4) - store x86_mmx %5, ptr %a2 - ret void -} -declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) - -define void @commute_m_pfsubr(ptr%a0, ptr%a1, ptr%a2) nounwind { -; X86-LABEL: commute_m_pfsubr: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movq (%edx), %mm0 -; X86-NEXT: pfsubr (%eax), %mm0 -; X86-NEXT: pfsub (%ecx), %mm0 -; X86-NEXT: movq %mm0, (%ecx) -; X86-NEXT: retl -; -; X64-LABEL: commute_m_pfsubr: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %mm0 -; X64-NEXT: pfsubr (%rsi), %mm0 -; X64-NEXT: pfsub (%rdx), %mm0 -; X64-NEXT: movq %mm0, (%rdx) -; X64-NEXT: retq - %1 = load x86_mmx, ptr %a0 - %2 = load x86_mmx, ptr %a1 - %3 = load x86_mmx, ptr %a2 - %4 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %1, x86_mmx %2) - %5 = tail call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %3, x86_mmx %4) - store x86_mmx %5, ptr %a2 - ret void -} -declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) - -define void @commute_m_pfmul(ptr%a0, ptr%a1, ptr%a2) nounwind { -; X86-LABEL: commute_m_pfmul: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movq (%edx), %mm0 -; X86-NEXT: pfmul (%eax), %mm0 -; X86-NEXT: pfmul (%ecx), %mm0 -; X86-NEXT: movq %mm0, (%ecx) -; X86-NEXT: retl -; -; X64-LABEL: commute_m_pfmul: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %mm0 -; X64-NEXT: pfmul (%rsi), %mm0 -; X64-NEXT: pfmul (%rdx), %mm0 -; X64-NEXT: movq %mm0, (%rdx) -; X64-NEXT: retq - %1 = load x86_mmx, ptr %a0 - %2 = load x86_mmx, ptr %a1 - %3 = load x86_mmx, ptr %a2 - %4 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %1, x86_mmx %2) - %5 = tail call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %3, x86_mmx %4) - store x86_mmx %5, ptr %a2 - ret void -} -declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) - -; PFMAX can't commute without fast-math. -define void @commute_m_pfmax(ptr%a0, ptr%a1, ptr%a2) nounwind { -; X86-LABEL: commute_m_pfmax: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movq (%edx), %mm0 -; X86-NEXT: movq (%ecx), %mm1 -; X86-NEXT: pfmax (%eax), %mm0 -; X86-NEXT: pfmax %mm0, %mm1 -; X86-NEXT: movq %mm1, (%ecx) -; X86-NEXT: retl -; -; X64-LABEL: commute_m_pfmax: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %mm0 -; X64-NEXT: movq (%rdx), %mm1 -; X64-NEXT: pfmax (%rsi), %mm0 -; X64-NEXT: pfmax %mm0, %mm1 -; X64-NEXT: movq %mm1, (%rdx) -; X64-NEXT: retq - %1 = load x86_mmx, ptr %a0 - %2 = load x86_mmx, ptr %a1 - %3 = load x86_mmx, ptr %a2 - %4 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %1, x86_mmx %2) - %5 = tail call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %3, x86_mmx %4) - store x86_mmx %5, ptr %a2 - ret void -} -declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) - -; PFMIN can't commute without fast-math. -define void @commute_m_pfmin(ptr%a0, ptr%a1, ptr%a2) nounwind { -; X86-LABEL: commute_m_pfmin: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movq (%edx), %mm0 -; X86-NEXT: movq (%ecx), %mm1 -; X86-NEXT: pfmin (%eax), %mm0 -; X86-NEXT: pfmin %mm0, %mm1 -; X86-NEXT: movq %mm1, (%ecx) -; X86-NEXT: retl -; -; X64-LABEL: commute_m_pfmin: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %mm0 -; X64-NEXT: movq (%rdx), %mm1 -; X64-NEXT: pfmin (%rsi), %mm0 -; X64-NEXT: pfmin %mm0, %mm1 -; X64-NEXT: movq %mm1, (%rdx) -; X64-NEXT: retq - %1 = load x86_mmx, ptr %a0 - %2 = load x86_mmx, ptr %a1 - %3 = load x86_mmx, ptr %a2 - %4 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %1, x86_mmx %2) - %5 = tail call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %3, x86_mmx %4) - store x86_mmx %5, ptr %a2 - ret void -} -declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) - -define void @commute_m_pfcmpeq(ptr%a0, ptr%a1, ptr%a2) nounwind { -; X86-LABEL: commute_m_pfcmpeq: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movq (%edx), %mm0 -; X86-NEXT: pfcmpeq (%eax), %mm0 -; X86-NEXT: pfcmpeq (%ecx), %mm0 -; X86-NEXT: movq %mm0, (%ecx) -; X86-NEXT: retl -; -; X64-LABEL: commute_m_pfcmpeq: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %mm0 -; X64-NEXT: pfcmpeq (%rsi), %mm0 -; X64-NEXT: pfcmpeq (%rdx), %mm0 -; X64-NEXT: movq %mm0, (%rdx) -; X64-NEXT: retq - %1 = load x86_mmx, ptr %a0 - %2 = load x86_mmx, ptr %a1 - %3 = load x86_mmx, ptr %a2 - %4 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %1, x86_mmx %2) - %5 = tail call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %3, x86_mmx %4) - store x86_mmx %5, ptr %a2 - ret void -} -declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) - -define void @commute_m_pavgusb(ptr%a0, ptr%a1, ptr%a2) nounwind { -; X86-LABEL: commute_m_pavgusb: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movq (%edx), %mm0 -; X86-NEXT: pavgusb (%eax), %mm0 -; X86-NEXT: pavgusb (%ecx), %mm0 -; X86-NEXT: movq %mm0, (%ecx) -; X86-NEXT: retl -; -; X64-LABEL: commute_m_pavgusb: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %mm0 -; X64-NEXT: pavgusb (%rsi), %mm0 -; X64-NEXT: pavgusb (%rdx), %mm0 -; X64-NEXT: movq %mm0, (%rdx) -; X64-NEXT: retq - %1 = load x86_mmx, ptr %a0 - %2 = load x86_mmx, ptr %a1 - %3 = load x86_mmx, ptr %a2 - %4 = tail call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %1, x86_mmx %2) - %5 = tail call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %3, x86_mmx %4) - store x86_mmx %5, ptr %a2 - ret void -} -declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) - -define void @commute_m_pmulhrw(ptr%a0, ptr%a1, ptr%a2) nounwind { -; X86-LABEL: commute_m_pmulhrw: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movq (%edx), %mm0 -; X86-NEXT: pmulhrw (%eax), %mm0 -; X86-NEXT: pmulhrw (%ecx), %mm0 -; X86-NEXT: movq %mm0, (%ecx) -; X86-NEXT: retl -; -; X64-LABEL: commute_m_pmulhrw: -; X64: # %bb.0: -; X64-NEXT: movq (%rdi), %mm0 -; X64-NEXT: pmulhrw (%rsi), %mm0 -; X64-NEXT: pmulhrw (%rdx), %mm0 -; X64-NEXT: movq %mm0, (%rdx) -; X64-NEXT: retq - %1 = load x86_mmx, ptr %a0 - %2 = load x86_mmx, ptr %a1 - %3 = load x86_mmx, ptr %a2 - %4 = tail call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %1, x86_mmx %2) - %5 = tail call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %3, x86_mmx %4) - store x86_mmx %5, ptr %a2 - ret void -} -declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) diff --git a/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir b/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir index 800af1ce5432e..559560ac20f8a 100644 --- a/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir +++ b/llvm/test/CodeGen/X86/expand-vr64-gr64-copy.mir @@ -1,36 +1,31 @@ -# RUN: llc -run-pass postrapseudos -mtriple=x86_64-unknown-unknown -mattr=+3dnow -o - %s | FileCheck %s +# RUN: llc -run-pass postrapseudos -mtriple=x86_64-unknown-unknown -mattr=+mmx -o - %s | FileCheck %s # This test verifies that the ExpandPostRA pass expands the GR64 <-> VR64 # copies into appropriate MMX_MOV instructions. --- | - define <2 x i32> @test_pswapdsi(<2 x i32> %a) nounwind readnone { + define <2 x i32> @test_paddw(<2 x i32> %a) nounwind readnone { entry: %0 = bitcast <2 x i32> %a to x86_mmx - %1 = tail call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %0) + %1 = tail call x86_mmx @llvm.x86.mmx.padd.w(x86_mmx %0, x86_mmx %0) %2 = bitcast x86_mmx %1 to <2 x i32> ret <2 x i32> %2 } - declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone - ... --- -name: test_pswapdsi +name: test_paddw tracksRegLiveness: true body: | bb.0.entry: liveins: $xmm0 - - $xmm0 = PSHUFDri killed $xmm0, -24 - MOVPQI2QImr $rsp, 1, $noreg, -8, $noreg, killed $xmm0 - $mm0 = PSWAPDrm $rsp, 1, $noreg, -8, $noreg + $mm0 = MMX_MOVDQ2Qrr killed $xmm0 + $mm0 = MMX_PADDWrr killed $mm0, $mm0 + ; Inserted dummy copy here, for test: ; CHECK: $rax = MMX_MOVD64from64rr $mm0 ; CHECK-NEXT: $mm0 = MMX_MOVD64to64rr $rax $rax = COPY $mm0 $mm0 = COPY $rax - MMX_MOVQ64mr $rsp, 1, $noreg, -16, $noreg, killed $mm0 - $xmm0 = MOVQI2PQIrm $rsp, 1, $noreg, -16, $noreg - $xmm0 = PSHUFDri killed $xmm0, -44 - RET64 $xmm0 + $xmm0 = MMX_MOVQ2DQrr killed $mm0 + RET 0, $xmm0 ... diff --git a/llvm/test/CodeGen/X86/pr35982.ll b/llvm/test/CodeGen/X86/pr35982.ll index 4a79a109f8b60..b6022698edaeb 100644 --- a/llvm/test/CodeGen/X86/pr35982.ll +++ b/llvm/test/CodeGen/X86/pr35982.ll @@ -46,50 +46,5 @@ define float @PR35982_emms(<1 x i64>) nounwind { ret float %11 } -define float @PR35982_femms(<1 x i64>) nounwind { -; NO-POSTRA-LABEL: PR35982_femms: -; NO-POSTRA: # %bb.0: -; NO-POSTRA-NEXT: subl $8, %esp -; NO-POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax -; NO-POSTRA-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; NO-POSTRA-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1] -; NO-POSTRA-NEXT: movd %mm0, %ecx -; NO-POSTRA-NEXT: femms -; NO-POSTRA-NEXT: movl %eax, (%esp) -; NO-POSTRA-NEXT: fildl (%esp) -; NO-POSTRA-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; NO-POSTRA-NEXT: fiaddl {{[0-9]+}}(%esp) -; NO-POSTRA-NEXT: addl $8, %esp -; NO-POSTRA-NEXT: retl -; -; POSTRA-LABEL: PR35982_femms: -; POSTRA: # %bb.0: -; POSTRA-NEXT: subl $8, %esp -; POSTRA-NEXT: movq {{[0-9]+}}(%esp), %mm0 -; POSTRA-NEXT: movl {{[0-9]+}}(%esp), %eax -; POSTRA-NEXT: punpckhdq %mm0, %mm0 # mm0 = mm0[1,1] -; POSTRA-NEXT: movd %mm0, %ecx -; POSTRA-NEXT: femms -; POSTRA-NEXT: movl %eax, (%esp) -; POSTRA-NEXT: fildl (%esp) -; POSTRA-NEXT: movl %ecx, {{[0-9]+}}(%esp) -; POSTRA-NEXT: fiaddl {{[0-9]+}}(%esp) -; POSTRA-NEXT: addl $8, %esp -; POSTRA-NEXT: retl - %2 = bitcast <1 x i64> %0 to <2 x i32> - %3 = extractelement <2 x i32> %2, i32 0 - %4 = extractelement <1 x i64> %0, i32 0 - %5 = bitcast i64 %4 to x86_mmx - %6 = tail call x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx %5, x86_mmx %5) - %7 = bitcast x86_mmx %6 to <2 x i32> - %8 = extractelement <2 x i32> %7, i32 0 - tail call void @llvm.x86.mmx.femms() - %9 = sitofp i32 %3 to float - %10 = sitofp i32 %8 to float - %11 = fadd float %9, %10 - ret float %11 -} - declare x86_mmx @llvm.x86.mmx.punpckhdq(x86_mmx, x86_mmx) -declare void @llvm.x86.mmx.femms() declare void @llvm.x86.mmx.emms() diff --git a/llvm/test/CodeGen/X86/prefetch.ll b/llvm/test/CodeGen/X86/prefetch.ll index c10e0526787d5..c3551644dfb7f 100644 --- a/llvm/test/CodeGen/X86/prefetch.ll +++ b/llvm/test/CodeGen/X86/prefetch.ll @@ -6,16 +6,11 @@ ; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=X86-PRFCHWSSE ; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=X86-PRFCHWSSE ; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=X86-SSE -; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=X86-3DNOW -; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=X86-3DNOW +; RUN: llc < %s -mtriple=i686-- -mattr=+prfchw | FileCheck %s -check-prefix=X86-PRFCHWSSE ; Rules: -; 3dnow by itself get you just the single prefetch instruction with no hints ; sse provides prefetch0/1/2/nta -; supporting prefetchw, but not 3dnow implicitly provides prefetcht0/1/2/nta regardless of sse setting as we need something to fall back to for the non-write hint. -; 3dnow prefetch instruction will only get used if you have no other prefetch instructions enabled - -; rdar://10538297 +; supporting prefetchw implicitly provides prefetcht0/1/2/nta as well, as we need something to fall back to for the non-write hint. define void @t(ptr %ptr) nounwind { ; X86-SSE-LABEL: t: @@ -43,19 +38,7 @@ define void @t(ptr %ptr) nounwind { ; X86-PRFCHWSSE-NEXT: prefetchw (%eax) ; X86-PRFCHWSSE-NEXT: prefetchw (%eax) ; X86-PRFCHWSSE-NEXT: retl -; -; X86-3DNOW-LABEL: t: -; X86-3DNOW: # %bb.0: # %entry -; X86-3DNOW-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-3DNOW-NEXT: prefetch (%eax) -; X86-3DNOW-NEXT: prefetch (%eax) -; X86-3DNOW-NEXT: prefetch (%eax) -; X86-3DNOW-NEXT: prefetch (%eax) -; X86-3DNOW-NEXT: prefetchw (%eax) -; X86-3DNOW-NEXT: prefetchw (%eax) -; X86-3DNOW-NEXT: prefetchw (%eax) -; X86-3DNOW-NEXT: prefetchw (%eax) -; X86-3DNOW-NEXT: retl + entry: tail call void @llvm.prefetch( ptr %ptr, i32 0, i32 1, i32 1 ) tail call void @llvm.prefetch( ptr %ptr, i32 0, i32 2, i32 1 ) diff --git a/llvm/test/CodeGen/X86/stack-folding-3dnow.ll b/llvm/test/CodeGen/X86/stack-folding-3dnow.ll deleted file mode 100644 index 1cbd61567f327..0000000000000 --- a/llvm/test/CodeGen/X86/stack-folding-3dnow.ll +++ /dev/null @@ -1,387 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+3dnow | FileCheck %s - -define x86_mmx @stack_fold_pavgusb(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pavgusb: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pavgusb {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pf2id(x86_mmx %a) { -; CHECK-LABEL: stack_fold_pf2id: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pf2id {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %a) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pf2iw(x86_mmx %a) { -; CHECK-LABEL: stack_fold_pf2iw: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pf2iw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %a) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfacc(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfacc: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfacc {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfadd(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfadd: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfadd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfcmpeq(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfcmpeq: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfcmpeq {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfcmpge(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfcmpge: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfcmpge {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfcmpgt(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfcmpgt: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfcmpgt {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfmax(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfmax: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfmax {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfmin(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfmin: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfmin {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfmul(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfmul: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfmul {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfnacc(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfnacc: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfnacc {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfpnacc(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfpnacc: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfpnacc {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnowa.pfpnacc(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfrcp(x86_mmx %a) { -; CHECK-LABEL: stack_fold_pfrcp: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfrcp {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %a) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfrcpit1(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfrcpit1: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfrcpit1 {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfrcpit2(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfrcpit2: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfrcpit2 {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfrsqit1(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfrsqit1: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfrsqit1 {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfrsqrt(x86_mmx %a) { -; CHECK-LABEL: stack_fold_pfrsqrt: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfrsqrt {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %a) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfsub(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfsub: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfsub {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pfsubr(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pfsubr: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pfsubr {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pi2fd(x86_mmx %a) { -; CHECK-LABEL: stack_fold_pi2fd: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pi2fd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %a) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pi2fw(x86_mmx %a) { -; CHECK-LABEL: stack_fold_pi2fw: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pi2fw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %a) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pmulhrw(x86_mmx %a, x86_mmx %b) { -; CHECK-LABEL: stack_fold_pmulhrw: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm1, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pmulhrw {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a, x86_mmx %b) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx, x86_mmx) nounwind readnone - -define x86_mmx @stack_fold_pswapd(x86_mmx %a) { -; CHECK-LABEL: stack_fold_pswapd: -; CHECK: # %bb.0: -; CHECK-NEXT: movq %mm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill -; CHECK-NEXT: #APP -; CHECK-NEXT: nop -; CHECK-NEXT: #NO_APP -; CHECK-NEXT: pswapd {{[-0-9]+}}(%r{{[sb]}}p), %mm0 # 8-byte Folded Reload -; CHECK-NEXT: # mm0 = mem[1,0] -; CHECK-NEXT: movq2dq %mm0, %xmm0 -; CHECK-NEXT: retq - %1 = tail call x86_mmx asm sideeffect "nop", "=y,~{mm1},~{mm2},~{mm3},~{mm4},~{mm5},~{mm6},~{mm7}"() - %2 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %a) nounwind readnone - ret x86_mmx %2 -} -declare x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx) nounwind readnone