From 22909c00920365721d8a173fcb33ff4a57e3e999 Mon Sep 17 00:00:00 2001 From: Carol Eidt Date: Fri, 29 Mar 2019 09:06:53 -0700 Subject: [PATCH] [WIP] Support for Arm64 Vector ABI Extend HFA support to support vectors as well as floating point types. Fix #16022 --- src/jit/codegenarm64.cpp | 4 +- src/jit/codegenarmarch.cpp | 8 +- src/jit/codegencommon.cpp | 27 +++- src/jit/compiler.cpp | 187 +++++++++++--------------- src/jit/compiler.h | 170 ++++++++++++----------- src/jit/compiler.hpp | 6 +- src/jit/dll/jit.nativeproj | 2 +- src/jit/flowgraph.cpp | 2 +- src/jit/gentree.cpp | 10 +- src/jit/gentree.h | 3 + src/jit/importer.cpp | 10 +- src/jit/lclvars.cpp | 44 +++--- src/jit/lower.cpp | 3 +- src/jit/lsraarmarch.cpp | 2 +- src/jit/lsrabuild.cpp | 14 +- src/jit/morph.cpp | 168 ++++++++++++++--------- src/jit/register_arg_convention.h | 4 +- src/jit/simd.cpp | 2 +- src/jit/target.h | 14 +- src/jit/vartype.h | 124 ++++++++++++++++- src/vm/argdestination.h | 14 +- src/vm/arm64/CallDescrWorkerARM64.asm | 12 +- src/vm/arm64/asmconstants.h | 2 +- src/vm/arm64/asmhelpers.asm | 5 +- src/vm/arm64/calldescrworkerarm64.S | 13 +- src/vm/arm64/cgencpu.h | 2 +- src/vm/callhelpers.h | 5 + src/vm/callingconvention.h | 51 +++++-- src/vm/class.cpp | 148 +++++++++++++++++--- src/vm/class.h | 23 +++- src/vm/methodtable.h | 3 + 31 files changed, 722 insertions(+), 360 deletions(-) diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp index 9cfaccef0851..734e7ea5be0d 100644 --- a/src/jit/codegenarm64.cpp +++ b/src/jit/codegenarm64.cpp @@ -2021,10 +2021,10 @@ void CodeGen::genSimpleReturn(GenTree* treeNode) GenTree* op1 = treeNode->gtGetOp1(); var_types targetType = treeNode->TypeGet(); - assert(!isStructReturn(treeNode)); + assert(targetType != TYP_STRUCT); assert(targetType != TYP_VOID); - regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET; + regNumber retReg = varTypeUsesFloatArgReg(treeNode) ? 
REG_FLOATRET : REG_INTRET; bool movRequired = (op1->gtRegNum != retReg); diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp index cdb818b93422..3ed47a9d9dae 100644 --- a/src/jit/codegenarmarch.cpp +++ b/src/jit/codegenarmarch.cpp @@ -2355,7 +2355,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call) } else { - assert(!varTypeIsStruct(call)); + assert(call->gtType != TYP_STRUCT); if (call->gtType == TYP_REF) { @@ -2509,9 +2509,13 @@ void CodeGen::genCallInstruction(GenTreeCall* call) // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers. returnReg = REG_PINVOKE_TCB; } + else if (compiler->opts.compUseSoftFP) + { + returnReg = REG_INTRET; + } else #endif // _TARGET_ARM_ - if (varTypeIsFloating(returnType) && !compiler->opts.compUseSoftFP) + if (varTypeUsesFloatArgReg(returnType)) { returnReg = REG_FLOATRET; } diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp index 9946ff33a6db..fb3f0c13dd99 100644 --- a/src/jit/codegencommon.cpp +++ b/src/jit/codegencommon.cpp @@ -10160,7 +10160,11 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass) structPassingKind howToReturnStruct; var_types returnType = getReturnTypeForStruct(hClass, &howToReturnStruct); +#ifdef _TARGET_ARM64_ + return (varTypeIsStruct(returnType) && (howToReturnStruct != SPK_PrimitiveType)); +#else return (varTypeIsStruct(returnType)); +#endif } //---------------------------------------------- @@ -10169,11 +10173,7 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass) bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass) { -#ifdef FEATURE_HFA - return varTypeIsFloating(GetHfaType(hClass)); -#else - return false; -#endif + return varTypeIsValidHfaType(GetHfaType(hClass)); } bool Compiler::IsHfa(GenTree* tree) @@ -10206,7 +10206,20 @@ var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass) { #ifdef FEATURE_HFA CorInfoType corType = info.compCompHnd->getHFAType(hClass); - if (corType != CORINFO_TYPE_UNDEF) +#ifdef 
_TARGET_ARM64_ + if (corType == CORINFO_TYPE_VALUECLASS) + { + // This is a vector type. + // HVAs are only supported on ARM64, and only for sizes of 8 or 16 bytes. + // For 8-byte vectors corType will be returned as CORINFO_TYPE_DOUBLE. + result = TYP_SIMD16; + JITDUMP("Found an HVA of SIMD16\n"); + // This type may not appear elsewhere, but it will occupy a floating point register. + compFloatingPointUsed = true; + } + else +#endif // _TARGET_ARM64_ + if (corType != CORINFO_TYPE_UNDEF) { result = JITtype2varType(corType); } @@ -11515,7 +11528,7 @@ void CodeGen::genReturn(GenTree* treeNode) else #endif // _TARGET_X86_ || _TARGET_ARM_ { - if (isStructReturn(treeNode)) + if (targetType == TYP_STRUCT) { genStructReturn(treeNode); } diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp index e005a77c2900..1cc2f600ed16 100644 --- a/src/jit/compiler.cpp +++ b/src/jit/compiler.cpp @@ -573,8 +573,8 @@ bool Compiler::isSingleFloat32Struct(CORINFO_CLASS_HANDLE clsHnd) // of size 'structSize'. // We examine 'clsHnd' to check the GC layout of the struct and // return TYP_REF for structs that simply wrap an object. -// If the struct is a one element HFA, we will return the -// proper floating point type. +// If the struct is a one element HFA/HVA, we will return the +// proper floating point or vector type. // // Arguments: // structSize - the size of the struct type, cannot be zero @@ -592,13 +592,64 @@ bool Compiler::isSingleFloat32Struct(CORINFO_CLASS_HANDLE clsHnd) // same way as any other 8-byte struct // For ARM32 if we have an HFA struct that wraps a 64-bit double // we will return TYP_DOUBLE. +// For vector calling conventions, a vector is considered a "primitive" +// type, as it is passed in a single register. 
// var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS_HANDLE clsHnd, bool isVarArg) { assert(structSize != 0); - var_types useType; + var_types useType = TYP_UNKNOWN; +// Start by determining if we have an HFA/HVA with a single element. +#ifdef FEATURE_HFA +#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) + // Arm64 Windows VarArg methods arguments will not classify HFA types, they will need to be treated + // as if they are not HFA types. + if (!isVarArg) +#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) + { + switch (structSize) + { + case 4: + case 8: +#ifdef _TARGET_ARM64_ + case 16: +#endif // _TARGET_ARM64_ + { + var_types hfaType = TYP_UNDEF; +#ifdef ARM_SOFTFP + // For ARM_SOFTFP, HFA is unsupported so we need to check in another way. + // This matters only for size-4 struct because bigger structs would be processed with RetBuf. + if (isSingleFloat32Struct(clsHnd)) + { + hfaType = TYP_FLOAT; + } +#else // !ARM_SOFTFP + hfaType = GetHfaType(clsHnd); +#endif // ARM_SOFTFP + // We're only interested in the case where the struct size is equal to the size of the hfaType. + if (varTypeIsValidHfaType(hfaType)) + { + if (genTypeSize(hfaType) == structSize) + { + useType = hfaType; + } + else + { + return TYP_UNKNOWN; + } + } + } + } + if (useType != TYP_UNKNOWN) + { + return useType; + } + } +#endif // FEATURE_HFA + + // Now deal with non-HFA/HVA structs. switch (structSize) { case 1: @@ -618,15 +669,8 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS #ifdef _TARGET_64BIT_ case 4: - if (IsHfa(clsHnd)) - { - // A structSize of 4 with IsHfa, it must be an HFA of one float - useType = TYP_FLOAT; - } - else - { - useType = TYP_INT; - } + // We dealt with the one-float HFA above. All other 4-byte structs are handled as INT. 
+ useType = TYP_INT; break; #if !defined(_TARGET_XARCH_) || defined(UNIX_AMD64_ABI) @@ -640,86 +684,13 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS #endif // _TARGET_64BIT_ case TARGET_POINTER_SIZE: -#ifdef ARM_SOFTFP - // For ARM_SOFTFP, HFA is unsupported so we need to check in another way - // This matters only for size-4 struct cause bigger structs would be processed with RetBuf - if (isSingleFloat32Struct(clsHnd)) -#else // !ARM_SOFTFP - if (IsHfa(clsHnd) -#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) - // Arm64 Windows VarArg methods arguments will not - // classify HFA types, they will need to be treated - // as if they are not HFA types. - && !isVarArg -#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) - ) -#endif // ARM_SOFTFP - { -#ifdef _TARGET_64BIT_ - var_types hfaType = GetHfaType(clsHnd); - - // A structSize of 8 with IsHfa, we have two possiblities: - // An HFA of one double or an HFA of two floats - // - // Check and exclude the case of an HFA of two floats - if (hfaType == TYP_DOUBLE) - { - // We have an HFA of one double - useType = TYP_DOUBLE; - } - else - { - assert(hfaType == TYP_FLOAT); - - // We have an HFA of two floats - // This should be passed or returned in two FP registers - useType = TYP_UNKNOWN; - } -#else // a 32BIT target - // A structSize of 4 with IsHfa, it must be an HFA of one float - useType = TYP_FLOAT; -#endif // _TARGET_64BIT_ - } - else - { - BYTE gcPtr = 0; - // Check if this pointer-sized struct is wrapping a GC object - info.compCompHnd->getClassGClayout(clsHnd, &gcPtr); - useType = getJitGCType(gcPtr); - } - break; - -#ifdef _TARGET_ARM_ - case 8: - if (IsHfa(clsHnd)) - { - var_types hfaType = GetHfaType(clsHnd); - - // A structSize of 8 with IsHfa, we have two possiblities: - // An HFA of one double or an HFA of two floats - // - // Check and exclude the case of an HFA of two floats - if (hfaType == TYP_DOUBLE) - { - // We have an HFA of one double - 
useType = TYP_DOUBLE; - } - else - { - assert(hfaType == TYP_FLOAT); - - // We have an HFA of two floats - // This should be passed or returned in two FP registers - useType = TYP_UNKNOWN; - } - } - else - { - // We don't have an HFA - useType = TYP_UNKNOWN; - } - break; -#endif // _TARGET_ARM_ + { + BYTE gcPtr = 0; + // Check if this pointer-sized struct is wrapping a GC object + info.compCompHnd->getClassGClayout(clsHnd, &gcPtr); + useType = getJitGCType(gcPtr); + } + break; default: useType = TYP_UNKNOWN; @@ -802,11 +773,11 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, else #endif // UNIX_AMD64_ABI - // The largest primitive type is 8 bytes (TYP_DOUBLE) + // The largest arg passed in a single register is MAX_PASS_SINGLEREG_BYTES, // so we can skip calling getPrimitiveTypeForStruct when we // have a struct that is larger than that. // - if (structSize <= sizeof(double)) + if (structSize <= MAX_PASS_SINGLEREG_BYTES) { // We set the "primitive" useType based upon the structSize // and also examine the clsHnd to see if it is an HFA of count one @@ -829,14 +800,21 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, // if (structSize <= MAX_PASS_MULTIREG_BYTES) { - // Structs that are HFA's are passed by value in multiple registers - if (IsHfa(clsHnd) + // Structs that are HFA's are passed by value in multiple registers. + // Arm64 Windows VarArg methods arguments will not classify HFA types, they will need to be treated + // as if they are not HFA types. + var_types hfaType; #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) - && !isVarArg // Arm64 Windows VarArg methods arguments will not - // classify HFA types, they will need to be treated - // as if they are not HFA types. 
-#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) - ) + if (isVarArg) + { + hfaType = TYP_UNDEF; + } + else +#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) + { + hfaType = GetHfaType(clsHnd); + } + if (varTypeIsValidHfaType(hfaType)) { // HFA's of count one should have been handled by getPrimitiveTypeForStruct assert(GetHfaCount(clsHnd) >= 2); @@ -851,7 +829,6 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, { #ifdef UNIX_AMD64_ABI - // The case of (structDesc.eightByteCount == 1) should have already been handled if ((structDesc.eightByteCount > 1) || !structDesc.passedInRegisters) { @@ -1035,10 +1012,10 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, // Check for cases where a small struct is returned in a register // via a primitive type. // - // The largest primitive type is 8 bytes (TYP_DOUBLE) + // The largest "primitive type" is MAX_PASS_SINGLEREG_BYTES // so we can skip calling getPrimitiveTypeForStruct when we // have a struct that is larger than that. - if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= sizeof(double))) + if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= MAX_PASS_SINGLEREG_BYTES)) { // We set the "primitive" useType based upon the structSize // and also examine the clsHnd to see if it is an HFA of count one @@ -1070,7 +1047,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, // because when HFA are enabled, normally we would use two FP registers to pass or return it // // But if we don't have support for multiple register return types, we have to change this. - // Since we what we have an 8-byte struct (float + float) we change useType to TYP_I_IMPL + // Since what we have is an 8-byte struct (float + float) we change useType to TYP_I_IMPL // so that the struct is returned instead using an 8-byte integer register. 
// if ((FEATURE_MULTIREG_RET == 0) && (useType == TYP_UNKNOWN) && (structSize == (2 * sizeof(float))) && IsHfa(clsHnd)) diff --git a/src/jit/compiler.h b/src/jit/compiler.h index 400e61b1f3d2..1546048983b8 100644 --- a/src/jit/compiler.h +++ b/src/jit/compiler.h @@ -595,11 +595,9 @@ class LclVarDsc unsigned char lvIsMultiRegRet : 1; // true if this is a multireg LclVar struct assigned from a multireg call #ifdef FEATURE_HFA - unsigned char _lvIsHfa : 1; // Is this a struct variable who's class handle is an HFA type - unsigned char _lvIsHfaRegArg : 1; // Is this a HFA argument variable? // TODO-CLEANUP: Remove this and replace - // with (lvIsRegArg && lvIsHfa()) - unsigned char _lvHfaTypeIsFloat : 1; // Is the HFA type float or double? -#endif // FEATURE_HFA + unsigned char _lvIsHfa : 1; // Is this a struct variable who's class handle is an HFA type + unsigned char _lvHfaLogSize : 2; // The base-2 log of the HFA element size in 4-byte units (e.g. 0 for TYP_FLOAT) +#endif // FEATURE_HFA #ifdef DEBUG // TODO-Cleanup: See the note on lvSize() - this flag is only in use by asserts that are checking for struct @@ -682,54 +680,32 @@ class LclVarDsc bool lvIsHfaRegArg() const { #ifdef FEATURE_HFA - return _lvIsHfaRegArg; + return lvIsRegArg && _lvIsHfa; #else return false; #endif } - void lvSetIsHfaRegArg(bool value = true) - { -#ifdef FEATURE_HFA - _lvIsHfaRegArg = value; -#endif - } - - bool lvHfaTypeIsFloat() const - { -#ifdef FEATURE_HFA - return _lvHfaTypeIsFloat; -#else - return false; -#endif - } - - void lvSetHfaTypeIsFloat(bool value) - { -#ifdef FEATURE_HFA - _lvHfaTypeIsFloat = value; -#endif - } - - // on Arm64 - Returns 1-4 indicating the number of register slots used by the HFA - // on Arm32 - Returns the total number of single FP register slots used by the HFA, max is 8 + //------------------------------------------------------------------------------ + // lvHfaSlots: Get the number of slots used by an HFA local + // + // Return Value: + // On Arm64 - 
Returns 1-4 indicating the number of register slots used by the HFA + // On Arm32 - Returns the total number of single FP register slots used by the HFA, max is 8 // unsigned lvHfaSlots() const { assert(lvIsHfa()); assert(varTypeIsStruct(lvType)); + unsigned slots = 0; #ifdef _TARGET_ARM_ - return lvExactSize / sizeof(float); -#else // _TARGET_ARM64_ - if (lvHfaTypeIsFloat()) - { - return lvExactSize / sizeof(float); - } - else - { - return lvExactSize / sizeof(double); - } + slots = lvExactSize / sizeof(float); + assert(slots <= 8); +#elif defined(_TARGET_ARM64_) + slots = lvExactSize >> (2 + _lvHfaLogSize); + assert(slots <= 4); #endif // _TARGET_ARM64_ + return slots; } // lvIsMultiRegArgOrRet() @@ -750,7 +726,7 @@ class LclVarDsc regNumberSmall _lvOtherReg; // Used for "upper half" of long var. #endif // !defined(_TARGET_64BIT_) - regNumberSmall _lvArgReg; // The register in which this argument is passed. + regNumberSmall _lvArgReg; // The (first) register in which this argument is passed. #if FEATURE_MULTIREG_ARGS regNumberSmall _lvOtherArgReg; // Used for the second part of the struct passed in a register. @@ -1030,14 +1006,22 @@ class LclVarDsc { return isFloatRegType(lvType) || lvIsHfaRegArg(); } + var_types GetHfaType() const { - return lvIsHfa() ? (lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE) : TYP_UNDEF; +#ifdef FEATURE_HFA + assert(lvIsHfa()); + return hfaTypeForLogSize(_lvHfaLogSize); +#endif // FEATURE_HFA + return TYP_UNDEF; } + void SetHfaType(var_types type) { - assert(varTypeIsFloating(type)); - lvSetHfaTypeIsFloat(type == TYP_FLOAT); +#ifdef FEATURE_HFA + assert(lvIsHfa()); + _lvHfaLogSize = logSizeForHfaType(type); +#endif // FEATURE_HFA } var_types lvaArgType(); @@ -1487,8 +1471,8 @@ struct fgArgTabEntry bool _isSplit : 1; // True when this argument is split between the registers and OutArg area #endif // FEATURE_ARG_SPLIT #ifdef FEATURE_HFA - bool _isHfaArg : 1; // True when the argument is an HFA type. 
- bool _isDoubleHfa : 1; // True when the argument is an HFA, with an element type of DOUBLE. + bool _isHfaArg : 1; // True when the argument is an HFA type. + unsigned char _hfaLogSize : 2; // The base 2 log of the HFA element size in 4-byte units (e.g. 0 for TYP_FLOAT) #endif bool isLateArg() @@ -1589,10 +1573,12 @@ struct fgArgTabEntry var_types getHfaType() { #ifdef FEATURE_HFA - return _isHfaArg ? (_isDoubleHfa ? TYP_DOUBLE : TYP_FLOAT) : TYP_UNDEF; -#else + if (_isHfaArg) + { + return hfaTypeForLogSize(_hfaLogSize); + } +#endif // FEATURE_HFA return TYP_UNDEF; -#endif } void setHfaType(var_types type, unsigned hfaSlots) @@ -1607,29 +1593,35 @@ struct fgArgTabEntry // Note that hfaSlots is the number of registers we will use. For ARM, that is twice // the number of "double registers". unsigned numHfaRegs = hfaSlots; - if (isPassedInRegisters()) - { #ifdef _TARGET_ARM_ - if (type == TYP_DOUBLE) - { - // Must be an even number of registers. - assert((numRegs & 1) == 0); - numHfaRegs = hfaSlots / 2; - } + if (type == TYP_DOUBLE) + { + // Must be an even number of registers. + assert((numRegs & 1) == 0); + numHfaRegs = hfaSlots / 2; + } #endif // _TARGET_ARM_ - if (_isHfaArg) + + if (!_isHfaArg) + { + // We haven't previously set this; do so now. + unsigned logSize = logSizeForHfaType(type); + if (isPassedInRegisters()) { - // This should already be set correctly. - assert(numRegs == numHfaRegs); - assert(_isDoubleHfa == (type == TYP_DOUBLE)); + numRegs = numHfaRegs; } - else + _isHfaArg = true; + _hfaLogSize = logSize; + } + else + { + // We've already set this; ensure that it's consistent. 
+ if (isPassedInRegisters()) { - numRegs = numHfaRegs; + assert(numRegs == numHfaRegs); } + assert(_hfaLogSize == logSizeForHfaType(type)); } - _isDoubleHfa = (type == TYP_DOUBLE); - _isHfaArg = true; } #endif // FEATURE_HFA } @@ -1701,22 +1693,30 @@ struct fgArgTabEntry { unsigned size = getSlotCount(); #ifdef FEATURE_HFA -#ifdef _TARGET_ARM_ - // We counted the number of regs, but if they are DOUBLE hfa regs we have to double the size. - if (isHfaRegArg && (hfaType == TYP_DOUBLE)) + if (isHfaRegArg) { - assert(!isSplit); - size <<= 1; - } +#ifdef _TARGET_ARM_ + // We counted the number of regs, but if they are DOUBLE hfa regs we have to double the size. + if (hfaType == TYP_DOUBLE) + { + assert(!isSplit); + size <<= 1; + } #elif defined(_TARGET_ARM64_) - // We counted the number of regs, but if they are FLOAT hfa regs we have to halve the size. - if (isHfaRegArg && (hfaType == TYP_FLOAT)) - { - // Round up in case of odd HFA count. - size = (size + 1) >> 1; - } + // We counted the number of regs, but if they are FLOAT hfa regs we have to halve the size, + // or if they are SIMD16 vector hfa regs we have to double the size. + if (hfaType == TYP_FLOAT) + { + // Round up in case of odd HFA count. + size = (size + 1) >> 1; + } + else if (hfaType == TYP_SIMD16) + { + size <<= 1; + } #endif // _TARGET_ARM64_ -#endif + } +#endif // FEATURE_HFA return size; } @@ -7612,6 +7612,18 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // Should we support SIMD intrinsics? bool featureSIMD; + // Should we recognize SIMD types? + // We always do this on ARM64 to support HVA types. + __declspec(property(get = GetSupportSIMDTypes)) bool supportSIMDTypes; + bool GetSupportSIMDTypes() + { +#ifdef _TARGET_ARM64_ + return true; +#else + return featureSIMD; +#endif + } + // Have we identified any SIMD types? 
// This is currently used by struct promotion to avoid getting type information for a struct // field to see if it is a SIMD type, if we haven't seen any SIMD types or operations in diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp index 901a58ec9e35..0b30114f6768 100644 --- a/src/jit/compiler.hpp +++ b/src/jit/compiler.hpp @@ -2919,7 +2919,7 @@ inline regNumber genMapFloatRegArgNumToRegNum(unsigned argNum) __forceinline regNumber genMapRegArgNumToRegNum(unsigned argNum, var_types type) { - if (varTypeIsFloating(type)) + if (varTypeUsesFloatArgReg(type)) { return genMapFloatRegArgNumToRegNum(argNum); } @@ -2957,7 +2957,7 @@ inline regMaskTP genMapFloatRegArgNumToRegMask(unsigned argNum) __forceinline regMaskTP genMapArgNumToRegMask(unsigned argNum, var_types type) { regMaskTP result; - if (varTypeIsFloating(type)) + if (varTypeUsesFloatArgReg(type)) { result = genMapFloatRegArgNumToRegMask(argNum); #ifdef _TARGET_ARM_ @@ -3076,7 +3076,7 @@ inline unsigned genMapFloatRegNumToRegArgNum(regNumber regNum) inline unsigned genMapRegNumToRegArgNum(regNumber regNum, var_types type) { - if (varTypeIsFloating(type)) + if (varTypeUsesFloatArgReg(type)) { return genMapFloatRegNumToRegArgNum(regNum); } diff --git a/src/jit/dll/jit.nativeproj b/src/jit/dll/jit.nativeproj index 7b8dacb78c52..56e94be9cac8 100644 --- a/src/jit/dll/jit.nativeproj +++ b/src/jit/dll/jit.nativeproj @@ -31,7 +31,7 @@ $(OutputName).def - $(ClDefines);FEATURE_SIMD + $(ClDefines);FEATURE_SIMD $(SdkLibPath)\kernel32.lib;$(SdkLibPath)\user32.lib;$(SdkLibPath)\advapi32.lib;$(SdkLibPath)\oleaut32.lib;$(SdkLibPath)\uuid.lib $(Win32DllLibs);$(ClrLibPath)\utilcode.lib diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp index 3075cf3bc12d..7eebedd04e1f 100644 --- a/src/jit/flowgraph.cpp +++ b/src/jit/flowgraph.cpp @@ -23258,7 +23258,7 @@ GenTreeStmt* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo) if (varTypeIsStruct(argType)) { structHnd = gtGetStructHandleIfPresent(argNode); - 
noway_assert(structHnd != NO_CLASS_HANDLE); + noway_assert((structHnd != NO_CLASS_HANDLE) || (argType != TYP_STRUCT)); } // Unsafe value cls check is not needed for diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp index 833d115cb2cc..5fa34833e0ba 100644 --- a/src/jit/gentree.cpp +++ b/src/jit/gentree.cpp @@ -17653,7 +17653,8 @@ GenTreeSIMD* Compiler::gtNewSIMDNode( assert(op1 != nullptr); SetOpLclRelatedToSIMDIntrinsic(op1); - return new (this, GT_SIMD) GenTreeSIMD(type, op1, simdIntrinsicID, baseType, size); + GenTreeSIMD* simdNode = new (this, GT_SIMD) GenTreeSIMD(type, op1, simdIntrinsicID, baseType, size); + return simdNode; } GenTreeSIMD* Compiler::gtNewSIMDNode( @@ -17663,7 +17664,8 @@ GenTreeSIMD* Compiler::gtNewSIMDNode( SetOpLclRelatedToSIMDIntrinsic(op1); SetOpLclRelatedToSIMDIntrinsic(op2); - return new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size); + GenTreeSIMD* simdNode = new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size); + return simdNode; } //------------------------------------------------------------------- @@ -18035,7 +18037,7 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, CORINFO_CLASS_HA case Compiler::SPK_PrimitiveType: { assert(returnType != TYP_UNKNOWN); - assert(!varTypeIsStruct(returnType)); + assert(returnType != TYP_STRUCT); m_regType[0] = returnType; break; } @@ -18046,7 +18048,7 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, CORINFO_CLASS_HA var_types hfaType = comp->GetHfaType(retClsHnd); // We should have an hfa struct type - assert(varTypeIsFloating(hfaType)); + assert(varTypeIsValidHfaType(hfaType)); // Note that the retail build issues a warning about a potential divsion by zero without this Max function unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType))); diff --git a/src/jit/gentree.h b/src/jit/gentree.h index 5d45427a395b..b294748b67c8 100644 --- a/src/jit/gentree.h +++ b/src/jit/gentree.h 
@@ -3532,6 +3532,9 @@ struct GenTreeCall final : public GenTree return varTypeIsLong(gtType); #elif FEATURE_MULTIREG_RET && defined(_TARGET_ARM_) return varTypeIsLong(gtType) || (varTypeIsStruct(gtType) && !HasRetBufArg()); +#elif defined(FEATURE_HFA) && defined(_TARGET_ARM64_) + // SIMD types are returned in vector regs on ARM64. + return (gtType == TYP_STRUCT) && !HasRetBufArg(); #elif FEATURE_MULTIREG_RET return varTypeIsStruct(gtType) && !HasRetBufArg(); #else diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index ce5c4740c7d3..5a175fd966e3 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -1217,7 +1217,7 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr, // If it is a multi-reg struct return, don't change the oper to GT_LCL_FLD. // That is, the IR will be of the form lclVar = call for multi-reg return // - GenTree* lcl = destAddr->gtOp.gtOp1; + GenTreeLclVar* lcl = destAddr->gtOp.gtOp1->AsLclVar(); if (src->AsCall()->HasMultiRegRetVal()) { // Mark the struct LclVar as used in a MultiReg return context @@ -1227,7 +1227,7 @@ GenTree* Compiler::impAssignStructPtr(GenTree* destAddr, lcl->gtFlags |= GTF_DONT_CSE; lvaTable[lcl->gtLclVarCommon.gtLclNum].lvIsMultiRegRet = true; } - else // The call result is not a multireg return + else if (lcl->gtType != src->gtType) { // We change this to a GT_LCL_FLD (from a GT_ADDR of a GT_LCL_VAR) lcl->ChangeOper(GT_LCL_FLD); @@ -1532,7 +1532,7 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd, #ifdef FEATURE_SIMD // Check to see if this is a SIMD type. - if (featureSIMD && !mayContainGCPtrs) + if (supportSIMDTypes && !mayContainGCPtrs) { unsigned originalSize = info.compCompHnd->getClassSize(structHnd); @@ -9001,7 +9001,7 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op, CORINFO_CLASS_HANDLE re { // It is possible that we now have a lclVar of scalar type. // If so, don't transform it to GT_LCL_FLD. 
- if (varTypeIsStruct(lvaTable[op->AsLclVar()->gtLclNum].lvType)) + if (lvaTable[op->AsLclVar()->gtLclNum].lvType != info.compRetNativeType) { op->ChangeOper(GT_LCL_FLD); } @@ -18935,7 +18935,7 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo) if ((!foundSIMDType || (type == TYP_STRUCT)) && isSIMDorHWSIMDClass(&(lclVarInfo[i + argCnt].lclVerTypeInfo))) { foundSIMDType = true; - if (featureSIMD && type == TYP_STRUCT) + if (supportSIMDTypes && type == TYP_STRUCT) { var_types structType = impNormStructType(lclVarInfo[i + argCnt].lclVerTypeInfo.GetClassHandle()); lclVarInfo[i + argCnt].lclTypeInfo = structType; diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp index 0f6cdf217348..7012299ac92f 100644 --- a/src/jit/lclvars.cpp +++ b/src/jit/lclvars.cpp @@ -124,7 +124,7 @@ void Compiler::lvaInitTypeRef() info.compILargsCount = info.compArgsCount; #ifdef FEATURE_SIMD - if (featureSIMD && (info.compRetNativeType == TYP_STRUCT)) + if (supportSIMDTypes && (info.compRetNativeType == TYP_STRUCT)) { var_types structType = impNormStructType(info.compMethodInfo->args.retTypeClass); info.compRetType = structType; @@ -149,7 +149,7 @@ void Compiler::lvaInitTypeRef() if ((howToReturnStruct == SPK_PrimitiveType) || (howToReturnStruct == SPK_EnclosingType)) { assert(returnType != TYP_UNKNOWN); - assert(!varTypeIsStruct(returnType)); + assert(returnType != TYP_STRUCT); info.compRetNativeType = returnType; @@ -397,7 +397,7 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo* varDscInfo) { varDsc->lvType = TYP_BYREF; #ifdef FEATURE_SIMD - if (featureSIMD) + if (supportSIMDTypes) { var_types simdBaseType = TYP_UNKNOWN; var_types type = impNormStructType(info.compClassHnd, nullptr, nullptr, &simdBaseType); @@ -505,7 +505,7 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo) } } #ifdef FEATURE_SIMD - else if (featureSIMD && varTypeIsSIMD(info.compRetType)) + else if (supportSIMDTypes && varTypeIsSIMD(info.compRetType)) { varDsc->lvSIMDType = true; varDsc->lvBaseType = 
@@ -598,8 +598,9 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) // If the argType is a struct, then check if it is an HFA if (varTypeIsStruct(argType)) { - hfaType = GetHfaType(typeHnd); // set to float or double if it is an HFA, otherwise TYP_UNDEF - isHfaArg = varTypeIsFloating(hfaType); + // hfaType is set to float, double or SIMD type if it is an HFA, otherwise TYP_UNDEF. + hfaType = GetHfaType(typeHnd); + isHfaArg = varTypeIsValidHfaType(hfaType); } } else if (info.compIsVarArgs) @@ -616,7 +617,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) if (isHfaArg) { - // We have an HFA argument, so from here on out treat the type as a float or double. + // We have an HFA argument, so from here on out treat the type as a float, double or vector. // The orginal struct type is available by using origArgType // We also update the cSlots to be the number of float/double fields in the HFA argType = hfaType; @@ -819,15 +820,20 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo) { // We need to save the fact that this HFA is enregistered varDsc->lvSetIsHfa(); - varDsc->lvSetIsHfaRegArg(); varDsc->SetHfaType(hfaType); - varDsc->lvIsMultiRegArg = (varDsc->lvHfaSlots() > 1); + // Note that we can have HVAs of SIMD types even if we are not recognizing intrinsics. + // In that case, we won't have normalized the vector types on the varDsc, so if we have a single vector + // register, we need to set the type now. Otherwise, later we'll assume this is passed by reference. 
+ if (varDsc->lvHfaSlots() != 1) + { + varDsc->lvIsMultiRegArg = true; + } } varDsc->lvIsRegArg = 1; #if FEATURE_MULTIREG_ARGS - if (varTypeIsStruct(argType)) + if (argType == TYP_STRUCT) { #if defined(UNIX_AMD64_ABI) varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType); @@ -2513,10 +2519,10 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool if (varDsc->lvExactSize <= MAX_PASS_MULTIREG_BYTES) { var_types hfaType = GetHfaType(typeHnd); // set to float or double if it is an HFA, otherwise TYP_UNDEF - if (varTypeIsFloating(hfaType)) + if (varTypeIsValidHfaType(hfaType)) { varDsc->_lvIsHfa = true; - varDsc->lvSetHfaTypeIsFloat(hfaType == TYP_FLOAT); + varDsc->SetHfaType(hfaType); // hfa variables can never contain GC pointers assert(varDsc->lvStructGcCount == 0); @@ -2588,8 +2594,7 @@ void Compiler::lvaSetStructUsedAsVarArg(unsigned varNum) LclVarDsc* varDsc = &lvaTable[varNum]; // For varargs methods incoming and outgoing arguments should not be treated // as HFA. 
- varDsc->_lvIsHfa = false; - varDsc->_lvHfaTypeIsFloat = false; + varDsc->_lvIsHfa = false; #endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) #endif // FEATURE_HFA } @@ -6914,16 +6919,9 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r } } - if (varDsc->lvIsHfaRegArg()) + if (varDsc->lvIsHfa()) { - if (varDsc->lvHfaTypeIsFloat()) - { - printf(" (enregistered HFA: float) "); - } - else - { - printf(" (enregistered HFA: double)"); - } + printf(" HFA(%s) ", varTypeName(varDsc->GetHfaType())); } if (varDsc->lvDoNotEnregister) diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp index a2f7080a9d40..c23527612fb7 100644 --- a/src/jit/lower.cpp +++ b/src/jit/lower.cpp @@ -5691,7 +5691,8 @@ void Lowering::ContainCheckRet(GenTreeOp* ret) { GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon(); LclVarDsc* varDsc = &(comp->lvaTable[lclVarCommon->gtLclNum]); - assert(varDsc->lvIsMultiRegRet); + // This must be a multi-reg return or an HFA of a single element. + assert(varDsc->lvIsMultiRegRet || (varDsc->lvIsHfa() && varTypeIsValidHfaType(varDsc->lvType))); // Mark var as contained if not enregistrable. 
if (!varTypeIsEnregisterableStruct(op1)) diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp index 251bf53ab267..9a54c3227978 100644 --- a/src/jit/lsraarmarch.cpp +++ b/src/jit/lsraarmarch.cpp @@ -208,7 +208,7 @@ int LinearScan::BuildCall(GenTreeCall* call) assert(retTypeDesc != nullptr); dstCandidates = retTypeDesc->GetABIReturnRegs(); } - else if (varTypeIsFloating(registerType)) + else if (varTypeUsesFloatArgReg(registerType)) { dstCandidates = RBM_FLOATRET; } diff --git a/src/jit/lsrabuild.cpp b/src/jit/lsrabuild.cpp index 5d8df339a67d..946756ad6e71 100644 --- a/src/jit/lsrabuild.cpp +++ b/src/jit/lsrabuild.cpp @@ -3060,7 +3060,7 @@ int LinearScan::BuildReturn(GenTree* tree) regMaskTP useCandidates = RBM_NONE; #if FEATURE_MULTIREG_RET - if (varTypeIsStruct(tree)) + if (tree->TypeGet() == TYP_STRUCT) { // op1 has to be either an lclvar or a multi-reg returning call if (op1->OperGet() == GT_LCL_VAR) @@ -3081,6 +3081,16 @@ int LinearScan::BuildReturn(GenTree* tree) return srcCount; } } + else if (varTypeIsSIMD(tree)) + { +#ifdef _TARGET_ARM64_ + useCandidates = allSIMDRegs(); + BuildUse(op1, useCandidates); + return 1; +#else // !_TARGET_ARM64_ + assert(!"Unexpected SIMD return type"); +#endif // !_TARGET_ARM64_ + } else #endif // FEATURE_MULTIREG_RET { @@ -3199,7 +3209,7 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node) GenTreeObj* obj = op1->AsObj(); GenTree* addr = obj->Addr(); unsigned size = obj->gtBlkSize; - assert(size <= TARGET_POINTER_SIZE); + assert(size <= MAX_PASS_SINGLEREG_BYTES); if (addr->OperIsLocalAddr()) { // We don't need a source register. 
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp index 2369d1eda53d..2d84fbd09d36 100644 --- a/src/jit/morph.cpp +++ b/src/jit/morph.cpp @@ -828,6 +828,7 @@ void fgArgTabEntry::Dump() { printf("fgArgTabEntry[arg %u", argNum); printf(" %d.%s", node->gtTreeID, GenTree::OpName(node->gtOper)); + printf(" %s", varTypeName(argType)); if (regNum != REG_STK) { printf(", %u reg%s:", numRegs, numRegs == 1 ? "" : "s"); @@ -867,7 +868,7 @@ void fgArgTabEntry::Dump() } if (isHfaRegArg) { - printf(", isHfa"); + printf(", isHfa(%s)", varTypeName(getHfaType())); } if (isBackFilled) { @@ -2300,12 +2301,16 @@ void fgArgInfo::EvalArgsToTemps() { setupArg = compiler->fgMorphCopyBlock(setupArg); #if defined(_TARGET_ARMARCH_) || defined(UNIX_AMD64_ABI) - // This scalar LclVar widening step is only performed for ARM and AMD64 unix. - // - CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum); - unsigned structSize = varDsc->lvExactSize; + if (lclVarType == TYP_STRUCT) + { + // This scalar LclVar widening step is only performed for ARM architectures. 
+ // + CORINFO_CLASS_HANDLE clsHnd = compiler->lvaGetStruct(tmpVarNum); + unsigned structSize = varDsc->lvExactSize; - scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->isVararg); + scalarType = + compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->isVararg); + } #endif // _TARGET_ARMARCH_ || defined (UNIX_AMD64_ABI) } @@ -2391,7 +2396,7 @@ void fgArgInfo::EvalArgsToTemps() #else // !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI) - if (varTypeIsStruct(defArg)) + if (defArg->TypeGet() == TYP_STRUCT) { clsHnd = compiler->gtGetStructHandleIfPresent(defArg); noway_assert(clsHnd != NO_CLASS_HANDLE); @@ -3079,7 +3084,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call) #ifdef FEATURE_HFA hfaType = GetHfaType(argx); - isHfaArg = varTypeIsFloating(hfaType); + isHfaArg = varTypeIsValidHfaType(hfaType); #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_) // Make sure for vararg methods isHfaArg is not true. @@ -3872,7 +3877,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) { if (isPow2(passingSize)) { - canTransform = true; + canTransform = (!argEntry->isHfaArg || (passingSize == genTypeSize(argEntry->getHfaType()))); } #if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI) @@ -3957,15 +3962,16 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } else { - // We have a struct argument that's less than pointer size, and it is either a power of 2, + // We have a struct argument that fits into a register, and it is either a power of 2, // or a local. - // Change our GT_OBJ into a GT_IND of the correct type. + // Change our argument, as needed, into a value of the appropriate type. 
CLANG_FORMAT_COMMENT_ANCHOR; #ifdef _TARGET_ARM_ assert((size == 1) || ((structBaseType == TYP_DOUBLE) && (size == 2))); #else - assert(size == 1); + assert((size == 1) || + (varTypeIsSIMD(structBaseType) && size == (genTypeSize(structBaseType) / REGSIZE_BYTES))); #endif assert((structBaseType != TYP_STRUCT) && (genTypeSize(structBaseType) >= originalSize)); @@ -4043,7 +4049,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) copyBlkClass = objClass; } } - else if (!varTypeIsIntegralOrI(varDsc->TypeGet())) + else if (genActualType(varDsc->TypeGet()) != structBaseType) { // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD argObj->ChangeOper(GT_LCL_FLD); @@ -4057,42 +4063,39 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) } assert(varTypeCanReg(argObj->TypeGet()) || ((copyBlkClass != NO_CLASS_HANDLE) && varTypeCanReg(structBaseType))); - - size = 1; } #endif // !_TARGET_X86_ #ifndef UNIX_AMD64_ABI // We still have a struct unless we converted the GT_OBJ into a GT_IND above... - if (varTypeIsStruct(structBaseType) && !argEntry->passedByRef) + if (isHfaArg && passUsingFloatRegs) { - if (isHfaArg && passUsingFloatRegs) - { - size = argEntry->numRegs; - } - else - { - // If the valuetype size is not a multiple of TARGET_POINTER_SIZE, - // we must copyblk to a temp before doing the obj to avoid - // the obj reading memory past the end of the valuetype - CLANG_FORMAT_COMMENT_ANCHOR; + size = argEntry->numRegs; + } + else if (structBaseType == TYP_STRUCT) + { + // If the valuetype size is not a multiple of TARGET_POINTER_SIZE, + // we must copyblk to a temp before doing the obj to avoid + // the obj reading memory past the end of the valuetype + CLANG_FORMAT_COMMENT_ANCHOR; - if (roundupSize > originalSize) - { - copyBlkClass = objClass; + if (roundupSize > originalSize) + { + copyBlkClass = objClass; - // There are a few special cases where we can omit using a CopyBlk - // where we normally would need to use one. 
+ // There are a few special cases where we can omit using a CopyBlk + // where we normally would need to use one. - if (argObj->gtObj.gtOp1->IsLocalAddrExpr() != nullptr) // Is the source a LclVar? - { - copyBlkClass = NO_CLASS_HANDLE; - } + if (argObj->OperIs(GT_OBJ) && + argObj->AsObj()->gtGetOp1()->IsLocalAddrExpr() != nullptr) // Is the source a LclVar? + { + copyBlkClass = NO_CLASS_HANDLE; } - - size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items } + + size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items } + #endif // !UNIX_AMD64_ABI } } @@ -4159,7 +4162,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call) #if FEATURE_MULTIREG_ARGS if (isStructArg) { - if (size > 1 || isHfaArg) + if ((size > 1) || (isHfaArg && (argx->TypeGet() == TYP_STRUCT))) { hasMultiregStructArgs = true; } @@ -4376,9 +4379,35 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call) } unsigned size = (fgEntryPtr->numRegs + fgEntryPtr->numSlots); - if ((size > 1) || fgEntryPtr->isHfaArg) + if ((size > 1) || (fgEntryPtr->isHfaArg && (argx->TypeGet() == TYP_STRUCT))) { foundStructArg = true; + if (fgEntryPtr->isHfaArg) + { + var_types hfaType = fgEntryPtr->hfaType; + unsigned structSize; + if (argx->OperIs(GT_OBJ)) + { + structSize = argx->AsObj()->gtBlkSize; + } + else + { + assert(argx->OperIs(GT_LCL_VAR)); + structSize = lvaGetDesc(argx->AsLclVar()->gtLclNum)->lvExactSize; + } + assert(structSize > 0); + if (structSize == genTypeSize(hfaType)) + { + if (argx->OperIs(GT_OBJ)) + { + fgMorphBlkToInd(argx->AsObj(), hfaType); + } + else + { + argx->gtType = hfaType; + } + } + } if (varTypeIsStruct(argx) && !argx->OperIs(GT_FIELD_LIST)) { arg = fgMorphMultiregStructArg(arg, fgEntryPtr); @@ -4490,14 +4519,19 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry #if FEATURE_MULTIREG_ARGS // Examine 'arg' and setup argValue objClass and structSize // - CORINFO_CLASS_HANDLE 
objClass = gtGetStructHandleIfPresent(arg); - GenTree* argValue = arg; // normally argValue will be arg, but see right below - unsigned structSize = 0; + CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg); + noway_assert(objClass != NO_CLASS_HANDLE); + GenTree* argValue = arg; // normally argValue will be arg, but see right below + unsigned structSize = 0; - if (arg->OperGet() == GT_OBJ) + if (arg->TypeGet() != TYP_STRUCT) + { + structSize = genTypeSize(arg->TypeGet()); + assert(structSize == info.compCompHnd->getClassSize(objClass)); + } + else if (arg->OperGet() == GT_OBJ) { GenTreeObj* argObj = arg->AsObj(); - objClass = argObj->gtClass; structSize = argObj->Size(); assert(structSize == info.compCompHnd->getClassSize(objClass)); @@ -4527,7 +4561,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry } else { - objClass = gtGetStructHandleIfPresent(arg); structSize = info.compCompHnd->getClassSize(objClass); } noway_assert(objClass != NO_CLASS_HANDLE); @@ -4538,8 +4571,8 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry unsigned elemSize = 0; var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0 - hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF - if (varTypeIsFloating(hfaType) + hfaType = fgEntryPtr->hfaType; + if (varTypeIsValidHfaType(hfaType) #if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) && !fgEntryPtr->isVararg #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) @@ -4657,8 +4690,13 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_) ) { - // We have a HFA struct - noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE)); + // We have a HFA struct. + // Note that GetHfaType may not be the same as elemType, since TYP_SIMD8 is handled the same as TYP_DOUBLE. 
+ var_types useElemType = elemType; +#ifdef _TARGET_ARM64_ + useElemType = (elemType == TYP_SIMD8) ? TYP_DOUBLE : useElemType; +#endif // _TARGET_ARM64_ + noway_assert(useElemType == varDsc->GetHfaType()); noway_assert(elemSize == genTypeSize(elemType)); noway_assert(elemCount == (varDsc->lvExactSize / elemSize)); noway_assert(elemSize * elemCount == varDsc->lvExactSize); @@ -5291,7 +5329,7 @@ void Compiler::fgFixupStructReturn(GenTree* callNode) #if FEATURE_MULTIREG_RET // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer. - assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg); + assert((call->gtType != TYP_STRUCT) || call->HasMultiRegRetVal() || callHasRetBuffArg); #else // !FEATURE_MULTIREG_RET // No more struct returns assert(call->TypeGet() != TYP_STRUCT); @@ -7103,7 +7141,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee) #elif defined(_TARGET_ARM64_) // ARM64 var_types hfaType = GetHfaType(argx); - bool isHfaArg = varTypeIsFloating(hfaType); + bool isHfaArg = varTypeIsValidHfaType(hfaType); size_t size = 1; if (isHfaArg) @@ -17461,23 +17499,28 @@ void Compiler::fgMarkImplicitByRefArgs() if (varDsc->lvIsParam && varTypeIsStruct(varDsc)) { - size_t size; + size_t size = varDsc->lvExactSize; + assert(size == info.compCompHnd->getClassSize(varDsc->lvVerTypeInfo.GetClassHandle())); - if (varDsc->lvSize() > REGSIZE_BYTES) + bool isPassedByReference; +#if defined(_TARGET_AMD64_) + isPassedByReference = (size > REGSIZE_BYTES || (size & (size - 1)) != 0); +#elif defined(_TARGET_ARM64_) + if (size > TARGET_POINTER_SIZE) { - size = varDsc->lvSize(); + CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandleForValueClass(); + structPassingKind howToPassStruct; + var_types type = + getArgTypeForStruct(clsHnd, &howToPassStruct, this->info.compIsVarArgs, varDsc->lvExactSize); + isPassedByReference = (howToPassStruct == SPK_ByReference); } else { - CORINFO_CLASS_HANDLE 
typeHnd = varDsc->lvVerTypeInfo.GetClassHandle(); - size = info.compCompHnd->getClassSize(typeHnd); + isPassedByReference = false; } - -#if defined(_TARGET_AMD64_) - if (size > REGSIZE_BYTES || (size & (size - 1)) != 0) -#elif defined(_TARGET_ARM64_) - if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc, this->info.compIsVarArgs)) #endif + + if (isPassedByReference) { // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local // So I am now using it to indicate that this is one of the weird implicit @@ -17623,8 +17666,7 @@ void Compiler::fgRetypeImplicitByRefArgs() // the parameter which is really a pointer to the struct. fieldVarDsc->lvIsRegArg = false; fieldVarDsc->lvIsMultiRegArg = false; - fieldVarDsc->lvSetIsHfaRegArg(false); - fieldVarDsc->lvArgReg = REG_NA; + fieldVarDsc->lvArgReg = REG_NA; #if FEATURE_MULTIREG_ARGS fieldVarDsc->lvOtherArgReg = REG_NA; #endif diff --git a/src/jit/register_arg_convention.h b/src/jit/register_arg_convention.h index 28f29b7c13c3..ad20b4a0f543 100644 --- a/src/jit/register_arg_convention.h +++ b/src/jit/register_arg_convention.h @@ -58,7 +58,7 @@ struct InitVarDscInfo // return ref to current register arg for this type unsigned& regArgNum(var_types type) { - return varTypeIsFloating(type) ? floatRegArgNum : intRegArgNum; + return varTypeUsesFloatArgReg(type) ? floatRegArgNum : intRegArgNum; } // Allocate a set of contiguous argument registers. "type" is either an integer @@ -110,7 +110,7 @@ struct InitVarDscInfo // return max register arg for this type unsigned maxRegArgNum(var_types type) { - return varTypeIsFloating(type) ? maxFloatRegArgNum : maxIntRegArgNum; + return varTypeUsesFloatArgReg(type) ? 
maxFloatRegArgNum : maxIntRegArgNum; } bool enoughAvailRegs(var_types type, unsigned numRegs = 1); diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp index b4cecb3e2046..d2a959abb57b 100644 --- a/src/jit/simd.cpp +++ b/src/jit/simd.cpp @@ -121,7 +121,7 @@ int Compiler::getSIMDTypeAlignment(var_types simdType) // var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, unsigned* sizeBytes /*= nullptr */) { - assert(featureSIMD); + assert(supportSIMDTypes); if (m_simdHandleCache == nullptr) { diff --git a/src/jit/target.h b/src/jit/target.h index 561db79c6ae5..e2707284f785 100644 --- a/src/jit/target.h +++ b/src/jit/target.h @@ -246,6 +246,7 @@ typedef unsigned char regNumberSmall; #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register #define FEATURE_MULTIREG_ARGS 0 // Support for passing a single argument in more than one register #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register + #define MAX_PASS_SINGLEREG_BYTES 8 // Maximum size of a struct passed in a single register (double). #define MAX_PASS_MULTIREG_BYTES 0 // No multireg arguments (note this seems wrong as MAX_ARG_REG_COUNT is 2) #define MAX_RET_MULTIREG_BYTES 8 // Maximum size of a struct that could be returned in more than one register @@ -531,6 +532,7 @@ typedef unsigned char regNumberSmall; #define FEATURE_FASTTAILCALL 1 // Tail calls made as epilog+jmp #define FEATURE_TAILCALL_OPT 1 // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls. #define FEATURE_SET_FLAGS 0 // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set + #define MAX_PASS_SINGLEREG_BYTES 8 // Maximum size of a struct passed in a single register (double). 
#ifdef UNIX_AMD64_ABI #define FEATURE_MULTIREG_ARGS_OR_RET 1 // Support for passing and/or returning single values in more than one register #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register @@ -912,6 +914,7 @@ typedef unsigned char regNumberSmall; #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register (including passing HFAs) #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register (including HFA returns) #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine is structs are passed/returned in more than one register + #define MAX_PASS_SINGLEREG_BYTES 8 // Maximum size of a struct passed in a single register (double). #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register (Max is an HFA of 4 doubles) #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles) #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. (max is 4 floats or doubles using an HFA) @@ -1219,9 +1222,10 @@ typedef unsigned char regNumberSmall; #define FEATURE_MULTIREG_ARGS 1 // Support for passing a single argument in more than one register #define FEATURE_MULTIREG_RET 1 // Support for returning a single value in more than one register #define FEATURE_STRUCT_CLASSIFIER 0 // Uses a classifier function to determine is structs are passed/returned in more than one register - #define MAX_PASS_MULTIREG_BYTES 32 // Maximum size of a struct that could be passed in more than one register (max is 4 doubles using an HFA) - #define MAX_RET_MULTIREG_BYTES 32 // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles) - #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. 
(max is 4 floats or doubles using an HFA) + #define MAX_PASS_SINGLEREG_BYTES 16 // Maximum size of a struct passed in a single register (16-byte vector). + #define MAX_PASS_MULTIREG_BYTES 64 // Maximum size of a struct that could be passed in more than one register (max is 4 16-byte vectors using an HVA) + #define MAX_RET_MULTIREG_BYTES 64 // Maximum size of a struct that could be returned in more than one register (Max is an HVA of 4 16-byte vectors) + #define MAX_ARG_REG_COUNT 4 // Maximum registers used to pass a single argument in multiple registers. (max is 4 128-bit vectors using an HVA) #define MAX_RET_REG_COUNT 4 // Maximum registers used to return a value. #define NOGC_WRITE_BARRIERS 1 // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers @@ -1944,10 +1948,10 @@ inline regNumber regNextOfType(regNumber reg, var_types type) * Type checks */ -inline bool isFloatRegType(int /* s/b "var_types" */ type) +inline bool isFloatRegType(var_types type) { #if CPU_HAS_FP_SUPPORT - return type == TYP_DOUBLE || type == TYP_FLOAT; + return varTypeUsesFloatReg(type); #else return false; #endif diff --git a/src/jit/vartype.h b/src/jit/vartype.h index 04793ea86830..1d7f3c07fb6c 100644 --- a/src/jit/vartype.h +++ b/src/jit/vartype.h @@ -176,7 +176,7 @@ inline bool varTypeIsI(T vt) template inline bool varTypeCanReg(T vt) { - return ((varTypeClassification[TypeGet(vt)] & (VTF_INT | VTF_I | VTF_FLT)) != 0); + return (TypeGet(vt) != TYP_STRUCT); } template @@ -276,6 +276,128 @@ inline bool varTypeIsEnregisterableStruct(T vt) return (TypeGet(vt) != TYP_STRUCT); } +template +inline bool varTypeUsesFloatReg(T vt) +{ + // Note that not all targets support SIMD, but if they don't, varTypeIsSIMD will + // always return false. + return varTypeIsFloating(vt) || varTypeIsSIMD(vt); +} + +template +inline bool varTypeUsesFloatArgReg(T vt) +{ +#ifdef _TARGET_ARM64_ + // Arm64 passes SIMD types in floating point registers. 
+ return varTypeUsesFloatReg(vt); +#else + // Other targets pass them as regular structs - by reference or by value. + return varTypeIsFloating(vt); +#endif +} + +//------------------------------------------------------------------------ +// varTypeIsValidHfaType: Determine if the type is a valid HFA type +// +// Arguments: +// vt - the type of interest +// +// Return Value: +// Returns true iff the type is a valid HFA type. +// +// Notes: +// This should only be called with the return value from GetHfaType(). +// The only valid values are TYP_UNDEF, for which this returns false, +// TYP_FLOAT, TYP_DOUBLE, or (ARM64-only) TYP_SIMD*. +// +template +inline bool varTypeIsValidHfaType(T vt) +{ +#ifdef FEATURE_HFA + bool isValid = (TypeGet(vt) != TYP_UNDEF); + if (isValid) + { +#ifdef _TARGET_ARM64_ + assert(varTypeUsesFloatReg(vt)); +#else // !_TARGET_ARM64_ + assert(varTypeIsFloating(vt)); +#endif // !_TARGET_ARM64_ + } + return isValid; +#else // !FEATURE_HFA + return false; +#endif // !FEATURE_HFA +} + +#ifdef FEATURE_HFA +//------------------------------------------------------------------------ +// logSizeForHfaType: Get the logSize for an HFA type +// +// Arguments: +// vt - the type of interest +// +// Return Value: +// Returns base 2 log of the size of the type in 4-byte units. +// +// Notes: +// This should only be called with a valid HFA type. +// The only valid values are TYP_FLOAT, TYP_DOUBLE, or (ARM64-only) TYP_SIMD*. +// +template +inline unsigned char logSizeForHfaType(T vt) +{ + unsigned char logSize = 0; + switch (vt) + { + case TYP_FLOAT: + logSize = 0; + break; +#ifdef _TARGET_ARM64_ + case TYP_SIMD16: + logSize = 2; + break; + case TYP_SIMD8: +// We don't distinguish between HFAs of double and HFAs of SIMD8. 
+#endif // _TARGET_ARM64_ + case TYP_DOUBLE: + logSize = 1; + break; + default: + assert(!"unexpected type for logSizeForHfaType"); + break; + } + return logSize; +} + +// hfaTypeForLogSize: Get HFA type for a given logSize +// +// Arguments: +// logSize - base 2 log of the size of the type in 4-byte units. +// +// Return Value: +// Returns the HFA type +// +inline var_types hfaTypeForLogSize(unsigned char logSize) +{ + switch (logSize) + { + case 0: + return TYP_FLOAT; + case 1: + // We don't distinguish between HFAs of double and HFAs of SIMD8. + return TYP_DOUBLE; +#ifdef _TARGET_ARM64_ + case 2: + return TYP_SIMD16; +#endif // _TARGET_ARM64_ + default: + assert(!"unexpected logSize for logSizeForHfaType"); + return TYP_UNDEF; + } +} + +#endif // FEATURE_HFA + /*****************************************************************************/ #endif // _VARTYPE_H_ /*****************************************************************************/ diff --git a/src/vm/argdestination.h b/src/vm/argdestination.h index 386ba57c821f..8ddd7b210412 100644 --- a/src/vm/argdestination.h +++ b/src/vm/argdestination.h @@ -60,22 +60,24 @@ class ArgDestination // fieldBytes - size of the structure void CopyHFAStructToRegister(void *src, int fieldBytes) { - // We are either copying either a float or double HFA and need to + // We are copying a float, double or vector HFA/HVA and need to // enregister each field. int floatRegCount = m_argLocDescForStructInRegs->m_cFloatReg; - bool typeFloat = m_argLocDescForStructInRegs->m_isSinglePrecision; + int hfaFieldSize = m_argLocDescForStructInRegs->m_hfaFieldSize; UINT64* dest = (UINT64*) this->GetDestinationAddress(); for (int i = 0; i < floatRegCount; ++i) { // Copy 4 or 8 bytes from src. - UINT64 val = typeFloat ? *((UINT32*)src + i) : *((UINT64*)src + i); + UINT64 val = (hfaFieldSize == 4) ? *((UINT32*)src) : *((UINT64*)src); // Always store 8 bytes *(dest++) = val; - // For now, always zero the next 8 bytes. 
- // (When HVAs are supported we will get the next 8 bytes from src.) - *(dest++) = 0; + // Either zero the next 8 bytes or get the next 8 bytes from src for 16-byte vector. + *(dest++) = (hfaFieldSize == 16) ? *((UINT64*)src + 1) : 0; + + // Increment src by the appropriate amount. + src = (void*)((char*)src + hfaFieldSize); } } diff --git a/src/vm/arm64/CallDescrWorkerARM64.asm b/src/vm/arm64/CallDescrWorkerARM64.asm index fe277ceb6282..9f2ec2461159 100644 --- a/src/vm/arm64/CallDescrWorkerARM64.asm +++ b/src/vm/arm64/CallDescrWorkerARM64.asm @@ -93,7 +93,7 @@ LNoFloatingPoint bne LNoDoubleReturn LFloatReturn - str d0, [x19, #(CallDescrData__returnValue + 0)] + str q0, [x19, #(CallDescrData__returnValue + 0)] b LReturnDone LNoDoubleReturn @@ -117,6 +117,16 @@ LNoFloatHFAReturn LNoDoubleHFAReturn + ;;VectorHFAReturn return case + cmp w3, #64 + bne LNoVectorHFAReturn + + stp q0, q1, [x19, #(CallDescrData__returnValue + 0)] + stp q2, q3, [x19, #(CallDescrData__returnValue + 0x20)] + b LReturnDone + +LNoVectorHFAReturn + EMIT_BREAKPOINT ; Unreachable LIntReturn diff --git a/src/vm/arm64/asmconstants.h b/src/vm/arm64/asmconstants.h index caffa809eb50..8c99ed841967 100644 --- a/src/vm/arm64/asmconstants.h +++ b/src/vm/arm64/asmconstants.h @@ -61,7 +61,7 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__FloatArgumentRegisters == sizeof(FloatArgumentRegi #define CallDescrData__fpReturnSize 0x20 #define CallDescrData__pTarget 0x28 #define CallDescrData__pRetBuffArg 0x30 -#define CallDescrData__returnValue 0x38 +#define CallDescrData__returnValue 0x40 ASMCONSTANTS_C_ASSERT(CallDescrData__pSrc == offsetof(CallDescrData, pSrc)) ASMCONSTANTS_C_ASSERT(CallDescrData__numStackSlots == offsetof(CallDescrData, numStackSlots)) diff --git a/src/vm/arm64/asmhelpers.asm b/src/vm/arm64/asmhelpers.asm index 7d8aad3e481b..abfd67313a52 100644 --- a/src/vm/arm64/asmhelpers.asm +++ b/src/vm/arm64/asmhelpers.asm @@ -692,8 +692,9 @@ NoFloatingPointRetVal ; x0 = fpRetSize - ; return value is stored before 
float argument registers - add x1, sp, #(__PWTB_FloatArgumentRegisters - 0x20) + ; The return value is stored before float argument registers + ; The maximum size of a return value is 0x40 (HVA of 4x16) + add x1, sp, #(__PWTB_FloatArgumentRegisters - 0x40) bl setStubReturnValue EPILOG_WITH_TRANSITION_BLOCK_RETURN diff --git a/src/vm/arm64/calldescrworkerarm64.S b/src/vm/arm64/calldescrworkerarm64.S index f987d402ddee..8e8084ba3496 100644 --- a/src/vm/arm64/calldescrworkerarm64.S +++ b/src/vm/arm64/calldescrworkerarm64.S @@ -85,7 +85,7 @@ LOCAL_LABEL(NoFloatingPoint): bne LOCAL_LABEL(NoDoubleReturn) LOCAL_LABEL(FloatReturn): - str d0, [x19, #(CallDescrData__returnValue + 0)] + str q0, [x19, #(CallDescrData__returnValue + 0)] b LOCAL_LABEL(ReturnDone) LOCAL_LABEL(NoDoubleReturn): @@ -97,6 +97,7 @@ LOCAL_LABEL(NoDoubleReturn): stp s0, s1, [x19, #(CallDescrData__returnValue + 0)] stp s2, s3, [x19, #(CallDescrData__returnValue + 0x08)] b LOCAL_LABEL(ReturnDone) + LOCAL_LABEL(NoFloatHFAReturn): //DoubleHFAReturn return case @@ -109,6 +110,16 @@ LOCAL_LABEL(NoFloatHFAReturn): LOCAL_LABEL(NoDoubleHFAReturn): + //VectorHFAReturn return case + cmp w3, #64 + bne LOCAL_LABEL(LNoVectorHFAReturn) + + stp q0, q1, [x19, #(CallDescrData__returnValue + 0)] + stp q2, q3, [x19, #(CallDescrData__returnValue + 0x20)] + b LOCAL_LABEL(ReturnDone) + +LOCAL_LABEL(LNoVectorHFAReturn): + EMIT_BREAKPOINT // Unreachable LOCAL_LABEL(IntReturn): diff --git a/src/vm/arm64/cgencpu.h b/src/vm/arm64/cgencpu.h index fd1fbafe96da..a2cac4eb7c20 100644 --- a/src/vm/arm64/cgencpu.h +++ b/src/vm/arm64/cgencpu.h @@ -51,7 +51,7 @@ extern PCODE GetPreStubEntryPoint(); #define CACHE_LINE_SIZE 64 #define LOG2SLOT LOG2_PTRSIZE -#define ENREGISTERED_RETURNTYPE_MAXSIZE 32 // bytes (four FP registers: d0,d1,d2 and d3) +#define ENREGISTERED_RETURNTYPE_MAXSIZE 64 // bytes (four vector registers: q0,q1,q2 and q3) #define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 16 // bytes (two int registers: x0 and x1) #define 
ENREGISTERED_PARAMTYPE_MAXSIZE 16 // bytes (max value type size that can be passed by value) diff --git a/src/vm/callhelpers.h b/src/vm/callhelpers.h index db9cfad6cb1e..d04412b68e17 100644 --- a/src/vm/callhelpers.h +++ b/src/vm/callhelpers.h @@ -39,8 +39,13 @@ struct CallDescrData // Return value // #ifdef ENREGISTERED_RETURNTYPE_MAXSIZE +#ifdef _TARGET_ARM64_ + // Use NEON128 to ensure proper alignment for vectors. + __declspec(align(16)) NEON128 returnValue[ENREGISTERED_RETURNTYPE_MAXSIZE / sizeof(NEON128)]; +#else // Use UINT64 to ensure proper alignment UINT64 returnValue[ENREGISTERED_RETURNTYPE_MAXSIZE / sizeof(UINT64)]; +#endif #else UINT64 returnValue; #endif diff --git a/src/vm/callingconvention.h b/src/vm/callingconvention.h index 7368fecac816..cb117278b511 100644 --- a/src/vm/callingconvention.h +++ b/src/vm/callingconvention.h @@ -50,8 +50,7 @@ struct ArgLocDesc #endif // UNIX_AMD64_ABI #if defined(_TARGET_ARM64_) - bool m_isSinglePrecision; // For determining if HFA is single or double - // precision + int m_hfaFieldSize; // Size of HFA field #endif // defined(_TARGET_ARM64_) #if defined(_TARGET_ARM_) @@ -76,7 +75,7 @@ struct ArgLocDesc m_fRequires64BitAlignment = FALSE; #endif #if defined(_TARGET_ARM64_) - m_isSinglePrecision = FALSE; + m_hfaFieldSize = 0; #endif // defined(_TARGET_ARM64_) #if defined(UNIX_AMD64_ABI) m_eeClass = NULL; @@ -589,10 +588,19 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE if (!m_argTypeHandle.IsNull() && m_argTypeHandle.IsHFA()) { CorElementType type = m_argTypeHandle.GetHFAType(); - bool isFloatType = (type == ELEMENT_TYPE_R4); + int hfaFieldSize = 0; + switch (type) + { + case ELEMENT_TYPE_R4: hfaFieldSize = 4; break; + case ELEMENT_TYPE_R8: hfaFieldSize = 8; break; +#ifdef _TARGET_ARM64_ + case ELEMENT_TYPE_VALUETYPE: hfaFieldSize = 16; break; +#endif + default: _ASSERTE(!"Invalid HFA Type"); + } - pLoc->m_cFloatReg = isFloatType ? 
GetArgSize()/sizeof(float): GetArgSize()/sizeof(double); - pLoc->m_isSinglePrecision = isFloatType; + pLoc->m_cFloatReg = GetArgSize()/hfaFieldSize; + pLoc->m_hfaFieldSize = hfaFieldSize; } else { @@ -1297,16 +1305,25 @@ int ArgIteratorTemplate::GetNextOffset() if (thValueType.IsHFA()) { CorElementType type = thValueType.GetHFAType(); - bool isFloatType = (type == ELEMENT_TYPE_R4); - cFPRegs = (type == ELEMENT_TYPE_R4)? (argSize/sizeof(float)): (argSize/sizeof(double)); + int hfaFieldSize = 0; + switch (type) + { + case ELEMENT_TYPE_R4: hfaFieldSize = 4; break; + case ELEMENT_TYPE_R8: hfaFieldSize = 8; break; +#ifdef _TARGET_ARM64_ + case ELEMENT_TYPE_VALUETYPE: hfaFieldSize = 16; break; +#endif + default: _ASSERTE(!"Invalid HFA Type"); + } + cFPRegs = argSize/hfaFieldSize; m_argLocDescForStructInRegs.Init(); m_argLocDescForStructInRegs.m_cFloatReg = cFPRegs; m_argLocDescForStructInRegs.m_idxFloatReg = m_idxFPReg; - m_argLocDescForStructInRegs.m_isSinglePrecision = isFloatType; - + m_argLocDescForStructInRegs.m_hfaFieldSize = hfaFieldSize; + m_hasArgLocDescForStructInRegs = true; } else @@ -1474,9 +1491,17 @@ void ArgIteratorTemplate::ComputeReturnFlags() { CorElementType hfaType = thValueType.GetHFAType(); - flags |= (hfaType == ELEMENT_TYPE_R4) ? 
- ((4 * sizeof(float)) << RETURN_FP_SIZE_SHIFT) : - ((4 * sizeof(double)) << RETURN_FP_SIZE_SHIFT); + int hfaFieldSize = 0; + switch (hfaType) + { + case ELEMENT_TYPE_R4: hfaFieldSize = 4; break; + case ELEMENT_TYPE_R8: hfaFieldSize = 8; break; +#ifdef _TARGET_ARM64_ + case ELEMENT_TYPE_VALUETYPE: hfaFieldSize = 16; break; +#endif + default: _ASSERTE(!"Invalid HFA Type"); + } + flags |= ((4 * hfaFieldSize) << RETURN_FP_SIZE_SHIFT); break; } diff --git a/src/vm/class.cpp b/src/vm/class.cpp index 1e551a9c5d50..2cce9092118c 100644 --- a/src/vm/class.cpp +++ b/src/vm/class.cpp @@ -1172,6 +1172,47 @@ bool MethodTable::IsHFA() } #endif // !FEATURE_HFA +//******************************************************************************* +int MethodTable::GetVectorSize() +{ +#if defined(_TARGET_ARM64_ ) || defined(_TARGET_XARCH_) + if (IsIntrinsicType()) + { + LPCUTF8 namespaceName; + LPCUTF8 className = GetFullyQualifiedNameInfo(&namespaceName); + int vectorSize = 0; + + if (strcmp(className, "Vector`1") == 0) + { + vectorSize = GetNumInstanceFieldBytes(); + _ASSERTE(strcmp(namespaceName, "System.Numerics") == 0); + return vectorSize; + } + if (strcmp(className, "Vector128`1") == 0) + { + vectorSize = 16; + } +#ifdef _TARGET_XARCH_ + else if (strcmp(className, "Vector256`1") == 0) + { + vectorSize = 32; + } +#elif defined(_TARGET_ARM64_) + else if (strcmp(className, "Vector64`1") == 0) + { + vectorSize = 8; + } +#endif + if (vectorSize != 0) + { + _ASSERTE(strcmp(namespaceName, "System.Runtime.Intrinsics") == 0); + return vectorSize; + } + } +#endif // defined(_TARGET_ARM64_ ) || defined(_TARGET_XARCH_) + return 0; +} + //******************************************************************************* CorElementType MethodTable::GetHFAType() { @@ -1191,17 +1232,28 @@ CorElementType MethodTable::GetHFAType() _ASSERTE(pMT->IsValueType()); _ASSERTE(pMT->GetNumInstanceFields() > 0); + int vectorSize = pMT->GetVectorSize(); + if (vectorSize != 0) + { + return (vectorSize == 8) ?
ELEMENT_TYPE_R8 : ELEMENT_TYPE_VALUETYPE; + } + PTR_FieldDesc pFirstField = pMT->GetApproxFieldDescListRaw(); CorElementType fieldType = pFirstField->GetFieldType(); - + // All HFA fields have to be of the same type, so we can just return the type of the first field switch (fieldType) { case ELEMENT_TYPE_VALUETYPE: pMT = pFirstField->LookupApproxFieldTypeHandle().GetMethodTable(); + vectorSize = pMT->GetVectorSize(); + if (vectorSize != 0) + { + return (vectorSize == 8) ? ELEMENT_TYPE_R8 : ELEMENT_TYPE_VALUETYPE; + } break; - + case ELEMENT_TYPE_R4: case ELEMENT_TYPE_R8: return fieldType; @@ -1212,7 +1264,7 @@ CorElementType MethodTable::GetHFAType() _ASSERTE(false); return ELEMENT_TYPE_END; } - } + } } bool MethodTable::IsNativeHFA() @@ -1231,6 +1283,7 @@ CorElementType MethodTable::GetNativeHFAType() // // When FEATURE_HFA is defined, we cache the value; otherwise we recompute it with each // call. The latter is only for the armaltjit and the arm64altjit. +// bool #if defined(FEATURE_HFA) EEClass::CheckForHFA(MethodTable ** pByValueClassCache) @@ -1249,20 +1302,17 @@ EEClass::CheckForHFA() if (HasExplicitFieldOffsetLayout()) return false; - // The SIMD Intrinsic types are meant to be handled specially and should not be treated as HFA - if (GetMethodTable()->IsIntrinsicType()) + // The SIMD Intrinsic Vector types appear to have multiple fields, but need to be treated + // as an opaque type of a single vector. 
+ if (GetMethodTable()->GetVectorSize() != 0) { - LPCUTF8 namespaceName; - LPCUTF8 className = GetMethodTable()->GetFullyQualifiedNameInfo(&namespaceName); - - if ((strcmp(className, "Vector256`1") == 0) || (strcmp(className, "Vector128`1") == 0) || - (strcmp(className, "Vector64`1") == 0)) - { - assert(strcmp(namespaceName, "System.Runtime.Intrinsics") == 0); - return false; - } +#if defined(FEATURE_HFA) + GetMethodTable()->SetIsHFA(); +#endif + return true; } + int elemSize = 0; CorElementType hfaType = ELEMENT_TYPE_END; FieldDesc *pFieldDescList = GetFieldDescList(); @@ -1273,13 +1323,43 @@ EEClass::CheckForHFA() switch (fieldType) { case ELEMENT_TYPE_VALUETYPE: + { +#ifdef _TARGET_ARM64_ + // hfa/hva types are unique by size, except for Vector64 which we can conveniently + // treat as if it were a double for ABI purposes. However, it only qualifies as + // an HVA if all fields are the same type. This will ensure that we only + // consider it an HVA if all the fields are ELEMENT_TYPE_VALUETYPE (which have been + // determined above to be vectors) of the same size.
+ MethodTable* pMT; +#if defined(FEATURE_HFA) + pMT = pByValueClassCache[i]; +#else + pMT = pFD->LookupApproxFieldTypeHandle().AsMethodTable(); +#endif + int thisElemSize = pMT->GetVectorSize(); + if (thisElemSize != 0) + { + if (elemSize == 0) + { + elemSize = thisElemSize; + } + else if ((thisElemSize != elemSize) || (hfaType != ELEMENT_TYPE_VALUETYPE)) + { + return false; + } + } + else +#endif // _TARGET_ARM64_ + { #if defined(FEATURE_HFA) - fieldType = pByValueClassCache[i]->GetHFAType(); + fieldType = pByValueClassCache[i]->GetHFAType(); #else - fieldType = pFD->LookupApproxFieldTypeHandle().AsMethodTable()->GetHFAType(); + fieldType = pFD->LookupApproxFieldTypeHandle().AsMethodTable()->GetHFAType(); #endif + } + } break; case ELEMENT_TYPE_R4: case ELEMENT_TYPE_R8: @@ -1308,11 +1388,27 @@ EEClass::CheckForHFA() } } - if (hfaType == ELEMENT_TYPE_END) + switch (hfaType) + { + case ELEMENT_TYPE_R4: + elemSize = 4; + break; + case ELEMENT_TYPE_R8: + elemSize = 8; + break; +#ifdef _TARGET_ARM64_ + case ELEMENT_TYPE_VALUETYPE: + // Should already have set elemSize, but be conservative + if (elemSize == 0) + { + return false; + } + break; +#endif + default: + // ELEMENT_TYPE_END return false; - - int elemSize = (hfaType == ELEMENT_TYPE_R8) ? sizeof(double) : sizeof(float); - + } // Note that we check the total size, but do not perform any checks on number of fields: // - Type of fields can be HFA valuetype itself // - Managed C++ HFA valuetypes have just one of type float to signal that @@ -1327,7 +1423,7 @@ EEClass::CheckForHFA() if (totalSize / elemSize > 4) return false; - // All the above tests passed. It's HFA! + // All the above tests passed. It's HFA(/HVA)! #if defined(FEATURE_HFA) GetMethodTable()->SetIsHFA(); #endif @@ -1399,7 +1495,16 @@ CorElementType EEClassLayoutInfo::GetNativeHFATypeRaw() if (hfaType == ELEMENT_TYPE_END) return ELEMENT_TYPE_END; - int elemSize = (hfaType == ELEMENT_TYPE_R8) ?
sizeof(double) : sizeof(float); + int elemSize = 1; + switch (hfaType) + { + case ELEMENT_TYPE_R4: elemSize = sizeof(float); break; + case ELEMENT_TYPE_R8: elemSize = sizeof(double); break; +#ifdef _TARGET_ARM64_ + case ELEMENT_TYPE_VALUETYPE: elemSize = 16; break; +#endif + default: _ASSERTE(!"Invalid HFA Type"); + } // Note that we check the total size, but do not perform any checks on number of fields: // - Type of fields can be HFA valuetype itself diff --git a/src/vm/class.h b/src/vm/class.h index 2853aee330e2..fae62795a8b7 100644 --- a/src/vm/class.h +++ b/src/vm/class.h @@ -414,8 +414,11 @@ class EEClassLayoutInfo #endif // UNIX_AMD64_ABI #ifdef FEATURE_HFA // HFA type of the unmanaged layout + // Note that these are not flags, they are discrete values. e_R4_HFA = 0x10, e_R8_HFA = 0x20, + e_16_HFA = 0x30, + e_HFATypeFlags = 0x30, #endif }; @@ -526,15 +529,19 @@ class EEClassLayoutInfo bool IsNativeHFA() { LIMITED_METHOD_CONTRACT; - return (m_bFlags & (e_R4_HFA | e_R8_HFA)) != 0; + return (m_bFlags & e_HFATypeFlags) != 0; } CorElementType GetNativeHFAType() { LIMITED_METHOD_CONTRACT; - if (IsNativeHFA()) - return (m_bFlags & e_R4_HFA) ? ELEMENT_TYPE_R4 : ELEMENT_TYPE_R8; - return ELEMENT_TYPE_END; + switch (m_bFlags & e_HFATypeFlags) + { + case e_R4_HFA: return ELEMENT_TYPE_R4; + case e_R8_HFA: return ELEMENT_TYPE_R8; + case e_16_HFA: return ELEMENT_TYPE_VALUETYPE; + default: return ELEMENT_TYPE_END; + } } #else // !FEATURE_HFA bool IsNativeHFA() @@ -580,7 +587,13 @@ class EEClassLayoutInfo void SetNativeHFAType(CorElementType hfaType) { LIMITED_METHOD_CONTRACT; - m_bFlags |= (hfaType == ELEMENT_TYPE_R4) ? 
e_R4_HFA : e_R8_HFA; + switch (hfaType) + { + case ELEMENT_TYPE_R4: m_bFlags |= e_R4_HFA; break; + case ELEMENT_TYPE_R8: m_bFlags |= e_R8_HFA; break; + case ELEMENT_TYPE_VALUETYPE: m_bFlags |= e_16_HFA; break; + default: _ASSERTE(!"Invalid HFA Type"); + } } #endif #ifdef UNIX_AMD64_ABI diff --git a/src/vm/methodtable.h b/src/vm/methodtable.h index 74febebc39bc..154efa2ee4aa 100644 --- a/src/vm/methodtable.h +++ b/src/vm/methodtable.h @@ -1929,6 +1929,9 @@ class MethodTable bool IsHFA(); #endif // FEATURE_HFA + // Returns the size in bytes of this type if it is a HW vector type; 0 otherwise. + int GetVectorSize(); + // Get the HFA type. This is supported both with FEATURE_HFA, in which case it // depends on the cached bit on the class, or without, in which case it is recomputed // for each invocation.