From 22909c00920365721d8a173fcb33ff4a57e3e999 Mon Sep 17 00:00:00 2001
From: Carol Eidt <carol.eidt@microsoft.com>
Date: Fri, 29 Mar 2019 09:06:53 -0700
Subject: [PATCH] [WIP] Support for Arm64 Vector ABI

Extend HFA support to cover vector types (HVAs) as well as floating point types.

Fix #16022
---
 src/jit/codegenarm64.cpp              |   4 +-
 src/jit/codegenarmarch.cpp            |   8 +-
 src/jit/codegencommon.cpp             |  27 +++-
 src/jit/compiler.cpp                  | 187 +++++++++++---------------
 src/jit/compiler.h                    | 170 ++++++++++++-----------
 src/jit/compiler.hpp                  |   6 +-
 src/jit/dll/jit.nativeproj            |   2 +-
 src/jit/flowgraph.cpp                 |   2 +-
 src/jit/gentree.cpp                   |  10 +-
 src/jit/gentree.h                     |   3 +
 src/jit/importer.cpp                  |  10 +-
 src/jit/lclvars.cpp                   |  44 +++---
 src/jit/lower.cpp                     |   3 +-
 src/jit/lsraarmarch.cpp               |   2 +-
 src/jit/lsrabuild.cpp                 |  14 +-
 src/jit/morph.cpp                     | 168 ++++++++++++++---------
 src/jit/register_arg_convention.h     |   4 +-
 src/jit/simd.cpp                      |   2 +-
 src/jit/target.h                      |  14 +-
 src/jit/vartype.h                     | 124 ++++++++++++++++-
 src/vm/argdestination.h               |  14 +-
 src/vm/arm64/CallDescrWorkerARM64.asm |  12 +-
 src/vm/arm64/asmconstants.h           |   2 +-
 src/vm/arm64/asmhelpers.asm           |   5 +-
 src/vm/arm64/calldescrworkerarm64.S   |  13 +-
 src/vm/arm64/cgencpu.h                |   2 +-
 src/vm/callhelpers.h                  |   5 +
 src/vm/callingconvention.h            |  51 +++++--
 src/vm/class.cpp                      | 148 +++++++++++++++++---
 src/vm/class.h                        |  23 +++-
 src/vm/methodtable.h                  |   3 +
 31 files changed, 722 insertions(+), 360 deletions(-)

diff --git a/src/jit/codegenarm64.cpp b/src/jit/codegenarm64.cpp
index 9cfaccef0851..734e7ea5be0d 100644
--- a/src/jit/codegenarm64.cpp
+++ b/src/jit/codegenarm64.cpp
@@ -2021,10 +2021,10 @@ void CodeGen::genSimpleReturn(GenTree* treeNode)
     GenTree*  op1        = treeNode->gtGetOp1();
     var_types targetType = treeNode->TypeGet();
 
-    assert(!isStructReturn(treeNode));
+    assert(targetType != TYP_STRUCT);
     assert(targetType != TYP_VOID);
 
-    regNumber retReg = varTypeIsFloating(treeNode) ? REG_FLOATRET : REG_INTRET;
+    regNumber retReg = varTypeUsesFloatArgReg(treeNode) ? REG_FLOATRET : REG_INTRET;
 
     bool movRequired = (op1->gtRegNum != retReg);
 
diff --git a/src/jit/codegenarmarch.cpp b/src/jit/codegenarmarch.cpp
index cdb818b93422..3ed47a9d9dae 100644
--- a/src/jit/codegenarmarch.cpp
+++ b/src/jit/codegenarmarch.cpp
@@ -2355,7 +2355,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
     }
     else
     {
-        assert(!varTypeIsStruct(call));
+        assert(call->gtType != TYP_STRUCT);
 
         if (call->gtType == TYP_REF)
         {
@@ -2509,9 +2509,13 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
                 // TCB in REG_PINVOKE_TCB. fgMorphCall() sets the correct argument registers.
                 returnReg = REG_PINVOKE_TCB;
             }
+            else if (compiler->opts.compUseSoftFP)
+            {
+                returnReg = REG_INTRET;
+            }
             else
 #endif // _TARGET_ARM_
-                if (varTypeIsFloating(returnType) && !compiler->opts.compUseSoftFP)
+                if (varTypeUsesFloatArgReg(returnType))
             {
                 returnReg = REG_FLOATRET;
             }
diff --git a/src/jit/codegencommon.cpp b/src/jit/codegencommon.cpp
index 9946ff33a6db..fb3f0c13dd99 100644
--- a/src/jit/codegencommon.cpp
+++ b/src/jit/codegencommon.cpp
@@ -10160,7 +10160,11 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass)
     structPassingKind howToReturnStruct;
     var_types         returnType = getReturnTypeForStruct(hClass, &howToReturnStruct);
 
+#ifdef _TARGET_ARM64_
+    return (varTypeIsStruct(returnType) && (howToReturnStruct != SPK_PrimitiveType));
+#else
     return (varTypeIsStruct(returnType));
+#endif
 }
 
 //----------------------------------------------
@@ -10169,11 +10173,7 @@ bool Compiler::IsMultiRegReturnedType(CORINFO_CLASS_HANDLE hClass)
 
 bool Compiler::IsHfa(CORINFO_CLASS_HANDLE hClass)
 {
-#ifdef FEATURE_HFA
-    return varTypeIsFloating(GetHfaType(hClass));
-#else
-    return false;
-#endif
+    return varTypeIsValidHfaType(GetHfaType(hClass));
 }
 
 bool Compiler::IsHfa(GenTree* tree)
@@ -10206,7 +10206,20 @@ var_types Compiler::GetHfaType(CORINFO_CLASS_HANDLE hClass)
     {
 #ifdef FEATURE_HFA
         CorInfoType corType = info.compCompHnd->getHFAType(hClass);
-        if (corType != CORINFO_TYPE_UNDEF)
+#ifdef _TARGET_ARM64_
+        if (corType == CORINFO_TYPE_VALUECLASS)
+        {
+            // This is a vector type.
+            // HVAs are only supported on ARM64, and only for sizes of 8 or 16 bytes.
+            // For 8-byte vectors corType will be returned as CORINFO_TYPE_DOUBLE.
+            result = TYP_SIMD16;
+            JITDUMP("Found an HVA of SIMD16\n");
+            // This type may not appear elsewhere, but it will occupy a floating point register.
+            compFloatingPointUsed = true;
+        }
+        else
+#endif // _TARGET_ARM64_
+            if (corType != CORINFO_TYPE_UNDEF)
         {
             result = JITtype2varType(corType);
         }
@@ -11515,7 +11528,7 @@ void CodeGen::genReturn(GenTree* treeNode)
     else
 #endif // _TARGET_X86_ || _TARGET_ARM_
     {
-        if (isStructReturn(treeNode))
+        if (targetType == TYP_STRUCT)
         {
             genStructReturn(treeNode);
         }
diff --git a/src/jit/compiler.cpp b/src/jit/compiler.cpp
index e005a77c2900..1cc2f600ed16 100644
--- a/src/jit/compiler.cpp
+++ b/src/jit/compiler.cpp
@@ -573,8 +573,8 @@ bool Compiler::isSingleFloat32Struct(CORINFO_CLASS_HANDLE clsHnd)
 //     of size 'structSize'.
 //     We examine 'clsHnd' to check the GC layout of the struct and
 //     return TYP_REF for structs that simply wrap an object.
-//     If the struct is a one element HFA, we will return the
-//     proper floating point type.
+//     If the struct is a one element HFA/HVA, we will return the
+//     proper floating point or vector type.
 //
 // Arguments:
 //    structSize - the size of the struct type, cannot be zero
@@ -592,13 +592,64 @@ bool Compiler::isSingleFloat32Struct(CORINFO_CLASS_HANDLE clsHnd)
 //    same way as any other 8-byte struct
 //    For ARM32 if we have an HFA struct that wraps a 64-bit double
 //    we will return TYP_DOUBLE.
+//    For vector calling conventions, a vector is considered a "primitive"
+//    type, as it is passed in a single register.
 //
 var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS_HANDLE clsHnd, bool isVarArg)
 {
     assert(structSize != 0);
 
-    var_types useType;
+    var_types useType = TYP_UNKNOWN;
 
+// Start by determining if we have an HFA/HVA with a single element.
+#ifdef FEATURE_HFA
+#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
+    // On Arm64 Windows, arguments of VarArg methods are not classified as HFA types; they need to be
+    // treated as if they are not HFA types.
+    if (!isVarArg)
+#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
+    {
+        switch (structSize)
+        {
+            case 4:
+            case 8:
+#ifdef _TARGET_ARM64_
+            case 16:
+#endif // _TARGET_ARM64_
+            {
+                var_types hfaType = TYP_UNDEF; // Not an HFA unless one of the checks below says otherwise.
+#ifdef ARM_SOFTFP
+                // For ARM_SOFTFP, HFA is unsupported so we need to check in another way.
+                // This matters only for size-4 struct because bigger structs would be processed with RetBuf.
+                if (isSingleFloat32Struct(clsHnd))
+                {
+                    hfaType = TYP_FLOAT;
+                }
+#else  // !ARM_SOFTFP
+                hfaType = GetHfaType(clsHnd);
+#endif // ARM_SOFTFP
+                // We're only interested in the case where the struct size is equal to the size of the hfaType.
+                if (varTypeIsValidHfaType(hfaType))
+                {
+                    if (genTypeSize(hfaType) == structSize)
+                    {
+                        useType = hfaType;
+                    }
+                    else
+                    {
+                        return TYP_UNKNOWN;
+                    }
+                }
+            }
+        }
+        if (useType != TYP_UNKNOWN)
+        {
+            return useType;
+        }
+    }
+#endif // FEATURE_HFA
+
+    // Now deal with non-HFA/HVA structs.
     switch (structSize)
     {
         case 1:
@@ -618,15 +669,8 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS
 
 #ifdef _TARGET_64BIT_
         case 4:
-            if (IsHfa(clsHnd))
-            {
-                // A structSize of 4 with IsHfa, it must be an HFA of one float
-                useType = TYP_FLOAT;
-            }
-            else
-            {
-                useType = TYP_INT;
-            }
+            // We dealt with the one-float HFA above. All other 4-byte structs are handled as INT.
+            useType = TYP_INT;
             break;
 
 #if !defined(_TARGET_XARCH_) || defined(UNIX_AMD64_ABI)
@@ -640,86 +684,13 @@ var_types Compiler::getPrimitiveTypeForStruct(unsigned structSize, CORINFO_CLASS
 #endif // _TARGET_64BIT_
 
         case TARGET_POINTER_SIZE:
-#ifdef ARM_SOFTFP
-            // For ARM_SOFTFP, HFA is unsupported so we need to check in another way
-            // This matters only for size-4 struct cause bigger structs would be processed with RetBuf
-            if (isSingleFloat32Struct(clsHnd))
-#else // !ARM_SOFTFP
-            if (IsHfa(clsHnd)
-#if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
-                // Arm64 Windows VarArg methods arguments will not
-                // classify HFA types, they will need to be treated
-                // as if they are not HFA types.
-                && !isVarArg
-#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
-                )
-#endif // ARM_SOFTFP
-            {
-#ifdef _TARGET_64BIT_
-                var_types hfaType = GetHfaType(clsHnd);
-
-                // A structSize of 8 with IsHfa, we have two possiblities:
-                // An HFA of one double or an HFA of two floats
-                //
-                // Check and exclude the case of an HFA of two floats
-                if (hfaType == TYP_DOUBLE)
-                {
-                    // We have an HFA of one double
-                    useType = TYP_DOUBLE;
-                }
-                else
-                {
-                    assert(hfaType == TYP_FLOAT);
-
-                    // We have an HFA of two floats
-                    // This should be passed or returned in two FP registers
-                    useType = TYP_UNKNOWN;
-                }
-#else  // a 32BIT target
-                // A structSize of 4 with IsHfa, it must be an HFA of one float
-                useType = TYP_FLOAT;
-#endif // _TARGET_64BIT_
-            }
-            else
-            {
-                BYTE gcPtr = 0;
-                // Check if this pointer-sized struct is wrapping a GC object
-                info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
-                useType = getJitGCType(gcPtr);
-            }
-            break;
-
-#ifdef _TARGET_ARM_
-        case 8:
-            if (IsHfa(clsHnd))
-            {
-                var_types hfaType = GetHfaType(clsHnd);
-
-                // A structSize of 8 with IsHfa, we have two possiblities:
-                // An HFA of one double or an HFA of two floats
-                //
-                // Check and exclude the case of an HFA of two floats
-                if (hfaType == TYP_DOUBLE)
-                {
-                    // We have an HFA of one double
-                    useType = TYP_DOUBLE;
-                }
-                else
-                {
-                    assert(hfaType == TYP_FLOAT);
-
-                    // We have an HFA of two floats
-                    // This should be passed or returned in two FP registers
-                    useType = TYP_UNKNOWN;
-                }
-            }
-            else
-            {
-                // We don't have an HFA
-                useType = TYP_UNKNOWN;
-            }
-            break;
-#endif // _TARGET_ARM_
+        {
+            BYTE gcPtr = 0;
+            // Check if this pointer-sized struct is wrapping a GC object
+            info.compCompHnd->getClassGClayout(clsHnd, &gcPtr);
+            useType = getJitGCType(gcPtr);
+        }
+        break;
 
         default:
             useType = TYP_UNKNOWN;
@@ -802,11 +773,11 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
     else
 #endif // UNIX_AMD64_ABI
 
-        // The largest primitive type is 8 bytes (TYP_DOUBLE)
+        // The largest arg passed in a single register is MAX_PASS_SINGLEREG_BYTES,
         // so we can skip calling getPrimitiveTypeForStruct when we
         // have a struct that is larger than that.
         //
-        if (structSize <= sizeof(double))
+        if (structSize <= MAX_PASS_SINGLEREG_BYTES)
     {
         // We set the "primitive" useType based upon the structSize
         // and also examine the clsHnd to see if it is an HFA of count one
@@ -829,14 +800,21 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
         //
         if (structSize <= MAX_PASS_MULTIREG_BYTES)
         {
-            // Structs that are HFA's are passed by value in multiple registers
-            if (IsHfa(clsHnd)
+            // Structs that are HFA's are passed by value in multiple registers.
+            // Arm64 Windows VarArg methods arguments will not classify HFA types, they will need to be treated
+            // as if they are not HFA types.
+            var_types hfaType;
 #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
-                && !isVarArg // Arm64 Windows VarArg methods arguments will not
-                             // classify HFA types, they will need to be treated
-                             // as if they are not HFA types.
-#endif                       // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
-                )
+            if (isVarArg)
+            {
+                hfaType = TYP_UNDEF;
+            }
+            else
+#endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
+            {
+                hfaType = GetHfaType(clsHnd);
+            }
+            if (varTypeIsValidHfaType(hfaType))
             {
                 // HFA's of count one should have been handled by getPrimitiveTypeForStruct
                 assert(GetHfaCount(clsHnd) >= 2);
@@ -851,7 +829,6 @@ var_types Compiler::getArgTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
             {
 
 #ifdef UNIX_AMD64_ABI
-
                 // The case of (structDesc.eightByteCount == 1) should have already been handled
                 if ((structDesc.eightByteCount > 1) || !structDesc.passedInRegisters)
                 {
@@ -1035,10 +1012,10 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
     // Check for cases where a small struct is returned in a register
     // via a primitive type.
     //
-    // The largest primitive type is 8 bytes (TYP_DOUBLE)
+    // The largest "primitive type" is MAX_PASS_SINGLEREG_BYTES
     // so we can skip calling getPrimitiveTypeForStruct when we
     // have a struct that is larger than that.
-    if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= sizeof(double)))
+    if (canReturnInRegister && (useType == TYP_UNKNOWN) && (structSize <= MAX_PASS_SINGLEREG_BYTES))
     {
         // We set the "primitive" useType based upon the structSize
         // and also examine the clsHnd to see if it is an HFA of count one
@@ -1070,7 +1047,7 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd,
     // because when HFA are enabled, normally we would use two FP registers to pass or return it
     //
     // But if we don't have support for multiple register return types, we have to change this.
-    // Since we what we have an 8-byte struct (float + float)  we change useType to TYP_I_IMPL
+    // Since what we have is an 8-byte struct (float + float)  we change useType to TYP_I_IMPL
     // so that the struct is returned instead using an 8-byte integer register.
     //
     if ((FEATURE_MULTIREG_RET == 0) && (useType == TYP_UNKNOWN) && (structSize == (2 * sizeof(float))) && IsHfa(clsHnd))
diff --git a/src/jit/compiler.h b/src/jit/compiler.h
index 400e61b1f3d2..1546048983b8 100644
--- a/src/jit/compiler.h
+++ b/src/jit/compiler.h
@@ -595,11 +595,9 @@ class LclVarDsc
     unsigned char lvIsMultiRegRet : 1; // true if this is a multireg LclVar struct assigned from a multireg call
 
 #ifdef FEATURE_HFA
-    unsigned char _lvIsHfa : 1;          // Is this a struct variable who's class handle is an HFA type
-    unsigned char _lvIsHfaRegArg : 1;    // Is this a HFA argument variable?    // TODO-CLEANUP: Remove this and replace
-                                         // with (lvIsRegArg && lvIsHfa())
-    unsigned char _lvHfaTypeIsFloat : 1; // Is the HFA type float or double?
-#endif                                   // FEATURE_HFA
+    unsigned char _lvIsHfa : 1;      // Is this a struct variable who's class handle is an HFA type
+    unsigned char _lvHfaLogSize : 2; // The base-2 log of the HFA element size in 4-byte units (e.g. 0 for TYP_FLOAT)
+#endif                               // FEATURE_HFA
 
 #ifdef DEBUG
     // TODO-Cleanup: See the note on lvSize() - this flag is only in use by asserts that are checking for struct
@@ -682,54 +680,32 @@ class LclVarDsc
     bool lvIsHfaRegArg() const
     {
 #ifdef FEATURE_HFA
-        return _lvIsHfaRegArg;
+        return lvIsRegArg && _lvIsHfa;
 #else
         return false;
 #endif
     }
 
-    void lvSetIsHfaRegArg(bool value = true)
-    {
-#ifdef FEATURE_HFA
-        _lvIsHfaRegArg = value;
-#endif
-    }
-
-    bool lvHfaTypeIsFloat() const
-    {
-#ifdef FEATURE_HFA
-        return _lvHfaTypeIsFloat;
-#else
-        return false;
-#endif
-    }
-
-    void lvSetHfaTypeIsFloat(bool value)
-    {
-#ifdef FEATURE_HFA
-        _lvHfaTypeIsFloat = value;
-#endif
-    }
-
-    // on Arm64 - Returns 1-4 indicating the number of register slots used by the HFA
-    // on Arm32 - Returns the total number of single FP register slots used by the HFA, max is 8
+    //------------------------------------------------------------------------------
+    // lvHfaSlots: Get the number of slots used by an HFA local
+    //
+    // Return Value:
+    //    On Arm64 - Returns 1-4 indicating the number of register slots used by the HFA
+    //    On Arm32 - Returns the total number of single FP register slots used by the HFA, max is 8
     //
     unsigned lvHfaSlots() const
     {
         assert(lvIsHfa());
         assert(varTypeIsStruct(lvType));
+        unsigned slots = 0;
 #ifdef _TARGET_ARM_
-        return lvExactSize / sizeof(float);
-#else  //  _TARGET_ARM64_
-        if (lvHfaTypeIsFloat())
-        {
-            return lvExactSize / sizeof(float);
-        }
-        else
-        {
-            return lvExactSize / sizeof(double);
-        }
+        slots = lvExactSize / sizeof(float);
+        assert(slots <= 8);
+#elif defined(_TARGET_ARM64_)
+        slots = lvExactSize >> (2 + _lvHfaLogSize); // Element size is (4 << _lvHfaLogSize) bytes.
+        assert(slots <= 4);
 #endif //  _TARGET_ARM64_
+        return slots;
     }
 
     // lvIsMultiRegArgOrRet()
@@ -750,7 +726,7 @@ class LclVarDsc
     regNumberSmall _lvOtherReg; // Used for "upper half" of long var.
 #endif                          // !defined(_TARGET_64BIT_)
 
-    regNumberSmall _lvArgReg; // The register in which this argument is passed.
+    regNumberSmall _lvArgReg; // The (first) register in which this argument is passed.
 
 #if FEATURE_MULTIREG_ARGS
     regNumberSmall _lvOtherArgReg; // Used for the second part of the struct passed in a register.
@@ -1030,14 +1006,22 @@ class LclVarDsc
     {
         return isFloatRegType(lvType) || lvIsHfaRegArg();
     }
+
     var_types GetHfaType() const
     {
-        return lvIsHfa() ? (lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE) : TYP_UNDEF;
+#ifdef FEATURE_HFA
+        assert(lvIsHfa());
+        return hfaTypeForLogSize(_lvHfaLogSize);
+#endif // FEATURE_HFA
+        return TYP_UNDEF;
     }
+
     void SetHfaType(var_types type)
     {
-        assert(varTypeIsFloating(type));
-        lvSetHfaTypeIsFloat(type == TYP_FLOAT);
+#ifdef FEATURE_HFA
+        assert(lvIsHfa());
+        _lvHfaLogSize = logSizeForHfaType(type);
+#endif // FEATURE_HFA
     }
 
     var_types lvaArgType();
@@ -1487,8 +1471,8 @@ struct fgArgTabEntry
     bool _isSplit : 1; // True when this argument is split between the registers and OutArg area
 #endif                 // FEATURE_ARG_SPLIT
 #ifdef FEATURE_HFA
-    bool _isHfaArg : 1;    // True when the argument is an HFA type.
-    bool _isDoubleHfa : 1; // True when the argument is an HFA, with an element type of DOUBLE.
+    bool          _isHfaArg : 1;   // True when the argument is an HFA type.
+    unsigned char _hfaLogSize : 2; // The base 2 log of the HFA element size in 4-byte units (e.g. 0 for TYP_FLOAT)
 #endif
 
     bool isLateArg()
@@ -1589,10 +1573,12 @@ struct fgArgTabEntry
     var_types getHfaType()
     {
 #ifdef FEATURE_HFA
-        return _isHfaArg ? (_isDoubleHfa ? TYP_DOUBLE : TYP_FLOAT) : TYP_UNDEF;
-#else
+        if (_isHfaArg)
+        {
+            return hfaTypeForLogSize(_hfaLogSize);
+        }
+#endif // FEATURE_HFA
         return TYP_UNDEF;
-#endif
     }
 
     void setHfaType(var_types type, unsigned hfaSlots)
@@ -1607,29 +1593,35 @@ struct fgArgTabEntry
             // Note that hfaSlots is the number of registers we will use. For ARM, that is twice
             // the number of "double registers".
             unsigned numHfaRegs = hfaSlots;
-            if (isPassedInRegisters())
-            {
 #ifdef _TARGET_ARM_
-                if (type == TYP_DOUBLE)
-                {
-                    // Must be an even number of registers.
-                    assert((numRegs & 1) == 0);
-                    numHfaRegs = hfaSlots / 2;
-                }
+            if (type == TYP_DOUBLE)
+            {
+                // Must be an even number of registers.
+                assert((numRegs & 1) == 0);
+                numHfaRegs = hfaSlots / 2;
+            }
 #endif // _TARGET_ARM_
-                if (_isHfaArg)
+
+            if (!_isHfaArg)
+            {
+                // We haven't previously set this; do so now.
+                unsigned logSize = logSizeForHfaType(type);
+                if (isPassedInRegisters())
                 {
-                    // This should already be set correctly.
-                    assert(numRegs == numHfaRegs);
-                    assert(_isDoubleHfa == (type == TYP_DOUBLE));
+                    numRegs = numHfaRegs;
                 }
-                else
+                _isHfaArg   = true;
+                _hfaLogSize = logSize;
+            }
+            else
+            {
+                // We've already set this; ensure that it's consistent.
+                if (isPassedInRegisters())
                 {
-                    numRegs = numHfaRegs;
+                    assert(numRegs == numHfaRegs);
                 }
+                assert(_hfaLogSize == logSizeForHfaType(type));
             }
-            _isDoubleHfa = (type == TYP_DOUBLE);
-            _isHfaArg    = true;
         }
 #endif // FEATURE_HFA
     }
@@ -1701,22 +1693,30 @@ struct fgArgTabEntry
     {
         unsigned size = getSlotCount();
 #ifdef FEATURE_HFA
-#ifdef _TARGET_ARM_
-        // We counted the number of regs, but if they are DOUBLE hfa regs we have to double the size.
-        if (isHfaRegArg && (hfaType == TYP_DOUBLE))
+        if (isHfaRegArg)
         {
-            assert(!isSplit);
-            size <<= 1;
-        }
+#ifdef _TARGET_ARM_
+            // We counted the number of regs, but if they are DOUBLE hfa regs we have to double the size.
+            if (hfaType == TYP_DOUBLE)
+            {
+                assert(!isSplit);
+                size <<= 1;
+            }
 #elif defined(_TARGET_ARM64_)
-        // We counted the number of regs, but if they are FLOAT hfa regs we have to halve the size.
-        if (isHfaRegArg && (hfaType == TYP_FLOAT))
-        {
-            // Round up in case of odd HFA count.
-            size = (size + 1) >> 1;
-        }
+            // We counted the number of regs, but if they are FLOAT hfa regs we have to halve the size,
+            // or if they are SIMD16 vector hfa regs we have to double the size.
+            if (hfaType == TYP_FLOAT)
+            {
+                // Round up in case of odd HFA count.
+                size = (size + 1) >> 1;
+            }
+            else if (hfaType == TYP_SIMD16)
+            {
+                size <<= 1;
+            }
 #endif // _TARGET_ARM64_
-#endif
+        }
+#endif // FEATURE_HFA
         return size;
     }
 
@@ -7612,6 +7612,18 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     // Should we support SIMD intrinsics?
     bool featureSIMD;
 
+    // Should we recognize SIMD types? This is distinct from featureSIMD (support for SIMD intrinsics).
+    // Always true on ARM64, to support HVA types; on all other targets it is simply featureSIMD.
+    __declspec(property(get = GetSupportSIMDTypes)) bool supportSIMDTypes;
+    bool GetSupportSIMDTypes()
+    {
+#ifdef _TARGET_ARM64_
+        return true;
+#else
+        return featureSIMD;
+#endif
+    }
+
     // Have we identified any SIMD types?
     // This is currently used by struct promotion to avoid getting type information for a struct
     // field to see if it is a SIMD type, if we haven't seen any SIMD types or operations in
diff --git a/src/jit/compiler.hpp b/src/jit/compiler.hpp
index 901a58ec9e35..0b30114f6768 100644
--- a/src/jit/compiler.hpp
+++ b/src/jit/compiler.hpp
@@ -2919,7 +2919,7 @@ inline regNumber genMapFloatRegArgNumToRegNum(unsigned argNum)
 
 __forceinline regNumber genMapRegArgNumToRegNum(unsigned argNum, var_types type)
 {
-    if (varTypeIsFloating(type))
+    if (varTypeUsesFloatArgReg(type))
     {
         return genMapFloatRegArgNumToRegNum(argNum);
     }
@@ -2957,7 +2957,7 @@ inline regMaskTP genMapFloatRegArgNumToRegMask(unsigned argNum)
 __forceinline regMaskTP genMapArgNumToRegMask(unsigned argNum, var_types type)
 {
     regMaskTP result;
-    if (varTypeIsFloating(type))
+    if (varTypeUsesFloatArgReg(type))
     {
         result = genMapFloatRegArgNumToRegMask(argNum);
 #ifdef _TARGET_ARM_
@@ -3076,7 +3076,7 @@ inline unsigned genMapFloatRegNumToRegArgNum(regNumber regNum)
 
 inline unsigned genMapRegNumToRegArgNum(regNumber regNum, var_types type)
 {
-    if (varTypeIsFloating(type))
+    if (varTypeUsesFloatArgReg(type))
     {
         return genMapFloatRegNumToRegArgNum(regNum);
     }
diff --git a/src/jit/dll/jit.nativeproj b/src/jit/dll/jit.nativeproj
index 7b8dacb78c52..56e94be9cac8 100644
--- a/src/jit/dll/jit.nativeproj
+++ b/src/jit/dll/jit.nativeproj
@@ -31,7 +31,7 @@
 
     <LinkModuleDefinitionFile>$(OutputName).def</LinkModuleDefinitionFile>
 
-    <ClDefines Condition="'$(BuildArchitecture)' == 'amd64'">$(ClDefines);FEATURE_SIMD</ClDefines>
+    <ClDefines Condition="'$(BuildArchitecture)' == 'amd64' or '$(BuildArchitecture)' == 'arm64'">$(ClDefines);FEATURE_SIMD</ClDefines>
 
     <Win32DllLibs>$(SdkLibPath)\kernel32.lib;$(SdkLibPath)\user32.lib;$(SdkLibPath)\advapi32.lib;$(SdkLibPath)\oleaut32.lib;$(SdkLibPath)\uuid.lib</Win32DllLibs>
     <Win32DllLibs>$(Win32DllLibs);$(ClrLibPath)\utilcode.lib</Win32DllLibs>
diff --git a/src/jit/flowgraph.cpp b/src/jit/flowgraph.cpp
index 3075cf3bc12d..7eebedd04e1f 100644
--- a/src/jit/flowgraph.cpp
+++ b/src/jit/flowgraph.cpp
@@ -23258,7 +23258,7 @@ GenTreeStmt* Compiler::fgInlinePrependStatements(InlineInfo* inlineInfo)
                     if (varTypeIsStruct(argType))
                     {
                         structHnd = gtGetStructHandleIfPresent(argNode);
-                        noway_assert(structHnd != NO_CLASS_HANDLE);
+                        noway_assert((structHnd != NO_CLASS_HANDLE) || (argType != TYP_STRUCT));
                     }
 
                     // Unsafe value cls check is not needed for
diff --git a/src/jit/gentree.cpp b/src/jit/gentree.cpp
index 833d115cb2cc..5fa34833e0ba 100644
--- a/src/jit/gentree.cpp
+++ b/src/jit/gentree.cpp
@@ -17653,7 +17653,8 @@ GenTreeSIMD* Compiler::gtNewSIMDNode(
     assert(op1 != nullptr);
     SetOpLclRelatedToSIMDIntrinsic(op1);
 
-    return new (this, GT_SIMD) GenTreeSIMD(type, op1, simdIntrinsicID, baseType, size);
+    GenTreeSIMD* simdNode = new (this, GT_SIMD) GenTreeSIMD(type, op1, simdIntrinsicID, baseType, size);
+    return simdNode;
 }
 
 GenTreeSIMD* Compiler::gtNewSIMDNode(
@@ -17663,7 +17664,8 @@ GenTreeSIMD* Compiler::gtNewSIMDNode(
     SetOpLclRelatedToSIMDIntrinsic(op1);
     SetOpLclRelatedToSIMDIntrinsic(op2);
 
-    return new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size);
+    GenTreeSIMD* simdNode = new (this, GT_SIMD) GenTreeSIMD(type, op1, op2, simdIntrinsicID, baseType, size);
+    return simdNode;
 }
 
 //-------------------------------------------------------------------
@@ -18035,7 +18037,7 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, CORINFO_CLASS_HA
         case Compiler::SPK_PrimitiveType:
         {
             assert(returnType != TYP_UNKNOWN);
-            assert(!varTypeIsStruct(returnType));
+            assert(returnType != TYP_STRUCT);
             m_regType[0] = returnType;
             break;
         }
@@ -18046,7 +18048,7 @@ void ReturnTypeDesc::InitializeStructReturnType(Compiler* comp, CORINFO_CLASS_HA
             var_types hfaType = comp->GetHfaType(retClsHnd);
 
             // We should have an hfa struct type
-            assert(varTypeIsFloating(hfaType));
+            assert(varTypeIsValidHfaType(hfaType));
 
             // Note that the retail build issues a warning about a potential divsion by zero without this Max function
             unsigned elemSize = Max((unsigned)1, EA_SIZE_IN_BYTES(emitActualTypeSize(hfaType)));
diff --git a/src/jit/gentree.h b/src/jit/gentree.h
index 5d45427a395b..b294748b67c8 100644
--- a/src/jit/gentree.h
+++ b/src/jit/gentree.h
@@ -3532,6 +3532,9 @@ struct GenTreeCall final : public GenTree
         return varTypeIsLong(gtType);
 #elif FEATURE_MULTIREG_RET && defined(_TARGET_ARM_)
         return varTypeIsLong(gtType) || (varTypeIsStruct(gtType) && !HasRetBufArg());
+#elif defined(FEATURE_HFA) && defined(_TARGET_ARM64_)
+        // SIMD types are returned in vector regs on ARM64.
+        return (gtType == TYP_STRUCT) && !HasRetBufArg();
 #elif FEATURE_MULTIREG_RET
         return varTypeIsStruct(gtType) && !HasRetBufArg();
 #else
diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp
index ce5c4740c7d3..5a175fd966e3 100644
--- a/src/jit/importer.cpp
+++ b/src/jit/importer.cpp
@@ -1217,7 +1217,7 @@ GenTree* Compiler::impAssignStructPtr(GenTree*             destAddr,
                 // If it is a multi-reg struct return, don't change the oper to GT_LCL_FLD.
                 // That is, the IR will be of the form lclVar = call for multi-reg return
                 //
-                GenTree* lcl = destAddr->gtOp.gtOp1;
+                GenTreeLclVar* lcl = destAddr->gtOp.gtOp1->AsLclVar();
                 if (src->AsCall()->HasMultiRegRetVal())
                 {
                     // Mark the struct LclVar as used in a MultiReg return context
@@ -1227,7 +1227,7 @@ GenTree* Compiler::impAssignStructPtr(GenTree*             destAddr,
                     lcl->gtFlags |= GTF_DONT_CSE;
                     lvaTable[lcl->gtLclVarCommon.gtLclNum].lvIsMultiRegRet = true;
                 }
-                else // The call result is not a multireg return
+                else if (lcl->gtType != src->gtType)
                 {
                     // We change this to a GT_LCL_FLD (from a GT_ADDR of a GT_LCL_VAR)
                     lcl->ChangeOper(GT_LCL_FLD);
@@ -1532,7 +1532,7 @@ var_types Compiler::impNormStructType(CORINFO_CLASS_HANDLE structHnd,
 
 #ifdef FEATURE_SIMD
     // Check to see if this is a SIMD type.
-    if (featureSIMD && !mayContainGCPtrs)
+    if (supportSIMDTypes && !mayContainGCPtrs)
     {
         unsigned originalSize = info.compCompHnd->getClassSize(structHnd);
 
@@ -9001,7 +9001,7 @@ GenTree* Compiler::impFixupStructReturnType(GenTree* op, CORINFO_CLASS_HANDLE re
     {
         // It is possible that we now have a lclVar of scalar type.
         // If so, don't transform it to GT_LCL_FLD.
-        if (varTypeIsStruct(lvaTable[op->AsLclVar()->gtLclNum].lvType))
+        if (lvaTable[op->AsLclVar()->gtLclNum].lvType != info.compRetNativeType)
         {
             op->ChangeOper(GT_LCL_FLD);
         }
@@ -18935,7 +18935,7 @@ void Compiler::impInlineInitVars(InlineInfo* pInlineInfo)
         if ((!foundSIMDType || (type == TYP_STRUCT)) && isSIMDorHWSIMDClass(&(lclVarInfo[i + argCnt].lclVerTypeInfo)))
         {
             foundSIMDType = true;
-            if (featureSIMD && type == TYP_STRUCT)
+            if (supportSIMDTypes && type == TYP_STRUCT)
             {
                 var_types structType = impNormStructType(lclVarInfo[i + argCnt].lclVerTypeInfo.GetClassHandle());
                 lclVarInfo[i + argCnt].lclTypeInfo = structType;
diff --git a/src/jit/lclvars.cpp b/src/jit/lclvars.cpp
index 0f6cdf217348..7012299ac92f 100644
--- a/src/jit/lclvars.cpp
+++ b/src/jit/lclvars.cpp
@@ -124,7 +124,7 @@ void Compiler::lvaInitTypeRef()
     info.compILargsCount = info.compArgsCount;
 
 #ifdef FEATURE_SIMD
-    if (featureSIMD && (info.compRetNativeType == TYP_STRUCT))
+    if (supportSIMDTypes && (info.compRetNativeType == TYP_STRUCT))
     {
         var_types structType = impNormStructType(info.compMethodInfo->args.retTypeClass);
         info.compRetType     = structType;
@@ -149,7 +149,7 @@ void Compiler::lvaInitTypeRef()
         if ((howToReturnStruct == SPK_PrimitiveType) || (howToReturnStruct == SPK_EnclosingType))
         {
             assert(returnType != TYP_UNKNOWN);
-            assert(!varTypeIsStruct(returnType));
+            assert(returnType != TYP_STRUCT);
 
             info.compRetNativeType = returnType;
 
@@ -397,7 +397,7 @@ void Compiler::lvaInitThisPtr(InitVarDscInfo* varDscInfo)
         {
             varDsc->lvType = TYP_BYREF;
 #ifdef FEATURE_SIMD
-            if (featureSIMD)
+            if (supportSIMDTypes)
             {
                 var_types simdBaseType = TYP_UNKNOWN;
                 var_types type         = impNormStructType(info.compClassHnd, nullptr, nullptr, &simdBaseType);
@@ -505,7 +505,7 @@ void Compiler::lvaInitRetBuffArg(InitVarDscInfo* varDscInfo)
             }
         }
 #ifdef FEATURE_SIMD
-        else if (featureSIMD && varTypeIsSIMD(info.compRetType))
+        else if (supportSIMDTypes && varTypeIsSIMD(info.compRetType))
         {
             varDsc->lvSIMDType = true;
             varDsc->lvBaseType =
@@ -598,8 +598,9 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
             // If the argType is a struct, then check if it is an HFA
             if (varTypeIsStruct(argType))
             {
-                hfaType  = GetHfaType(typeHnd); // set to float or double if it is an HFA, otherwise TYP_UNDEF
-                isHfaArg = varTypeIsFloating(hfaType);
+                // hfaType is set to float, double or SIMD type if it is an HFA, otherwise TYP_UNDEF.
+                hfaType  = GetHfaType(typeHnd);
+                isHfaArg = varTypeIsValidHfaType(hfaType);
             }
         }
         else if (info.compIsVarArgs)
@@ -616,7 +617,7 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
 
         if (isHfaArg)
         {
-            // We have an HFA argument, so from here on out treat the type as a float or double.
+            // We have an HFA argument, so from here on out treat the type as a float, double or vector.
             // The orginal struct type is available by using origArgType
             // We also update the cSlots to be the number of float/double fields in the HFA
             argType = hfaType;
@@ -819,15 +820,20 @@ void Compiler::lvaInitUserArgs(InitVarDscInfo* varDscInfo)
             {
                 // We need to save the fact that this HFA is enregistered
                 varDsc->lvSetIsHfa();
-                varDsc->lvSetIsHfaRegArg();
                 varDsc->SetHfaType(hfaType);
-                varDsc->lvIsMultiRegArg = (varDsc->lvHfaSlots() > 1);
+                // Note that we can have HVAs of SIMD types even if we are not recognizing intrinsics.
+                // In that case, we won't have normalized the vector types on the varDsc, so if we have a single vector
+                // register, we need to set the type now. Otherwise, later we'll assume this is passed by reference.
+                if (varDsc->lvHfaSlots() != 1)
+                {
+                    varDsc->lvIsMultiRegArg = true;
+                }
             }
 
             varDsc->lvIsRegArg = 1;
 
 #if FEATURE_MULTIREG_ARGS
-            if (varTypeIsStruct(argType))
+            if (argType == TYP_STRUCT)
             {
 #if defined(UNIX_AMD64_ABI)
                 varDsc->lvArgReg = genMapRegArgNumToRegNum(firstAllocatedRegArgNum, firstEightByteType);
@@ -2513,10 +2519,10 @@ void Compiler::lvaSetStruct(unsigned varNum, CORINFO_CLASS_HANDLE typeHnd, bool
             if (varDsc->lvExactSize <= MAX_PASS_MULTIREG_BYTES)
             {
                 var_types hfaType = GetHfaType(typeHnd); // set to float or double if it is an HFA, otherwise TYP_UNDEF
-                if (varTypeIsFloating(hfaType))
+                if (varTypeIsValidHfaType(hfaType))
                 {
                     varDsc->_lvIsHfa = true;
-                    varDsc->lvSetHfaTypeIsFloat(hfaType == TYP_FLOAT);
+                    varDsc->SetHfaType(hfaType);
 
                     // hfa variables can never contain GC pointers
                     assert(varDsc->lvStructGcCount == 0);
@@ -2588,8 +2594,7 @@ void Compiler::lvaSetStructUsedAsVarArg(unsigned varNum)
     LclVarDsc* varDsc = &lvaTable[varNum];
     // For varargs methods incoming and outgoing arguments should not be treated
     // as HFA.
-    varDsc->_lvIsHfa          = false;
-    varDsc->_lvHfaTypeIsFloat = false;
+    varDsc->_lvIsHfa = false;
 #endif // defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
 #endif // FEATURE_HFA
 }
@@ -6914,16 +6919,9 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r
         }
     }
 
-    if (varDsc->lvIsHfaRegArg())
+    if (varDsc->lvIsHfa())
     {
-        if (varDsc->lvHfaTypeIsFloat())
-        {
-            printf(" (enregistered HFA: float) ");
-        }
-        else
-        {
-            printf(" (enregistered HFA: double)");
-        }
+        printf(" HFA(%s) ", varTypeName(varDsc->GetHfaType()));
     }
 
     if (varDsc->lvDoNotEnregister)
diff --git a/src/jit/lower.cpp b/src/jit/lower.cpp
index a2f7080a9d40..c23527612fb7 100644
--- a/src/jit/lower.cpp
+++ b/src/jit/lower.cpp
@@ -5691,7 +5691,8 @@ void Lowering::ContainCheckRet(GenTreeOp* ret)
         {
             GenTreeLclVarCommon* lclVarCommon = op1->AsLclVarCommon();
             LclVarDsc*           varDsc       = &(comp->lvaTable[lclVarCommon->gtLclNum]);
-            assert(varDsc->lvIsMultiRegRet);
+            // This must be a multi-reg return or an HFA of a single element.
+            assert(varDsc->lvIsMultiRegRet || (varDsc->lvIsHfa() && varTypeIsValidHfaType(varDsc->lvType)));
 
             // Mark var as contained if not enregistrable.
             if (!varTypeIsEnregisterableStruct(op1))
diff --git a/src/jit/lsraarmarch.cpp b/src/jit/lsraarmarch.cpp
index 251bf53ab267..9a54c3227978 100644
--- a/src/jit/lsraarmarch.cpp
+++ b/src/jit/lsraarmarch.cpp
@@ -208,7 +208,7 @@ int LinearScan::BuildCall(GenTreeCall* call)
         assert(retTypeDesc != nullptr);
         dstCandidates = retTypeDesc->GetABIReturnRegs();
     }
-    else if (varTypeIsFloating(registerType))
+    else if (varTypeUsesFloatArgReg(registerType))
     {
         dstCandidates = RBM_FLOATRET;
     }
diff --git a/src/jit/lsrabuild.cpp b/src/jit/lsrabuild.cpp
index 5d8df339a67d..946756ad6e71 100644
--- a/src/jit/lsrabuild.cpp
+++ b/src/jit/lsrabuild.cpp
@@ -3060,7 +3060,7 @@ int LinearScan::BuildReturn(GenTree* tree)
         regMaskTP useCandidates = RBM_NONE;
 
 #if FEATURE_MULTIREG_RET
-        if (varTypeIsStruct(tree))
+        if (tree->TypeGet() == TYP_STRUCT)
         {
             // op1 has to be either an lclvar or a multi-reg returning call
             if (op1->OperGet() == GT_LCL_VAR)
@@ -3081,6 +3081,16 @@ int LinearScan::BuildReturn(GenTree* tree)
                 return srcCount;
             }
         }
+        else if (varTypeIsSIMD(tree))
+        {
+#ifdef _TARGET_ARM64_
+            useCandidates = allSIMDRegs();
+            BuildUse(op1, useCandidates);
+            return 1;
+#else  // !_TARGET_ARM64_
+            assert(!"Unexpected SIMD return type");
+#endif // !_TARGET_ARM64_
+        }
         else
 #endif // FEATURE_MULTIREG_RET
         {
@@ -3199,7 +3209,7 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node)
         GenTreeObj* obj  = op1->AsObj();
         GenTree*    addr = obj->Addr();
         unsigned    size = obj->gtBlkSize;
-        assert(size <= TARGET_POINTER_SIZE);
+        assert(size <= MAX_PASS_SINGLEREG_BYTES);
         if (addr->OperIsLocalAddr())
         {
             // We don't need a source register.
diff --git a/src/jit/morph.cpp b/src/jit/morph.cpp
index 2369d1eda53d..2d84fbd09d36 100644
--- a/src/jit/morph.cpp
+++ b/src/jit/morph.cpp
@@ -828,6 +828,7 @@ void fgArgTabEntry::Dump()
 {
     printf("fgArgTabEntry[arg %u", argNum);
     printf(" %d.%s", node->gtTreeID, GenTree::OpName(node->gtOper));
+    printf(" %s", varTypeName(argType));
     if (regNum != REG_STK)
     {
         printf(", %u reg%s:", numRegs, numRegs == 1 ? "" : "s");
@@ -867,7 +868,7 @@ void fgArgTabEntry::Dump()
     }
     if (isHfaRegArg)
     {
-        printf(", isHfa");
+        printf(", isHfa(%s)", varTypeName(getHfaType()));
     }
     if (isBackFilled)
     {
@@ -2300,12 +2301,16 @@ void fgArgInfo::EvalArgsToTemps()
                     {
                         setupArg = compiler->fgMorphCopyBlock(setupArg);
 #if defined(_TARGET_ARMARCH_) || defined(UNIX_AMD64_ABI)
-                        // This scalar LclVar widening step is only performed for ARM and AMD64 unix.
-                        //
-                        CORINFO_CLASS_HANDLE clsHnd     = compiler->lvaGetStruct(tmpVarNum);
-                        unsigned             structSize = varDsc->lvExactSize;
+                        if (lclVarType == TYP_STRUCT)
+                        {
+                            // This scalar LclVar widening step is only performed for ARM and AMD64 unix.
+                            //
+                            CORINFO_CLASS_HANDLE clsHnd     = compiler->lvaGetStruct(tmpVarNum);
+                            unsigned             structSize = varDsc->lvExactSize;
 
-                        scalarType = compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->isVararg);
+                            scalarType =
+                                compiler->getPrimitiveTypeForStruct(structSize, clsHnd, curArgTabEntry->isVararg);
+                        }
 #endif // _TARGET_ARMARCH_ || defined (UNIX_AMD64_ABI)
                     }
 
@@ -2391,7 +2396,7 @@ void fgArgInfo::EvalArgsToTemps()
 
 #else // !defined(_TARGET_AMD64_) || defined(UNIX_AMD64_ABI)
 
-            if (varTypeIsStruct(defArg))
+            if (defArg->TypeGet() == TYP_STRUCT)
             {
                 clsHnd = compiler->gtGetStructHandleIfPresent(defArg);
                 noway_assert(clsHnd != NO_CLASS_HANDLE);
@@ -3079,7 +3084,7 @@ void Compiler::fgInitArgInfo(GenTreeCall* call)
 
 #ifdef FEATURE_HFA
         hfaType  = GetHfaType(argx);
-        isHfaArg = varTypeIsFloating(hfaType);
+        isHfaArg = varTypeIsValidHfaType(hfaType);
 
 #if defined(_TARGET_WINDOWS_) && defined(_TARGET_ARM64_)
         // Make sure for vararg methods isHfaArg is not true.
@@ -3872,7 +3877,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                 {
                     if (isPow2(passingSize))
                     {
-                        canTransform = true;
+                        canTransform = (!argEntry->isHfaArg || (passingSize == genTypeSize(argEntry->getHfaType())));
                     }
 
 #if defined(_TARGET_ARM64_) || defined(UNIX_AMD64_ABI)
@@ -3957,15 +3962,16 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                 }
                 else
                 {
-                    // We have a struct argument that's less than pointer size, and it is either a power of 2,
+                    // We have a struct argument that fits into a register, and it is either a power of 2,
                     // or a local.
-                    // Change our GT_OBJ into a GT_IND of the correct type.
+                    // Change our argument, as needed, into a value of the appropriate type.
                     CLANG_FORMAT_COMMENT_ANCHOR;
 
 #ifdef _TARGET_ARM_
                     assert((size == 1) || ((structBaseType == TYP_DOUBLE) && (size == 2)));
 #else
-                    assert(size == 1);
+                    assert((size == 1) ||
+                           (varTypeIsSIMD(structBaseType) && size == (genTypeSize(structBaseType) / REGSIZE_BYTES)));
 #endif
 
                     assert((structBaseType != TYP_STRUCT) && (genTypeSize(structBaseType) >= originalSize));
@@ -4043,7 +4049,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                                 copyBlkClass = objClass;
                             }
                         }
-                        else if (!varTypeIsIntegralOrI(varDsc->TypeGet()))
+                        else if (genActualType(varDsc->TypeGet()) != structBaseType)
                         {
                             // Not a promoted struct, so just swizzle the type by using GT_LCL_FLD
                             argObj->ChangeOper(GT_LCL_FLD);
@@ -4057,42 +4063,39 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
                     }
                     assert(varTypeCanReg(argObj->TypeGet()) ||
                            ((copyBlkClass != NO_CLASS_HANDLE) && varTypeCanReg(structBaseType)));
-
-                    size = 1;
                 }
 #endif // !_TARGET_X86_
 
 #ifndef UNIX_AMD64_ABI
                 // We still have a struct unless we converted the GT_OBJ into a GT_IND above...
-                if (varTypeIsStruct(structBaseType) && !argEntry->passedByRef)
+                if (isHfaArg && passUsingFloatRegs)
                 {
-                    if (isHfaArg && passUsingFloatRegs)
-                    {
-                        size = argEntry->numRegs;
-                    }
-                    else
-                    {
-                        // If the valuetype size is not a multiple of TARGET_POINTER_SIZE,
-                        // we must copyblk to a temp before doing the obj to avoid
-                        // the obj reading memory past the end of the valuetype
-                        CLANG_FORMAT_COMMENT_ANCHOR;
+                    size = argEntry->numRegs;
+                }
+                else if (structBaseType == TYP_STRUCT)
+                {
+                    // If the valuetype size is not a multiple of TARGET_POINTER_SIZE,
+                    // we must copyblk to a temp before doing the obj to avoid
+                    // the obj reading memory past the end of the valuetype
+                    CLANG_FORMAT_COMMENT_ANCHOR;
 
-                        if (roundupSize > originalSize)
-                        {
-                            copyBlkClass = objClass;
+                    if (roundupSize > originalSize)
+                    {
+                        copyBlkClass = objClass;
 
-                            // There are a few special cases where we can omit using a CopyBlk
-                            // where we normally would need to use one.
+                        // There are a few special cases where we can omit using a CopyBlk
+                        // where we normally would need to use one.
 
-                            if (argObj->gtObj.gtOp1->IsLocalAddrExpr() != nullptr) // Is the source a LclVar?
-                            {
-                                copyBlkClass = NO_CLASS_HANDLE;
-                            }
+                        if (argObj->OperIs(GT_OBJ) &&
+                            argObj->AsObj()->gtGetOp1()->IsLocalAddrExpr() != nullptr) // Is the source a LclVar?
+                        {
+                            copyBlkClass = NO_CLASS_HANDLE;
                         }
-
-                        size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
                     }
+
+                    size = roundupSize / TARGET_POINTER_SIZE; // Normalize size to number of pointer sized items
                 }
+
 #endif // !UNIX_AMD64_ABI
             }
         }
@@ -4159,7 +4162,7 @@ GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
 #if FEATURE_MULTIREG_ARGS
         if (isStructArg)
         {
-            if (size > 1 || isHfaArg)
+            if ((size > 1) || (isHfaArg && (argx->TypeGet() == TYP_STRUCT)))
             {
                 hasMultiregStructArgs = true;
             }
@@ -4376,9 +4379,35 @@ void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
         }
 
         unsigned size = (fgEntryPtr->numRegs + fgEntryPtr->numSlots);
-        if ((size > 1) || fgEntryPtr->isHfaArg)
+        if ((size > 1) || (fgEntryPtr->isHfaArg && (argx->TypeGet() == TYP_STRUCT)))
         {
             foundStructArg = true;
+            if (fgEntryPtr->isHfaArg)
+            {
+                var_types hfaType = fgEntryPtr->hfaType;
+                unsigned  structSize;
+                if (argx->OperIs(GT_OBJ))
+                {
+                    structSize = argx->AsObj()->gtBlkSize;
+                }
+                else
+                {
+                    assert(argx->OperIs(GT_LCL_VAR));
+                    structSize = lvaGetDesc(argx->AsLclVar()->gtLclNum)->lvExactSize;
+                }
+                assert(structSize > 0);
+                if (structSize == genTypeSize(hfaType))
+                {
+                    if (argx->OperIs(GT_OBJ))
+                    {
+                        fgMorphBlkToInd(argx->AsObj(), hfaType);
+                    }
+                    else
+                    {
+                        argx->gtType = hfaType;
+                    }
+                }
+            }
             if (varTypeIsStruct(argx) && !argx->OperIs(GT_FIELD_LIST))
             {
                 arg = fgMorphMultiregStructArg(arg, fgEntryPtr);
@@ -4490,14 +4519,19 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
 #if FEATURE_MULTIREG_ARGS
     // Examine 'arg' and setup argValue objClass and structSize
     //
-    CORINFO_CLASS_HANDLE objClass   = gtGetStructHandleIfPresent(arg);
-    GenTree*             argValue   = arg; // normally argValue will be arg, but see right below
-    unsigned             structSize = 0;
+    CORINFO_CLASS_HANDLE objClass = gtGetStructHandleIfPresent(arg);
+    noway_assert(objClass != NO_CLASS_HANDLE);
+    GenTree* argValue   = arg; // normally argValue will be arg, but see right below
+    unsigned structSize = 0;
 
-    if (arg->OperGet() == GT_OBJ)
+    if (arg->TypeGet() != TYP_STRUCT)
+    {
+        structSize = genTypeSize(arg->TypeGet());
+        assert(structSize == info.compCompHnd->getClassSize(objClass));
+    }
+    else if (arg->OperGet() == GT_OBJ)
     {
         GenTreeObj* argObj = arg->AsObj();
-        objClass           = argObj->gtClass;
         structSize         = argObj->Size();
         assert(structSize == info.compCompHnd->getClassSize(objClass));
 
@@ -4527,7 +4561,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
     }
     else
     {
-        objClass   = gtGetStructHandleIfPresent(arg);
         structSize = info.compCompHnd->getClassSize(objClass);
     }
     noway_assert(objClass != NO_CLASS_HANDLE);
@@ -4538,8 +4571,8 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
     unsigned  elemSize                = 0;
     var_types type[MAX_ARG_REG_COUNT] = {}; // TYP_UNDEF = 0
 
-    hfaType = GetHfaType(objClass); // set to float or double if it is an HFA, otherwise TYP_UNDEF
-    if (varTypeIsFloating(hfaType)
+    hfaType = fgEntryPtr->hfaType;
+    if (varTypeIsValidHfaType(hfaType)
 #if !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
         && !fgEntryPtr->isVararg
 #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
@@ -4657,8 +4690,13 @@ GenTree* Compiler::fgMorphMultiregStructArg(GenTree* arg, fgArgTabEntry* fgEntry
 #endif // !defined(_HOST_UNIX_) && defined(_TARGET_ARM64_)
             )
         {
-            // We have a HFA struct
-            noway_assert(elemType == (varDsc->lvHfaTypeIsFloat() ? TYP_FLOAT : TYP_DOUBLE));
+            // We have a HFA struct.
+            // Note that GetHfaType may not be the same as elemType, since TYP_SIMD8 is handled the same as TYP_DOUBLE.
+            var_types useElemType = elemType;
+#ifdef _TARGET_ARM64_
+            useElemType = (elemType == TYP_SIMD8) ? TYP_DOUBLE : useElemType;
+#endif // _TARGET_ARM64_
+            noway_assert(useElemType == varDsc->GetHfaType());
             noway_assert(elemSize == genTypeSize(elemType));
             noway_assert(elemCount == (varDsc->lvExactSize / elemSize));
             noway_assert(elemSize * elemCount == varDsc->lvExactSize);
@@ -5291,7 +5329,7 @@ void Compiler::fgFixupStructReturn(GenTree* callNode)
 
 #if FEATURE_MULTIREG_RET
     // Either we don't have a struct now or if struct, then it is a struct returned in regs or in return buffer.
-    assert(!varTypeIsStruct(call) || call->HasMultiRegRetVal() || callHasRetBuffArg);
+    assert((call->gtType != TYP_STRUCT) || call->HasMultiRegRetVal() || callHasRetBuffArg);
 #else // !FEATURE_MULTIREG_RET
     // No more struct returns
     assert(call->TypeGet() != TYP_STRUCT);
@@ -7103,7 +7141,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee)
 
 #elif defined(_TARGET_ARM64_) // ARM64
                 var_types hfaType  = GetHfaType(argx);
-                bool      isHfaArg = varTypeIsFloating(hfaType);
+                bool      isHfaArg = varTypeIsValidHfaType(hfaType);
                 size_t    size     = 1;
 
                 if (isHfaArg)
@@ -17461,23 +17499,28 @@ void Compiler::fgMarkImplicitByRefArgs()
 
         if (varDsc->lvIsParam && varTypeIsStruct(varDsc))
         {
-            size_t size;
+            size_t size = varDsc->lvExactSize;
+            assert(size == info.compCompHnd->getClassSize(varDsc->lvVerTypeInfo.GetClassHandle()));
 
-            if (varDsc->lvSize() > REGSIZE_BYTES)
+            bool isPassedByReference;
+#if defined(_TARGET_AMD64_)
+            isPassedByReference = (size > REGSIZE_BYTES || (size & (size - 1)) != 0);
+#elif defined(_TARGET_ARM64_)
+            if (size > TARGET_POINTER_SIZE)
             {
-                size = varDsc->lvSize();
+                CORINFO_CLASS_HANDLE clsHnd = varDsc->lvVerTypeInfo.GetClassHandleForValueClass();
+                structPassingKind    howToPassStruct;
+                var_types            type =
+                    getArgTypeForStruct(clsHnd, &howToPassStruct, this->info.compIsVarArgs, varDsc->lvExactSize);
+                isPassedByReference = (howToPassStruct == SPK_ByReference);
             }
             else
             {
-                CORINFO_CLASS_HANDLE typeHnd = varDsc->lvVerTypeInfo.GetClassHandle();
-                size                         = info.compCompHnd->getClassSize(typeHnd);
+                isPassedByReference = false;
             }
-
-#if defined(_TARGET_AMD64_)
-            if (size > REGSIZE_BYTES || (size & (size - 1)) != 0)
-#elif defined(_TARGET_ARM64_)
-            if ((size > TARGET_POINTER_SIZE) && !lvaIsMultiregStruct(varDsc, this->info.compIsVarArgs))
 #endif
+
+            if (isPassedByReference)
             {
                 // Previously nobody was ever setting lvIsParam and lvIsTemp on the same local
                 // So I am now using it to indicate that this is one of the weird implicit
@@ -17623,8 +17666,7 @@ void Compiler::fgRetypeImplicitByRefArgs()
                     // the parameter which is really a pointer to the struct.
                     fieldVarDsc->lvIsRegArg      = false;
                     fieldVarDsc->lvIsMultiRegArg = false;
-                    fieldVarDsc->lvSetIsHfaRegArg(false);
-                    fieldVarDsc->lvArgReg = REG_NA;
+                    fieldVarDsc->lvArgReg        = REG_NA;
 #if FEATURE_MULTIREG_ARGS
                     fieldVarDsc->lvOtherArgReg = REG_NA;
 #endif
diff --git a/src/jit/register_arg_convention.h b/src/jit/register_arg_convention.h
index 28f29b7c13c3..ad20b4a0f543 100644
--- a/src/jit/register_arg_convention.h
+++ b/src/jit/register_arg_convention.h
@@ -58,7 +58,7 @@ struct InitVarDscInfo
     // return ref to current register arg for this type
     unsigned& regArgNum(var_types type)
     {
-        return varTypeIsFloating(type) ? floatRegArgNum : intRegArgNum;
+        return varTypeUsesFloatArgReg(type) ? floatRegArgNum : intRegArgNum;
     }
 
     // Allocate a set of contiguous argument registers. "type" is either an integer
@@ -110,7 +110,7 @@ struct InitVarDscInfo
     // return max register arg for this type
     unsigned maxRegArgNum(var_types type)
     {
-        return varTypeIsFloating(type) ? maxFloatRegArgNum : maxIntRegArgNum;
+        return varTypeUsesFloatArgReg(type) ? maxFloatRegArgNum : maxIntRegArgNum;
     }
 
     bool enoughAvailRegs(var_types type, unsigned numRegs = 1);
diff --git a/src/jit/simd.cpp b/src/jit/simd.cpp
index b4cecb3e2046..d2a959abb57b 100644
--- a/src/jit/simd.cpp
+++ b/src/jit/simd.cpp
@@ -121,7 +121,7 @@ int Compiler::getSIMDTypeAlignment(var_types simdType)
 //
 var_types Compiler::getBaseTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeHnd, unsigned* sizeBytes /*= nullptr */)
 {
-    assert(featureSIMD);
+    assert(supportSIMDTypes);
 
     if (m_simdHandleCache == nullptr)
     {
diff --git a/src/jit/target.h b/src/jit/target.h
index 561db79c6ae5..e2707284f785 100644
--- a/src/jit/target.h
+++ b/src/jit/target.h
@@ -246,6 +246,7 @@ typedef unsigned char   regNumberSmall;
   #define FEATURE_MULTIREG_ARGS_OR_RET  1  // Support for passing and/or returning single values in more than one register
   #define FEATURE_MULTIREG_ARGS         0  // Support for passing a single argument in more than one register  
   #define FEATURE_MULTIREG_RET          1  // Support for returning a single value in more than one register
+  #define MAX_PASS_SINGLEREG_BYTES      8  // Maximum size of a struct passed in a single register (double).
   #define MAX_PASS_MULTIREG_BYTES       0  // No multireg arguments (note this seems wrong as MAX_ARG_REG_COUNT is 2)
   #define MAX_RET_MULTIREG_BYTES        8  // Maximum size of a struct that could be returned in more than one register
 
@@ -531,6 +532,7 @@ typedef unsigned char   regNumberSmall;
   #define FEATURE_FASTTAILCALL     1       // Tail calls made as epilog+jmp
   #define FEATURE_TAILCALL_OPT     1       // opportunistic Tail calls (i.e. without ".tail" prefix) made as fast tail calls.
   #define FEATURE_SET_FLAGS        0       // Set to true to force the JIT to mark the trees with GTF_SET_FLAGS when the flags need to be set
+  #define MAX_PASS_SINGLEREG_BYTES      8  // Maximum size of a struct passed in a single register (double).
 #ifdef    UNIX_AMD64_ABI
   #define FEATURE_MULTIREG_ARGS_OR_RET  1  // Support for passing and/or returning single values in more than one register
   #define FEATURE_MULTIREG_ARGS         1  // Support for passing a single argument in more than one register  
@@ -912,6 +914,7 @@ typedef unsigned char   regNumberSmall;
   #define FEATURE_MULTIREG_ARGS         1  // Support for passing a single argument in more than one register (including passing HFAs)
   #define FEATURE_MULTIREG_RET          1  // Support for returning a single value in more than one register (including HFA returns)
   #define FEATURE_STRUCT_CLASSIFIER     0  // Uses a classifier function to determine is structs are passed/returned in more than one register
+  #define MAX_PASS_SINGLEREG_BYTES      8  // Maximum size of a struct passed in a single register (double).
   #define MAX_PASS_MULTIREG_BYTES      32  // Maximum size of a struct that could be passed in more than one register (Max is an HFA of 4 doubles)
   #define MAX_RET_MULTIREG_BYTES       32  // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles)
   #define MAX_ARG_REG_COUNT             4  // Maximum registers used to pass a single argument in multiple registers. (max is 4 floats or doubles using an HFA)
@@ -1219,9 +1222,10 @@ typedef unsigned char   regNumberSmall;
   #define FEATURE_MULTIREG_ARGS         1  // Support for passing a single argument in more than one register  
   #define FEATURE_MULTIREG_RET          1  // Support for returning a single value in more than one register  
   #define FEATURE_STRUCT_CLASSIFIER     0  // Uses a classifier function to determine is structs are passed/returned in more than one register
-  #define MAX_PASS_MULTIREG_BYTES      32  // Maximum size of a struct that could be passed in more than one register (max is 4 doubles using an HFA)
-  #define MAX_RET_MULTIREG_BYTES       32  // Maximum size of a struct that could be returned in more than one register (Max is an HFA of 4 doubles)
-  #define MAX_ARG_REG_COUNT             4  // Maximum registers used to pass a single argument in multiple registers. (max is 4 floats or doubles using an HFA)
+  #define MAX_PASS_SINGLEREG_BYTES     16  // Maximum size of a struct passed in a single register (16-byte vector).
+  #define MAX_PASS_MULTIREG_BYTES      64  // Maximum size of a struct that could be passed in more than one register (max is 4 16-byte vectors using an HVA)
+  #define MAX_RET_MULTIREG_BYTES       64  // Maximum size of a struct that could be returned in more than one register (Max is an HVA of 4 16-byte vectors)
+  #define MAX_ARG_REG_COUNT             4  // Maximum registers used to pass a single argument in multiple registers. (max is 4 128-bit vectors using an HVA)
   #define MAX_RET_REG_COUNT             4  // Maximum registers used to return a value.
 
   #define NOGC_WRITE_BARRIERS      1       // We have specialized WriteBarrier JIT Helpers that DO-NOT trash the RBM_CALLEE_TRASH registers
@@ -1944,10 +1948,10 @@ inline regNumber regNextOfType(regNumber reg, var_types type)
  *  Type checks
  */
 
-inline bool isFloatRegType(int /* s/b "var_types" */ type)
+inline bool isFloatRegType(var_types type)
 {
 #if CPU_HAS_FP_SUPPORT
-    return type == TYP_DOUBLE || type == TYP_FLOAT;
+    return varTypeUsesFloatReg(type);
 #else
     return false;
 #endif
diff --git a/src/jit/vartype.h b/src/jit/vartype.h
index 04793ea86830..1d7f3c07fb6c 100644
--- a/src/jit/vartype.h
+++ b/src/jit/vartype.h
@@ -176,7 +176,7 @@ inline bool varTypeIsI(T vt)
 template <class T>
 inline bool varTypeCanReg(T vt)
 {
-    return ((varTypeClassification[TypeGet(vt)] & (VTF_INT | VTF_I | VTF_FLT)) != 0);
+    return (TypeGet(vt) != TYP_STRUCT);
 }
 
 template <class T>
@@ -276,6 +276,128 @@ inline bool varTypeIsEnregisterableStruct(T vt)
     return (TypeGet(vt) != TYP_STRUCT);
 }
 
+template <class T>
+inline bool varTypeUsesFloatReg(T vt)
+{
+    // Note that not all targets support SIMD, but if they don't, varTypeIsSIMD will
+    // always return false.
+    return varTypeIsFloating(vt) || varTypeIsSIMD(vt);
+}
+
+template <class T>
+inline bool varTypeUsesFloatArgReg(T vt)
+{
+#ifdef _TARGET_ARM64_
+    // Arm64 passes SIMD types in floating point registers.
+    return varTypeUsesFloatReg(vt);
+#else
+    // Other targets pass them as regular structs - by reference or by value.
+    return varTypeIsFloating(vt);
+#endif
+}
+
+//------------------------------------------------------------------------
+// varTypeIsValidHfaType: Determine if the type is a valid HFA type
+//
+// Arguments:
+//    vt - the type of interest
+//
+// Return Value:
+//    Returns true iff the type is a valid HFA type.
+//
+// Notes:
+//    This should only be called with the return value from GetHfaType().
+//    The only valid values are TYP_UNDEF, for which this returns false,
+//    TYP_FLOAT, TYP_DOUBLE, or (ARM64-only) TYP_SIMD*.
+//
+template <class T>
+inline bool varTypeIsValidHfaType(T vt)
+{
+#ifdef FEATURE_HFA
+    bool isValid = (TypeGet(vt) != TYP_UNDEF);
+    if (isValid)
+    {
+#ifdef _TARGET_ARM64_
+        assert(varTypeUsesFloatReg(vt));
+#else  // !_TARGET_ARM64_
+        assert(varTypeIsFloating(vt));
+#endif // !_TARGET_ARM64_
+    }
+    return isValid;
+#else  // !FEATURE_HFA
+    return false;
+#endif // !FEATURE_HFA
+}
+
+#ifdef FEATURE_HFA
+//------------------------------------------------------------------------
+// logSizeForHfaType: Get the logSize for an HFA type
+//
+// Arguments:
+//    vt - the type of interest (anything accepted by TypeGet)
+//
+// Return Value:
+//    Base 2 log of the type size in 4-byte units: 0 (TYP_FLOAT), 1 (TYP_DOUBLE/TYP_SIMD8), 2 (TYP_SIMD16).
+//
+// Notes:
+//    This should only be called with a valid HFA type; any other type asserts and yields 0.
+//    The only valid values are TYP_FLOAT, TYP_DOUBLE, or (ARM64-only) TYP_SIMD*.
+//
+template <class T>
+inline unsigned char logSizeForHfaType(T vt)
+{
+    unsigned char logSize = 0;
+    switch (TypeGet(vt))
+    {
+        case TYP_FLOAT:
+            logSize = 0;
+            break;
+#ifdef _TARGET_ARM64_
+        case TYP_SIMD16:
+            logSize = 2;
+            break;
+        case TYP_SIMD8:
+// We don't distinguish between HFAs of double and HFAs of SIMD8 (deliberate fall through).
+#endif // _TARGET_ARM64_
+        case TYP_DOUBLE:
+            logSize = 1;
+            break;
+        default:
+            assert(!"unexpected type for logSizeForHfaType");
+            break;
+    }
+    return logSize;
+}
+
+// hfaTypeForLogSize: Get HFA type for a given logSize
+//
+// Arguments:
+//    logSize - base 2 log of the size of the type in 4-byte units.
+//
+// Return Value:
+//    Returns the HFA type; asserts and returns TYP_UNDEF if logSize is not a valid HFA log size.
+//
+inline var_types hfaTypeForLogSize(unsigned char logSize)
+{
+    switch (logSize)
+    {
+        case 0:
+            return TYP_FLOAT;
+        case 1:
+            // We don't distinguish between HFAs of double and HFAs of SIMD8.
+            return TYP_DOUBLE;
+#ifdef _TARGET_ARM64_
+        case 2:
+            return TYP_SIMD16;
+#endif // _TARGET_ARM64_
+        default:
+            assert(!"unexpected logSize for hfaTypeForLogSize");
+            return TYP_UNDEF;
+    }
+}
+
+#endif // FEATURE_HFA
+
 /*****************************************************************************/
 #endif // _VARTYPE_H_
 /*****************************************************************************/
diff --git a/src/vm/argdestination.h b/src/vm/argdestination.h
index 386ba57c821f..8ddd7b210412 100644
--- a/src/vm/argdestination.h
+++ b/src/vm/argdestination.h
@@ -60,22 +60,24 @@ class ArgDestination
     //  fieldBytes - size of the structure
     void CopyHFAStructToRegister(void *src, int fieldBytes)
     {
-        // We are either copying either a float or double HFA and need to
+        // We are copying a float, double or vector HFA/HVA and need to
         // enregister each field.
 
         int floatRegCount = m_argLocDescForStructInRegs->m_cFloatReg;
-        bool typeFloat = m_argLocDescForStructInRegs->m_isSinglePrecision;
+        int hfaFieldSize = m_argLocDescForStructInRegs->m_hfaFieldSize;
         UINT64* dest = (UINT64*) this->GetDestinationAddress();
 
         for (int i = 0; i < floatRegCount; ++i) 
         {
             // Copy 4 or 8 bytes from src.
-            UINT64 val = typeFloat ? *((UINT32*)src + i) : *((UINT64*)src + i);
+            UINT64 val = (hfaFieldSize == 4) ? *((UINT32*)src) : *((UINT64*)src);
             // Always store 8 bytes
             *(dest++) = val;
-            // For now, always zero the next 8 bytes.
-            // (When HVAs are supported we will get the next 8 bytes from src.)
-            *(dest++) = 0;
+            // Either zero the next 8 bytes or get the next 8 bytes from src for 16-byte vector.
+            *(dest++) = (hfaFieldSize == 16) ? *((UINT64*)src + 1) : 0;
+
+            // Increment src by the appropriate amount.
+            src = (void*)((char*)src + hfaFieldSize);
         }
     }
 
diff --git a/src/vm/arm64/CallDescrWorkerARM64.asm b/src/vm/arm64/CallDescrWorkerARM64.asm
index fe277ceb6282..9f2ec2461159 100644
--- a/src/vm/arm64/CallDescrWorkerARM64.asm
+++ b/src/vm/arm64/CallDescrWorkerARM64.asm
@@ -93,7 +93,7 @@ LNoFloatingPoint
         bne     LNoDoubleReturn
 
 LFloatReturn
-        str     d0, [x19, #(CallDescrData__returnValue + 0)]
+        str     q0, [x19, #(CallDescrData__returnValue + 0)]
         b       LReturnDone
 
 LNoDoubleReturn
@@ -117,6 +117,16 @@ LNoFloatHFAReturn
 
 LNoDoubleHFAReturn
 
+        ;;VectorHFAReturn  return case
+        cmp     w3, #64
+        bne     LNoVectorHFAReturn
+
+        stp     q0, q1, [x19, #(CallDescrData__returnValue + 0)]
+        stp     q2, q3, [x19, #(CallDescrData__returnValue + 0x20)]
+        b       LReturnDone
+
+LNoVectorHFAReturn
+
         EMIT_BREAKPOINT ; Unreachable
 
 LIntReturn
diff --git a/src/vm/arm64/asmconstants.h b/src/vm/arm64/asmconstants.h
index caffa809eb50..8c99ed841967 100644
--- a/src/vm/arm64/asmconstants.h
+++ b/src/vm/arm64/asmconstants.h
@@ -61,7 +61,7 @@ ASMCONSTANTS_C_ASSERT(SIZEOF__FloatArgumentRegisters == sizeof(FloatArgumentRegi
 #define CallDescrData__fpReturnSize             0x20
 #define CallDescrData__pTarget                  0x28
 #define CallDescrData__pRetBuffArg              0x30
-#define CallDescrData__returnValue              0x38
+#define CallDescrData__returnValue              0x40
 
 ASMCONSTANTS_C_ASSERT(CallDescrData__pSrc                 == offsetof(CallDescrData, pSrc))
 ASMCONSTANTS_C_ASSERT(CallDescrData__numStackSlots        == offsetof(CallDescrData, numStackSlots))
diff --git a/src/vm/arm64/asmhelpers.asm b/src/vm/arm64/asmhelpers.asm
index 7d8aad3e481b..abfd67313a52 100644
--- a/src/vm/arm64/asmhelpers.asm
+++ b/src/vm/arm64/asmhelpers.asm
@@ -692,8 +692,9 @@ NoFloatingPointRetVal
 
         ; x0 = fpRetSize
 
-        ; return value is stored before float argument registers
-        add         x1, sp, #(__PWTB_FloatArgumentRegisters - 0x20)
+        ; The return value is stored before float argument registers
+        ; The maximum size of a return value is 0x40 (HVA of 4x16)
+        add         x1, sp, #(__PWTB_FloatArgumentRegisters - 0x40)
         bl          setStubReturnValue
 
         EPILOG_WITH_TRANSITION_BLOCK_RETURN
diff --git a/src/vm/arm64/calldescrworkerarm64.S b/src/vm/arm64/calldescrworkerarm64.S
index f987d402ddee..8e8084ba3496 100644
--- a/src/vm/arm64/calldescrworkerarm64.S
+++ b/src/vm/arm64/calldescrworkerarm64.S
@@ -85,7 +85,7 @@ LOCAL_LABEL(NoFloatingPoint):
     bne     LOCAL_LABEL(NoDoubleReturn)
 
 LOCAL_LABEL(FloatReturn):
-    str     d0, [x19, #(CallDescrData__returnValue + 0)]
+    str     q0, [x19, #(CallDescrData__returnValue + 0)]
     b       LOCAL_LABEL(ReturnDone)
 
 LOCAL_LABEL(NoDoubleReturn):
@@ -97,6 +97,7 @@ LOCAL_LABEL(NoDoubleReturn):
     stp     s0, s1, [x19, #(CallDescrData__returnValue + 0)]
     stp     s2, s3, [x19, #(CallDescrData__returnValue + 0x08)]
     b       LOCAL_LABEL(ReturnDone)
+
 LOCAL_LABEL(NoFloatHFAReturn):
 
     //DoubleHFAReturn  return case
@@ -109,6 +110,16 @@ LOCAL_LABEL(NoFloatHFAReturn):
 
 LOCAL_LABEL(NoDoubleHFAReturn):
 
+    //VectorHFAReturn  return case (64 bytes: q0-q3 are stored as four 16-byte slots)
+    cmp     w3, #64
+    bne     LOCAL_LABEL(NoVectorHFAReturn)
+
+    stp     q0, q1, [x19, #(CallDescrData__returnValue + 0)]
+    stp     q2, q3, [x19, #(CallDescrData__returnValue + 0x20)]
+    b       LOCAL_LABEL(ReturnDone)
+
+LOCAL_LABEL(NoVectorHFAReturn):
+
     EMIT_BREAKPOINT // Unreachable
 
 LOCAL_LABEL(IntReturn):
diff --git a/src/vm/arm64/cgencpu.h b/src/vm/arm64/cgencpu.h
index fd1fbafe96da..a2cac4eb7c20 100644
--- a/src/vm/arm64/cgencpu.h
+++ b/src/vm/arm64/cgencpu.h
@@ -51,7 +51,7 @@ extern PCODE GetPreStubEntryPoint();
 #define CACHE_LINE_SIZE                         64
 #define LOG2SLOT                                LOG2_PTRSIZE
 
-#define ENREGISTERED_RETURNTYPE_MAXSIZE         32  // bytes (four FP registers: d0,d1,d2 and d3)
+#define ENREGISTERED_RETURNTYPE_MAXSIZE         64  // bytes (four vector registers: q0,q1,q2 and q3)
 #define ENREGISTERED_RETURNTYPE_INTEGER_MAXSIZE 16  // bytes (two int registers: x0 and x1)
 #define ENREGISTERED_PARAMTYPE_MAXSIZE          16  // bytes (max value type size that can be passed by value)
 
diff --git a/src/vm/callhelpers.h b/src/vm/callhelpers.h
index db9cfad6cb1e..d04412b68e17 100644
--- a/src/vm/callhelpers.h
+++ b/src/vm/callhelpers.h
@@ -39,8 +39,13 @@ struct CallDescrData
     // Return value
     //
 #ifdef ENREGISTERED_RETURNTYPE_MAXSIZE
+#ifdef _TARGET_ARM64_
+    // Use 16-byte-aligned NEON128 slots for vector returns; DECLSPEC_ALIGN is the compiler-neutral spelling.
+    DECLSPEC_ALIGN(16) NEON128 returnValue[ENREGISTERED_RETURNTYPE_MAXSIZE / sizeof(NEON128)];
+#else
     // Use UINT64 to ensure proper alignment
     UINT64 returnValue[ENREGISTERED_RETURNTYPE_MAXSIZE / sizeof(UINT64)];
+#endif
 #else
     UINT64 returnValue;
 #endif
diff --git a/src/vm/callingconvention.h b/src/vm/callingconvention.h
index 7368fecac816..cb117278b511 100644
--- a/src/vm/callingconvention.h
+++ b/src/vm/callingconvention.h
@@ -50,8 +50,7 @@ struct ArgLocDesc
 #endif // UNIX_AMD64_ABI
 
 #if defined(_TARGET_ARM64_)
-    bool    m_isSinglePrecision;  // For determining if HFA is single or double
-                                  // precision
+    int      m_hfaFieldSize;      // Size in bytes of each HFA/HVA field (4, 8 or 16); 0 when not an HFA
 #endif // defined(_TARGET_ARM64_)
 
 #if defined(_TARGET_ARM_)
@@ -76,7 +75,7 @@ struct ArgLocDesc
         m_fRequires64BitAlignment = FALSE;
 #endif
 #if defined(_TARGET_ARM64_)
-        m_isSinglePrecision = FALSE;
+        m_hfaFieldSize = 0;
 #endif // defined(_TARGET_ARM64_)
 #if defined(UNIX_AMD64_ABI)
         m_eeClass = NULL;
@@ -589,10 +588,19 @@ class ArgIteratorTemplate : public ARGITERATOR_BASE
             if (!m_argTypeHandle.IsNull() && m_argTypeHandle.IsHFA())
             {
                 CorElementType type = m_argTypeHandle.GetHFAType();
-                bool isFloatType = (type == ELEMENT_TYPE_R4);
+                int hfaFieldSize = 0;
+                switch (type)
+                {
+                case ELEMENT_TYPE_R4: hfaFieldSize = 4; break;
+                case ELEMENT_TYPE_R8: hfaFieldSize = 8; break;
+#ifdef _TARGET_ARM64_
+                case ELEMENT_TYPE_VALUETYPE: hfaFieldSize = 16; break;
+#endif
+                default: _ASSERTE(!"Invalid HFA Type");
+                }
 
-                pLoc->m_cFloatReg = isFloatType ? GetArgSize()/sizeof(float): GetArgSize()/sizeof(double);
-                pLoc->m_isSinglePrecision = isFloatType;
+                pLoc->m_cFloatReg = GetArgSize()/hfaFieldSize;
+                pLoc->m_hfaFieldSize = hfaFieldSize;
             }
             else
             {
@@ -1297,16 +1305,25 @@ int ArgIteratorTemplate<ARGITERATOR_BASE>::GetNextOffset()
         if (thValueType.IsHFA())
         {
             CorElementType type = thValueType.GetHFAType();
-            bool isFloatType = (type == ELEMENT_TYPE_R4);
 
-            cFPRegs = (type == ELEMENT_TYPE_R4)? (argSize/sizeof(float)): (argSize/sizeof(double));
+            int hfaFieldSize = 0;
+            switch (type)
+            {
+            case ELEMENT_TYPE_R4: hfaFieldSize = 4; break;
+            case ELEMENT_TYPE_R8: hfaFieldSize = 8; break;
+#ifdef _TARGET_ARM64_
+            case ELEMENT_TYPE_VALUETYPE: hfaFieldSize = 16; break;
+#endif
+            default: _ASSERTE(!"Invalid HFA Type");
+            }
+            cFPRegs = argSize/hfaFieldSize;
 
             m_argLocDescForStructInRegs.Init();
             m_argLocDescForStructInRegs.m_cFloatReg = cFPRegs;
             m_argLocDescForStructInRegs.m_idxFloatReg = m_idxFPReg;
 
-            m_argLocDescForStructInRegs.m_isSinglePrecision = isFloatType;
-                
+            m_argLocDescForStructInRegs.m_hfaFieldSize = hfaFieldSize;
+
             m_hasArgLocDescForStructInRegs = true;
         }
         else 
@@ -1474,9 +1491,17 @@ void ArgIteratorTemplate<ARGITERATOR_BASE>::ComputeReturnFlags()
             {
                 CorElementType hfaType = thValueType.GetHFAType();
 
-                flags |= (hfaType == ELEMENT_TYPE_R4) ? 
-                    ((4 * sizeof(float)) << RETURN_FP_SIZE_SHIFT) : 
-                    ((4 * sizeof(double)) << RETURN_FP_SIZE_SHIFT);
+                int hfaFieldSize = 0;
+                switch (hfaType)
+                {
+                case ELEMENT_TYPE_R4: hfaFieldSize = 4; break;
+                case ELEMENT_TYPE_R8: hfaFieldSize = 8; break;
+#ifdef _TARGET_ARM64_
+                case ELEMENT_TYPE_VALUETYPE: hfaFieldSize = 16; break;
+#endif
+                default: _ASSERTE(!"Invalid HFA Type");
+                }
+                flags |= ((4 * hfaFieldSize) << RETURN_FP_SIZE_SHIFT);
 
                 break;
             }
diff --git a/src/vm/class.cpp b/src/vm/class.cpp
index 1e551a9c5d50..2cce9092118c 100644
--- a/src/vm/class.cpp
+++ b/src/vm/class.cpp
@@ -1172,6 +1172,47 @@ bool MethodTable::IsHFA()
 }
 #endif // !FEATURE_HFA
 
+//*******************************************************************************
+int MethodTable::GetVectorSize()
+{
+#if defined(_TARGET_ARM64_) || defined(_TARGET_XARCH_)
+    if (IsIntrinsicType())
+    {
+        LPCUTF8 namespaceName;
+        LPCUTF8 className = GetFullyQualifiedNameInfo(&namespaceName);
+        int vectorSize = 0;
+
+        if (strcmp(className, "Vector`1") == 0)
+        {
+            vectorSize = GetNumInstanceFieldBytes(); // Vector<T>: size is taken from the instance field bytes
+            _ASSERTE(strcmp(namespaceName, "System.Numerics") == 0);
+            return vectorSize;
+        }
+        if (strcmp(className, "Vector128`1") == 0)
+        {
+            vectorSize = 16;
+        }
+#ifdef _TARGET_XARCH_
+        else if (strcmp(className, "Vector256`1") == 0)
+        {
+            vectorSize = 32;
+        }
+#elif defined(_TARGET_ARM64_)
+        else if (strcmp(className, "Vector64`1") == 0)
+        {
+            vectorSize = 8;
+        }
+#endif
+        if (vectorSize != 0)
+        {
+            _ASSERTE(strcmp(namespaceName, "System.Runtime.Intrinsics") == 0);
+            return vectorSize;
+        }
+    }
+#endif // defined(_TARGET_ARM64_) || defined(_TARGET_XARCH_)
+    return 0; // Not a recognized vector type on this target.
+}
+
 //*******************************************************************************
 CorElementType MethodTable::GetHFAType()
 {
@@ -1191,17 +1232,28 @@ CorElementType MethodTable::GetHFAType()
         _ASSERTE(pMT->IsValueType());
         _ASSERTE(pMT->GetNumInstanceFields() > 0);
 
+        int vectorSize = pMT->GetVectorSize();
+        if (vectorSize != 0)
+        {
+            return (vectorSize == 8) ? ELEMENT_TYPE_R8 : ELEMENT_TYPE_VALUETYPE;
+        }
+
         PTR_FieldDesc pFirstField = pMT->GetApproxFieldDescListRaw();
 
         CorElementType fieldType = pFirstField->GetFieldType();
-        
+
         // All HFA fields have to be of the same type, so we can just return the type of the first field
         switch (fieldType)
         {
         case ELEMENT_TYPE_VALUETYPE:
             pMT = pFirstField->LookupApproxFieldTypeHandle().GetMethodTable();
+            vectorSize = pMT->GetVectorSize();
+            if (vectorSize != 0)
+            {
+                return (vectorSize == 8) ? ELEMENT_TYPE_R8 : ELEMENT_TYPE_VALUETYPE;
+            }
             break;
-            
+
         case ELEMENT_TYPE_R4:
         case ELEMENT_TYPE_R8:
             return fieldType;
@@ -1212,7 +1264,7 @@ CorElementType MethodTable::GetHFAType()
             _ASSERTE(false);
             return ELEMENT_TYPE_END;
         }
-    }    
+    }
 }
 
 bool MethodTable::IsNativeHFA()
@@ -1231,6 +1283,7 @@ CorElementType MethodTable::GetNativeHFAType()
 //
 // When FEATURE_HFA is defined, we cache the value; otherwise we recompute it with each
 // call. The latter is only for the armaltjit and the arm64altjit.
+//
 bool
 #if defined(FEATURE_HFA)
 EEClass::CheckForHFA(MethodTable ** pByValueClassCache)
@@ -1249,20 +1302,17 @@ EEClass::CheckForHFA()
     if (HasExplicitFieldOffsetLayout())
         return false;
 
-    // The SIMD Intrinsic types are meant to be handled specially and should not be treated as HFA
-    if (GetMethodTable()->IsIntrinsicType())
+    // The SIMD Intrinsic Vector types appear to have multiple fields, but need to be treated
+    // as an opaque type of a single vector.
+    if (GetMethodTable()->GetVectorSize() != 0)
     {
-        LPCUTF8 namespaceName;
-        LPCUTF8 className = GetMethodTable()->GetFullyQualifiedNameInfo(&namespaceName);
-
-        if ((strcmp(className, "Vector256`1") == 0) || (strcmp(className, "Vector128`1") == 0) ||
-            (strcmp(className, "Vector64`1") == 0))
-        {
-            assert(strcmp(namespaceName, "System.Runtime.Intrinsics") == 0);
-            return false;
-        }
+#if defined(FEATURE_HFA)
+        GetMethodTable()->SetIsHFA();
+#endif
+        return true;
     }
 
+    int elemSize = 0;
     CorElementType hfaType = ELEMENT_TYPE_END;
 
     FieldDesc *pFieldDescList = GetFieldDescList();
@@ -1273,13 +1323,43 @@ EEClass::CheckForHFA()
 
         switch (fieldType)
         {
         case ELEMENT_TYPE_VALUETYPE:
+            {
+#ifdef _TARGET_ARM64_
+                // Only the vector (HVA) handling is ARM64-specific; nested HFA structs are handled
+                // below for every target. hfa/hva types are unique by size, except for Vector64
+                // which we can conveniently treat as if it were a double for ABI purposes. However,
+                // it only qualifies as an HVA if all fields are the same type: every field must be
+                // an ELEMENT_TYPE_VALUETYPE vector of the same size.
+                MethodTable* pMT;
+#if defined(FEATURE_HFA)
+                pMT = pByValueClassCache[i];
+#else
+                pMT = pFD->LookupApproxFieldTypeHandle().AsMethodTable();
+#endif
+                int thisElemSize = pMT->GetVectorSize();
+                if (thisElemSize != 0)
+                {
+                    if (elemSize == 0)
+                    {
+                        elemSize = thisElemSize;
+                    }
+                    else if ((thisElemSize != elemSize) || (hfaType != ELEMENT_TYPE_VALUETYPE))
+                    {
+                        return false;
+                    }
+                }
+                else
+#endif // _TARGET_ARM64_
+                {
 #if defined(FEATURE_HFA)
-            fieldType = pByValueClassCache[i]->GetHFAType();
+                    fieldType = pByValueClassCache[i]->GetHFAType();
 #else
-            fieldType = pFD->LookupApproxFieldTypeHandle().AsMethodTable()->GetHFAType();
+                    fieldType = pFD->LookupApproxFieldTypeHandle().AsMethodTable()->GetHFAType();
 #endif
+                }
+            }
             break;
 
         case ELEMENT_TYPE_R4:
         case ELEMENT_TYPE_R8:
@@ -1308,11 +1388,26 @@ EEClass::CheckForHFA()
         }
     }
 
-    if (hfaType == ELEMENT_TYPE_END)
+    switch (hfaType)
+    {
+    case ELEMENT_TYPE_R4:
+        elemSize = 4;
+        break;
+    case ELEMENT_TYPE_R8:
+        elemSize = 8;
+        break; // Do not fall through: on non-ARM64 targets the next label is 'default' (not an HFA).
+#ifdef _TARGET_ARM64_
+    case ELEMENT_TYPE_VALUETYPE:
+        // Should already have set elemSize, but be conservative
+        if (elemSize == 0)
+        {
+            return false;
+        }
+        break;
+#endif
+    default: // ELEMENT_TYPE_END
         return false;
-
-    int elemSize = (hfaType == ELEMENT_TYPE_R8) ? sizeof(double) : sizeof(float);
-
+    }
     // Note that we check the total size, but do not perform any checks on number of fields:
     // - Type of fields can be HFA valuetype itself
     // - Managed C++ HFA valuetypes have just one <alignment member> of type float to signal that 
@@ -1327,7 +1422,7 @@ EEClass::CheckForHFA()
     if (totalSize / elemSize > 4)
         return false;
 
-    // All the above tests passed. It's HFA!
+    // All the above tests passed. It's HFA(/HVA)!
 #if defined(FEATURE_HFA)
     GetMethodTable()->SetIsHFA();
 #endif
@@ -1399,7 +1494,16 @@ CorElementType EEClassLayoutInfo::GetNativeHFATypeRaw()
     if (hfaType == ELEMENT_TYPE_END)
         return ELEMENT_TYPE_END;
 
-    int elemSize = (hfaType == ELEMENT_TYPE_R8) ? sizeof(double) : sizeof(float);
+    int elemSize = 1;
+    switch (hfaType)
+    {
+    case ELEMENT_TYPE_R4: elemSize = sizeof(float); break;
+    case ELEMENT_TYPE_R8: elemSize = sizeof(double); break;
+#ifdef _TARGET_ARM64_
+    case ELEMENT_TYPE_VALUETYPE: elemSize = 16; break;
+#endif
+    default: _ASSERTE(!"Invalid HFA Type");
+    }
 
     // Note that we check the total size, but do not perform any checks on number of fields:
     // - Type of fields can be HFA valuetype itself
diff --git a/src/vm/class.h b/src/vm/class.h
index 2853aee330e2..fae62795a8b7 100644
--- a/src/vm/class.h
+++ b/src/vm/class.h
@@ -414,8 +414,11 @@ class EEClassLayoutInfo
 #endif // UNIX_AMD64_ABI
 #ifdef FEATURE_HFA
             // HFA type of the unmanaged layout
+            // Note that these are not flags, they are discrete values.
             e_R4_HFA                    = 0x10,
             e_R8_HFA                    = 0x20,
+            e_16_HFA                    = 0x30,
+            e_HFATypeFlags              = 0x30,
 #endif
         };
 
@@ -526,15 +529,19 @@ class EEClassLayoutInfo
         bool IsNativeHFA()
         {
             LIMITED_METHOD_CONTRACT;
-            return (m_bFlags & (e_R4_HFA | e_R8_HFA)) != 0;
+            return (m_bFlags & e_HFATypeFlags) != 0;
         }
 
         CorElementType GetNativeHFAType()
         {
             LIMITED_METHOD_CONTRACT;
-            if (IsNativeHFA())                      
-                return (m_bFlags & e_R4_HFA) ? ELEMENT_TYPE_R4 : ELEMENT_TYPE_R8;
-            return ELEMENT_TYPE_END;
+            switch (m_bFlags & e_HFATypeFlags)
+            {
+            case e_R4_HFA: return ELEMENT_TYPE_R4;
+            case e_R8_HFA: return ELEMENT_TYPE_R8;
+            case e_16_HFA: return ELEMENT_TYPE_VALUETYPE;
+            default:       return ELEMENT_TYPE_END;
+            }
         }
 #else // !FEATURE_HFA
         bool IsNativeHFA()
@@ -580,7 +587,13 @@ class EEClassLayoutInfo
         void SetNativeHFAType(CorElementType hfaType)
         {
             LIMITED_METHOD_CONTRACT;
-            m_bFlags |= (hfaType == ELEMENT_TYPE_R4) ? e_R4_HFA : e_R8_HFA;
+            switch (hfaType)
+            {
+            case ELEMENT_TYPE_R4: m_bFlags |= e_R4_HFA; break;
+            case ELEMENT_TYPE_R8: m_bFlags |= e_R8_HFA; break;
+            case ELEMENT_TYPE_VALUETYPE: m_bFlags |= e_16_HFA; break;
+            default: _ASSERTE(!"Invalid HFA Type");
+            }
         }
 #endif
 #ifdef UNIX_AMD64_ABI
diff --git a/src/vm/methodtable.h b/src/vm/methodtable.h
index 74febebc39bc..154efa2ee4aa 100644
--- a/src/vm/methodtable.h
+++ b/src/vm/methodtable.h
@@ -1929,6 +1929,9 @@ class MethodTable
     bool IsHFA();
 #endif // FEATURE_HFA
 
+    // Returns the size in bytes of this type if it is a HW vector type; 0 otherwise.
+    int GetVectorSize();
+
     // Get the HFA type. This is supported both with FEATURE_HFA, in which case it
     // depends on the cached bit on the class, or without, in which case it is recomputed
     // for each invocation.