diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp index 4c6e7aa4c28f32..01fb2a16af7cc3 100644 --- a/src/coreclr/jit/abi.cpp +++ b/src/coreclr/jit/abi.cpp @@ -125,7 +125,13 @@ var_types ABIPassingSegment::GetRegisterType() const #ifdef FEATURE_SIMD case 16: return TYP_SIMD16; -#endif +#ifdef TARGET_XARCH + case 32: + return TYP_SIMD32; + case 64: + return TYP_SIMD64; +#endif // TARGET_XARCH +#endif // FEATURE_SIMD default: assert(!"Unexpected size for floating point register"); return TYP_UNDEF; diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 2a6b01d3fe0b01..b723707105ae3b 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ b/src/coreclr/jit/codegenxarch.cpp @@ -6063,7 +6063,7 @@ void CodeGen::genCall(GenTreeCall* call) } else #endif // TARGET_X86 - if (varTypeIsFloating(returnType)) + if (varTypeUsesFloatReg(returnType)) { returnReg = REG_FLOATRET; } @@ -6158,7 +6158,7 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA } else { - assert(!varTypeIsStruct(call)); + assert(!varTypeIsStruct(call) || varTypeIsSIMD(call)); if (call->TypeIs(TYP_REF)) { @@ -6168,6 +6168,10 @@ void CodeGen::genCallInstruction(GenTreeCall* call X86_ARG(target_ssize_t stackA { params.retSize = EA_BYREF; } + else if (varTypeIsSIMD(call)) + { + params.retSize = emitTypeSize(call->TypeGet()); + } } } @@ -11488,7 +11492,12 @@ void CodeGen::genClearAvxStateInEpilog() // VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX // register) and before any call to an unknown function. - instGen(INS_vzeroupper); + // Skip vzeroupper when the method returns a 256-bit or wider SIMD value in a register, + // as vzeroupper would destroy the upper bits of the return value. + if (genTypeSize(m_compiler->info.compRetNativeType) <= 16) + { + instGen(INS_vzeroupper); + } } } diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 379c33ab5a5042..52c584b4b34861 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -793,6 +793,36 @@ var_types Compiler::getReturnTypeForStruct(CORINFO_CLASS_HANDLE clsHnd, #endif #ifdef UNIX_AMD64_ABI + // Opaque SIMD types (Vector64/128/256/512) should be returned in a single + // vector register (XMM/YMM/ZMM), matching the argument passing convention. + // We must handle this before the SysV struct classifier because the classifier + // produces [SSE, SSE] for 16-byte types (no SSEUP support), which splits the + // return into xmm0:xmm1 and adds unnecessary shuffling overhead. + // Use isHWSIMDClass (namespace check) instead of isOpaqueSIMDType (handle cache) + // because the SIMD handle cache may not be initialized this early in compilation. 
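+    // For example, a method returning Vector128<float> now reports SPK_PrimitiveType / TYP_SIMD16
+    // and comes back whole in xmm0 (instead of the xmm0:xmm1 split described above), and a
+    // Vector256<int> return comes back in ymm0 when VEX encoding is available.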
+ if (isHWSIMDClass(clsHnd)) + { + var_types simdType = getSIMDTypeForSize(structSize); + if (simdType != TYP_UNDEF && simdType != TYP_SIMD12) + { + bool canReturnAsSingleSimd = (structSize <= 16) || (structSize == 32 && canUseVexEncoding()) || + (structSize == 64 && canUseEvexEncoding()); + + if (canReturnAsSingleSimd) + { + howToReturnStruct = SPK_PrimitiveType; + useType = simdType; + + if (wbReturnStruct != nullptr) + { + *wbReturnStruct = howToReturnStruct; + } + + return useType; + } + } + } + // An 8-byte struct may need to be returned in a floating point register // So we always consult the struct "Classifier" routine // diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ff4bd66b0d9e92..04dcec9f0bfef8 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9526,6 +9526,14 @@ class Compiler int getSIMDTypeAlignment(var_types simdType); public: + // Returns true if the given SIMD type should be passed in a single vector register + // for ABI purposes. True for opaque SIMD types like Vector64/128/256/512 but false + // for decomposable types like Plane, Quaternion, Vector2, Vector3, Vector4. + bool isSingleRegisterSIMDType(var_types type, ClassLayout* layout) const + { + return varTypeIsSIMD(type) && (type != TYP_SIMD12) && (layout != nullptr) && isOpaqueSIMDType(layout); + } + // Get the number of bytes in a System.Numeric.Vector for the current compilation. // Note - cannot be used for System.Runtime.Intrinsic uint32_t getVectorTByteLength() diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index ba199aafdeedea..d5a25e55f66b73 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -4297,13 +4297,13 @@ int LinearScan::BuildReturn(GenTree* tree) SingleTypeRegSet useCandidates = RBM_NONE; #if FEATURE_MULTIREG_RET -#ifdef TARGET_ARM64 +#if defined(TARGET_ARM64) || defined(TARGET_AMD64) if (varTypeIsSIMD(tree) && !op1->IsMultiRegLclVar()) { BuildUse(op1, RBM_DOUBLERET.GetFloatRegSet()); return 1; } -#endif // TARGET_ARM64 +#endif // TARGET_ARM64 || TARGET_AMD64 if (varTypeIsStruct(tree)) { diff --git a/src/coreclr/jit/targetamd64.cpp b/src/coreclr/jit/targetamd64.cpp index a9b6b5c0e5e2a5..fa28a8868b5f59 100644 --- a/src/coreclr/jit/targetamd64.cpp +++ b/src/coreclr/jit/targetamd64.cpp @@ -63,47 +63,74 @@ ABIPassingInformation SysVX64Classifier::Classify(Compiler* comp, ClassLayout* structLayout, WellKnownArg wellKnownParam) { - bool canEnreg = false; + bool canEnreg = false; + bool handleAsSingleSimd = false; SYSTEMV_AMD64_CORINFO_STRUCT_REG_PASSING_DESCRIPTOR structDesc; - if (varTypeIsStruct(type)) + + // SIMD vector types (Vector64, Vector128, Vector256, Vector512) correspond to + // native __m64/__m128/__m256/__m512 and should be passed in a single vector + // register (XMM/YMM/ZMM) when hardware supports it. + // Exclude SIMD12 (Vector3) and non-opaque SIMD types (Plane, Quaternion, etc.) + // which have decomposable fields and should use normal struct classification. 
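+    // When a vector register is still available this maps 8/16-byte vectors to one XMM,
+    // 32-byte vectors to one YMM (requires VEX) and 64-byte vectors to one ZMM (requires EVEX);
+    // otherwise the type falls through to the regular SysV struct classification below.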
+ if (comp->isSingleRegisterSIMDType(type, structLayout)) { - comp->eeGetSystemVAmd64PassStructInRegisterDescriptor(structLayout->GetClassHandle(), &structDesc); + unsigned simdSize = genTypeSize(type); + if ((simdSize <= 16) || (simdSize == 32 && comp->canUseVexEncoding()) || + (simdSize == 64 && comp->canUseEvexEncoding())) + { + handleAsSingleSimd = true; + canEnreg = (m_floatRegs.Count() > 0); + } + } - if (structDesc.passedInRegisters) + if (!handleAsSingleSimd) + { + if (varTypeIsStruct(type)) { - unsigned intRegCount = 0; - unsigned floatRegCount = 0; + comp->eeGetSystemVAmd64PassStructInRegisterDescriptor(structLayout->GetClassHandle(), &structDesc); - for (unsigned int i = 0; i < structDesc.eightByteCount; i++) + if (structDesc.passedInRegisters) { - if (structDesc.IsIntegralSlot(i)) - { - intRegCount++; - } - else if (structDesc.IsSseSlot(i)) - { - floatRegCount++; - } - else + unsigned intRegCount = 0; + unsigned floatRegCount = 0; + + for (unsigned int i = 0; i < structDesc.eightByteCount; i++) { - assert(!"Invalid eightbyte classification type."); - break; + if (structDesc.IsIntegralSlot(i)) + { + intRegCount++; + } + else if (structDesc.IsSseSlot(i)) + { + floatRegCount++; + } + else + { + assert(!"Invalid eightbyte classification type."); + break; + } } - } - canEnreg = (intRegCount <= m_intRegs.Count()) && (floatRegCount <= m_floatRegs.Count()); + canEnreg = (intRegCount <= m_intRegs.Count()) && (floatRegCount <= m_floatRegs.Count()); + } + } + else + { + unsigned availRegs = varTypeUsesFloatArgReg(type) ? m_floatRegs.Count() : m_intRegs.Count(); + canEnreg = availRegs > 0; } - } - else - { - unsigned availRegs = varTypeUsesFloatArgReg(type) ? m_floatRegs.Count() : m_intRegs.Count(); - canEnreg = availRegs > 0; } ABIPassingInformation info; if (canEnreg) { - if (varTypeIsStruct(type)) + if (handleAsSingleSimd) + { + regNumber reg = m_floatRegs.Dequeue(); + info = ABIPassingInformation::FromSegmentByValue(comp, + ABIPassingSegment::InRegister(reg, 0, genTypeSize(type))); + } + else if (varTypeIsStruct(type)) { info = ABIPassingInformation(comp, structDesc.eightByteCount); diff --git a/src/coreclr/pal/inc/unixasmmacrosamd64.inc b/src/coreclr/pal/inc/unixasmmacrosamd64.inc index 90c8947e754297..d02a145e78c696 100644 --- a/src/coreclr/pal/inc/unixasmmacrosamd64.inc +++ b/src/coreclr/pal/inc/unixasmmacrosamd64.inc @@ -259,6 +259,78 @@ C_FUNC(\Name\()_End): .endm +// Save upper 128 bits of YMM0-YMM7 and upper 256 bits of ZMM0-ZMM7. +// Uses g_avxSupported / g_avx512Supported runtime flags. +// ofs: base offset into extra locals area for the 384 bytes of save space. +// Clobbers r11. 
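+// Save area layout relative to ofs (384 bytes total):
+//   ofs + 0x000..0x07F: upper 128 bits of ymm0-ymm7 (8 x 16 bytes)
+//   ofs + 0x080..0x17F: upper 256 bits of zmm0-zmm7 (8 x 32 bytes)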
+.macro SAVE_UPPER_VECTOR_REGISTERS ofs + + PREPARE_EXTERNAL_VAR g_avxSupported, r11 + cmp byte ptr [r11], 0 + je LOCAL_LABEL(SaveUpperVecDone_\@) + + vextractf128 xmmword ptr [rsp + \ofs + 0x00], ymm0, 1 + vextractf128 xmmword ptr [rsp + \ofs + 0x10], ymm1, 1 + vextractf128 xmmword ptr [rsp + \ofs + 0x20], ymm2, 1 + vextractf128 xmmword ptr [rsp + \ofs + 0x30], ymm3, 1 + vextractf128 xmmword ptr [rsp + \ofs + 0x40], ymm4, 1 + vextractf128 xmmword ptr [rsp + \ofs + 0x50], ymm5, 1 + vextractf128 xmmword ptr [rsp + \ofs + 0x60], ymm6, 1 + vextractf128 xmmword ptr [rsp + \ofs + 0x70], ymm7, 1 + + PREPARE_EXTERNAL_VAR g_avx512Supported, r11 + cmp byte ptr [r11], 0 + je LOCAL_LABEL(SaveUpperVecDone_\@) + + vextractf64x4 ymmword ptr [rsp + \ofs + 0x80], zmm0, 1 + vextractf64x4 ymmword ptr [rsp + \ofs + 0xA0], zmm1, 1 + vextractf64x4 ymmword ptr [rsp + \ofs + 0xC0], zmm2, 1 + vextractf64x4 ymmword ptr [rsp + \ofs + 0xE0], zmm3, 1 + vextractf64x4 ymmword ptr [rsp + \ofs + 0x100], zmm4, 1 + vextractf64x4 ymmword ptr [rsp + \ofs + 0x120], zmm5, 1 + vextractf64x4 ymmword ptr [rsp + \ofs + 0x140], zmm6, 1 + vextractf64x4 ymmword ptr [rsp + \ofs + 0x160], zmm7, 1 + +LOCAL_LABEL(SaveUpperVecDone_\@): + +.endm + +// Restore upper 128 bits of YMM0-YMM7 and upper 256 bits of ZMM0-ZMM7. +// Must be called AFTER RESTORE_FLOAT_ARGUMENT_REGISTERS (which restores XMM lower 128 bits). +// ofs: same base offset used in SAVE_UPPER_VECTOR_REGISTERS. +// Clobbers r11. +.macro RESTORE_UPPER_VECTOR_REGISTERS ofs + + PREPARE_EXTERNAL_VAR g_avxSupported, r11 + cmp byte ptr [r11], 0 + je LOCAL_LABEL(RestoreUpperVecDone_\@) + + vinsertf128 ymm0, ymm0, xmmword ptr [rsp + \ofs + 0x00], 1 + vinsertf128 ymm1, ymm1, xmmword ptr [rsp + \ofs + 0x10], 1 + vinsertf128 ymm2, ymm2, xmmword ptr [rsp + \ofs + 0x20], 1 + vinsertf128 ymm3, ymm3, xmmword ptr [rsp + \ofs + 0x30], 1 + vinsertf128 ymm4, ymm4, xmmword ptr [rsp + \ofs + 0x40], 1 + vinsertf128 ymm5, ymm5, xmmword ptr [rsp + \ofs + 0x50], 1 + vinsertf128 ymm6, ymm6, xmmword ptr [rsp + \ofs + 0x60], 1 + vinsertf128 ymm7, ymm7, xmmword ptr [rsp + \ofs + 0x70], 1 + + PREPARE_EXTERNAL_VAR g_avx512Supported, r11 + cmp byte ptr [r11], 0 + je LOCAL_LABEL(RestoreUpperVecDone_\@) + + vinsertf64x4 zmm0, zmm0, ymmword ptr [rsp + \ofs + 0x80], 1 + vinsertf64x4 zmm1, zmm1, ymmword ptr [rsp + \ofs + 0xA0], 1 + vinsertf64x4 zmm2, zmm2, ymmword ptr [rsp + \ofs + 0xC0], 1 + vinsertf64x4 zmm3, zmm3, ymmword ptr [rsp + \ofs + 0xE0], 1 + vinsertf64x4 zmm4, zmm4, ymmword ptr [rsp + \ofs + 0x100], 1 + vinsertf64x4 zmm5, zmm5, ymmword ptr [rsp + \ofs + 0x120], 1 + vinsertf64x4 zmm6, zmm6, ymmword ptr [rsp + \ofs + 0x140], 1 + vinsertf64x4 zmm7, zmm7, ymmword ptr [rsp + \ofs + 0x160], 1 + +LOCAL_LABEL(RestoreUpperVecDone_\@): + +.endm + // Stack layout: // // (stack parameters) diff --git a/src/coreclr/tools/Common/JitInterface/SystemVStructClassificator.cs b/src/coreclr/tools/Common/JitInterface/SystemVStructClassificator.cs index 943f23ce102a4a..b0c32f62a37f82 100644 --- a/src/coreclr/tools/Common/JitInterface/SystemVStructClassificator.cs +++ b/src/coreclr/tools/Common/JitInterface/SystemVStructClassificator.cs @@ -214,18 +214,48 @@ private static bool ClassifyEightBytes(TypeDesc typeDesc, return true; } - // The SIMD and Int128 Intrinsic types are meant to be handled specially and should not be passed as struct registers + // SIMD Intrinsic types should be classified as SSE to pass in XMM registers. 
+ // We must NOT fall through to normal field enumeration because the internal fields + // (e.g., Vector64._00 is ulong) would classify as INTEGER instead of SSE. if (typeDesc.IsIntrinsic) { InstantiatedType instantiatedType = typeDesc as InstantiatedType; if (instantiatedType != null) { - if (VectorFieldLayoutAlgorithm.IsVectorType(instantiatedType) || - VectorOfTFieldLayoutAlgorithm.IsVectorOfTType(instantiatedType) || - Int128FieldLayoutAlgorithm.IsIntegerType(instantiatedType)) + if (Int128FieldLayoutAlgorithm.IsIntegerType(instantiatedType)) { return false; } + + if (VectorFieldLayoutAlgorithm.IsVectorType(instantiatedType) || + VectorOfTFieldLayoutAlgorithm.IsVectorOfTType(instantiatedType)) + { + int structSize = typeDesc.GetElementSize().AsInt; + + // The entry point (GetSystemVAmd64PassStructInRegisterDescriptor) already + // filters out types > CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS, + // so only Vector64 (8B) and Vector128 (16B) can reach here. Vector256/512 + // are handled by the JIT's handleAsSingleSimd path and never consult this + // classification. + Debug.Assert(structSize <= CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS); + + // Directly classify each 8-byte chunk as SSE. + for (int offset = 0; offset < structSize; offset += SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES) + { + int eightByteSize = Math.Min(SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES, structSize - offset); + int normalizedOffset = offset + startOffsetOfStruct; + + helper.FieldClassifications[helper.CurrentUniqueOffsetField] = SystemVClassificationTypeSSE; + helper.FieldSizes[helper.CurrentUniqueOffsetField] = eightByteSize; + helper.FieldOffsets[helper.CurrentUniqueOffsetField] = normalizedOffset; + helper.CurrentUniqueOffsetField++; + } + + helper.LargestFieldOffset = startOffsetOfStruct + structSize - Math.Min(structSize, SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES); + + AssignClassifiedEightByteTypes(ref helper); + return true; + } } } diff --git a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S index 9b2345591947d6..28d7889c2344dd 100644 --- a/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S +++ b/src/coreclr/vm/amd64/CachedInterfaceDispatchCoreCLR.S @@ -49,15 +49,26 @@ LEAF_END RhpVTableOffsetDispatch, _TEXT // [rsp+0] m_ReturnAddress: contains the return address of caller to stub NESTED_ENTRY RhpInterfaceDispatchSlow, _TEXT, NoHandler - PROLOG_WITH_TRANSITION_BLOCK + PROLOG_WITH_TRANSITION_BLOCK 384 + + // r11 holds indirection cell from the caller. Save before clobbering. + mov r10, r11 + + SAVE_UPPER_VECTOR_REGISTERS 0x00 lea rdi, [rsp + __PWTB_TransitionBlock] // pTransitionBlock - mov rsi, r11 // indirection cell + mov rsi, r10 // indirection cell call C_FUNC(CID_ResolveWorker) + mov r10, rax - EPILOG_WITH_TRANSITION_BLOCK_TAILCALL - TAILJMP_RAX + RESTORE_FLOAT_ARGUMENT_REGISTERS __PWTB_FloatArgumentRegisters + RESTORE_UPPER_VECTOR_REGISTERS 0x00 + + free_stack __PWTB_StackAlloc + POP_ARGUMENT_REGISTERS + POP_CALLEE_SAVED_REGISTERS + jmp r10 NESTED_END RhpInterfaceDispatchSlow, _TEXT @@ -65,15 +76,26 @@ NESTED_END RhpInterfaceDispatchSlow, _TEXT // r11 contains the address of the indirection cell (which is the MethodPtrAux field of the delegate) NESTED_ENTRY CID_VirtualOpenDelegateDispatch, _TEXT, NoHandler - PROLOG_WITH_TRANSITION_BLOCK + PROLOG_WITH_TRANSITION_BLOCK 384 + + // r11 holds indirection cell from the caller. Save before clobbering. 
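+        // (r10 is free to hold it across the save below: SAVE_UPPER_VECTOR_REGISTERS only
+        // clobbers r11.)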
+ mov r10, r11 + + SAVE_UPPER_VECTOR_REGISTERS 0x00 lea rdi, [rsp + __PWTB_TransitionBlock] // pTransitionBlock - mov rsi, r11 // indirection cell + mov rsi, r10 // indirection cell call C_FUNC(CID_VirtualOpenDelegateDispatchWorker) + mov r10, rax - EPILOG_WITH_TRANSITION_BLOCK_TAILCALL - TAILJMP_RAX + RESTORE_FLOAT_ARGUMENT_REGISTERS __PWTB_FloatArgumentRegisters + RESTORE_UPPER_VECTOR_REGISTERS 0x00 + + free_stack __PWTB_StackAlloc + POP_ARGUMENT_REGISTERS + POP_CALLEE_SAVED_REGISTERS + jmp r10 NESTED_END CID_VirtualOpenDelegateDispatch, _TEXT diff --git a/src/coreclr/vm/amd64/externalmethodfixupthunk.S b/src/coreclr/vm/amd64/externalmethodfixupthunk.S index f6b89f1b98f065..9be7cdb64e4d6d 100644 --- a/src/coreclr/vm/amd64/externalmethodfixupthunk.S +++ b/src/coreclr/vm/amd64/externalmethodfixupthunk.S @@ -9,16 +9,23 @@ NESTED_ENTRY DelayLoad_MethodCall, _TEXT, NoHandler - PROLOG_WITH_TRANSITION_BLOCK 0, 0x10, rdx, rcx, 0 + PROLOG_WITH_TRANSITION_BLOCK 384, 0x10, rdx, rcx, 0 + + SAVE_UPPER_VECTOR_REGISTERS 0x00 lea rdi, [rsp + __PWTB_TransitionBlock] // pTransitionBlock mov rsi, rax // pIndirection call C_FUNC(ExternalMethodFixupWorker) + mov r10, rax - EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + RESTORE_FLOAT_ARGUMENT_REGISTERS __PWTB_FloatArgumentRegisters + RESTORE_UPPER_VECTOR_REGISTERS 0x00 - TAILJMP_RAX + free_stack __PWTB_StackAlloc + POP_ARGUMENT_REGISTERS + POP_CALLEE_SAVED_REGISTERS + jmp r10 NESTED_END DelayLoad_MethodCall, _TEXT diff --git a/src/coreclr/vm/amd64/theprestubamd64.S b/src/coreclr/vm/amd64/theprestubamd64.S index 8d601c0ab9f280..e2ac738a496d0b 100644 --- a/src/coreclr/vm/amd64/theprestubamd64.S +++ b/src/coreclr/vm/amd64/theprestubamd64.S @@ -6,9 +6,12 @@ #include "asmconstants.h" NESTED_ENTRY ThePreStub, _TEXT, NoHandler - PROLOG_WITH_TRANSITION_BLOCK 8, 0, 0, 0, 0 + // Extra locals: 8 (Swift return buffer) + 384 (upper vector halves) + PROLOG_WITH_TRANSITION_BLOCK 392, 0, 0, 0, 0 mov [rsp], rax // Return buffer in Swift calling convention + SAVE_UPPER_VECTOR_REGISTERS 0x08 + // // call PreStubWorker // @@ -18,7 +21,13 @@ NESTED_ENTRY ThePreStub, _TEXT, NoHandler mov r10, rax mov rax, [rsp] - EPILOG_WITH_TRANSITION_BLOCK_TAILCALL + + RESTORE_FLOAT_ARGUMENT_REGISTERS __PWTB_FloatArgumentRegisters + RESTORE_UPPER_VECTOR_REGISTERS 0x08 + + free_stack __PWTB_StackAlloc + POP_ARGUMENT_REGISTERS + POP_CALLEE_SAVED_REGISTERS jmp r10 NESTED_END ThePreStub, _TEXT diff --git a/src/coreclr/vm/amd64/unixasmhelpers.S b/src/coreclr/vm/amd64/unixasmhelpers.S index 53115def14a26b..43321e9baacdc2 100644 --- a/src/coreclr/vm/amd64/unixasmhelpers.S +++ b/src/coreclr/vm/amd64/unixasmhelpers.S @@ -136,14 +136,22 @@ NESTED_END OnHijackTripThread, _TEXT #ifdef FEATURE_TIERED_COMPILATION NESTED_ENTRY OnCallCountThresholdReachedStub, _TEXT, NoHandler - PROLOG_WITH_TRANSITION_BLOCK + PROLOG_WITH_TRANSITION_BLOCK 384 + + SAVE_UPPER_VECTOR_REGISTERS 0x00 lea rdi, [rsp + __PWTB_TransitionBlock] // TransitionBlock * mov rsi, rax // stub-identifying token, see OnCallCountThresholdReachedStub call C_FUNC(OnCallCountThresholdReached) + mov r10, rax - EPILOG_WITH_TRANSITION_BLOCK_TAILCALL - TAILJMP_RAX + RESTORE_FLOAT_ARGUMENT_REGISTERS __PWTB_FloatArgumentRegisters + RESTORE_UPPER_VECTOR_REGISTERS 0x00 + + free_stack __PWTB_StackAlloc + POP_ARGUMENT_REGISTERS + POP_CALLEE_SAVED_REGISTERS + jmp r10 NESTED_END OnCallCountThresholdReachedStub, _TEXT NESTED_ENTRY JIT_Patchpoint, _TEXT, NoHandler diff --git a/src/coreclr/vm/amd64/virtualcallstubamd64.S b/src/coreclr/vm/amd64/virtualcallstubamd64.S index 
822eaaf2718f6e..581ba97cce3ce7 100644 --- a/src/coreclr/vm/amd64/virtualcallstubamd64.S +++ b/src/coreclr/vm/amd64/virtualcallstubamd64.S @@ -26,20 +26,32 @@ NESTED_ENTRY ResolveWorkerAsmStub, _TEXT, NoHandler - PROLOG_WITH_TRANSITION_BLOCK 0, 8, rdx, 0, 0 + PROLOG_WITH_TRANSITION_BLOCK 384, 8, rdx, 0, 0 + + // r11 holds indirection cell + flags from the caller. Save it before + // SAVE_UPPER_VECTOR_REGISTERS clobbers r11 as a scratch register. + mov r10, r11 + + SAVE_UPPER_VECTOR_REGISTERS 0x00 // token stored in rdx by prolog lea rdi, [rsp + __PWTB_TransitionBlock] // pTransitionBlock - mov rsi, r11 // indirection cell + flags + mov rsi, r10 // indirection cell + flags mov rcx, rsi and rcx, 7 // flags sub rsi, rcx // indirection cell call C_FUNC(VSD_ResolveWorker) + mov r10, rax + + RESTORE_FLOAT_ARGUMENT_REGISTERS __PWTB_FloatArgumentRegisters + RESTORE_UPPER_VECTOR_REGISTERS 0x00 - EPILOG_WITH_TRANSITION_BLOCK_TAILCALL - TAILJMP_RAX + free_stack __PWTB_StackAlloc + POP_ARGUMENT_REGISTERS + POP_CALLEE_SAVED_REGISTERS + jmp r10 NESTED_END ResolveWorkerAsmStub, _TEXT diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index a1bf0bc4c6bfa2..9aabcd273c494d 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1271,6 +1271,9 @@ void EEJitManager::SetCpuInfo() if (((cpuFeatures & XArchIntrinsicConstants_Avx) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX)) { CPUCompileFlags.Set(InstructionSet_AVX); +#ifdef UNIX_AMD64_ABI + g_avxSupported = 1; +#endif } if (((cpuFeatures & XArchIntrinsicConstants_Avx2) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX2)) @@ -1283,6 +1286,9 @@ void EEJitManager::SetCpuInfo() if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512)) { CPUCompileFlags.Set(InstructionSet_AVX512); +#ifdef UNIX_AMD64_ABI + g_avx512Supported = 1; +#endif } // x86-64-vFuture diff --git a/src/coreclr/vm/methodtable.cpp b/src/coreclr/vm/methodtable.cpp index 470d35b23cbbfc..5d0885d11ff49d 100644 --- a/src/coreclr/vm/methodtable.cpp +++ b/src/coreclr/vm/methodtable.cpp @@ -2131,29 +2131,57 @@ bool MethodTable::ClassifyEightBytesWithManagedLayout(SystemVStructRegisterPassi return true; } - // The SIMD Intrinsic types are meant to be handled specially and should not be passed as struct registers + // SIMD Intrinsic types should be classified as SSE to pass in XMM registers. + // We must NOT fall through to normal field enumeration because the internal fields + // (e.g., Vector64._00 is ulong) would classify as INTEGER instead of SSE. 
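+    // For example, Vector128<float> (16 bytes) is recorded here as two SSE eightbytes covering
+    // offsets 0-7 and 8-15, rather than as INTEGER eightbytes derived from its ulong backing fields.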
if (IsIntrinsicType()) { LPCUTF8 namespaceName; LPCUTF8 className = GetFullyQualifiedNameInfo(&namespaceName); + bool isSIMDType = false; + unsigned structSize = 0; + if ((strcmp(className, "Vector512`1") == 0) || (strcmp(className, "Vector256`1") == 0) || (strcmp(className, "Vector128`1") == 0) || (strcmp(className, "Vector64`1") == 0)) { assert(strcmp(namespaceName, "System.Runtime.Intrinsics") == 0); - - LOG((LF_JIT, LL_EVERYTHING, "%*s**** ClassifyEightBytesWithManagedLayout: struct %s is a SIMD intrinsic type; will not be enregistered\n", - nestingLevel * 5, "", this->GetDebugClassName())); - - return false; + structSize = GetNumInstanceFieldBytes(); + isSIMDType = true; + } + else if ((strcmp(className, "Vector`1") == 0) && (strcmp(namespaceName, "System.Numerics") == 0)) + { + structSize = GetNumInstanceFieldBytes(); + isSIMDType = true; } - if ((strcmp(className, "Vector`1") == 0) && (strcmp(namespaceName, "System.Numerics") == 0)) + if (isSIMDType) { - LOG((LF_JIT, LL_EVERYTHING, "%*s**** ClassifyEightBytesWithManagedLayout: struct %s is a SIMD intrinsic type; will not be enregistered\n", - nestingLevel * 5, "", this->GetDebugClassName())); + // All callers (SystemVAmd64CheckForPassStructInRegister, ClassifyEightBytes) + // already filter out structs > CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS (16 bytes). + // Only Vector64 (8B) and Vector128 (16B) can reach here. Vector256/512 are handled + // by the JIT's handleAsSingleSimd path and never consult this classification. + _ASSERTE(structSize <= CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS); - return false; + // Directly classify each 8-byte chunk as SSE so the JIT passes them in XMM registers. + for (unsigned offset = 0; offset < structSize; offset += SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES) + { + unsigned eightByteSize = min(static_cast(SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES), structSize - offset); + unsigned normalizedOffset = offset + startOffsetOfStruct; + + helperPtr->fieldClassifications[helperPtr->currentUniqueOffsetField] = SystemVClassificationTypeSSE; + helperPtr->fieldSizes[helperPtr->currentUniqueOffsetField] = eightByteSize; + helperPtr->fieldOffsets[helperPtr->currentUniqueOffsetField] = normalizedOffset; + helperPtr->currentUniqueOffsetField++; + + LOG((LF_JIT, LL_EVERYTHING, "%*s**** ClassifyEightBytesWithManagedLayout: SIMD type %s eightbyte at offset %u classified as SSE\n", + nestingLevel * 5, "", this->GetDebugClassName(), normalizedOffset)); + } + + helperPtr->largestFieldOffset = (int)(startOffsetOfStruct + structSize - min(structSize, static_cast(SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES))); + + AssignClassifiedEightByteTypes(helperPtr, nestingLevel); + return true; } } @@ -2359,29 +2387,57 @@ bool MethodTable::ClassifyEightBytesWithNativeLayout(SystemVStructRegisterPassin numIntroducedFields = pNativeLayoutInfo->GetSize() / pNativeFieldDescs->NativeSize(); } - // The SIMD Intrinsic types are meant to be handled specially and should not be passed as struct registers + // SIMD Intrinsic types should be classified as SSE to pass in XMM registers. + // We must NOT fall through to normal field enumeration because the internal fields + // (e.g., Vector64._00 is ulong) would classify as INTEGER instead of SSE. 
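+    // This mirrors ClassifyEightBytesWithManagedLayout above for types with a native layout,
+    // e.g. when these vectors appear in interop signatures corresponding to native __m64/__m128.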
if (IsIntrinsicType()) { LPCUTF8 namespaceName; LPCUTF8 className = GetFullyQualifiedNameInfo(&namespaceName); + bool isSIMDType = false; + unsigned structSize = 0; + if ((strcmp(className, "Vector512`1") == 0) || (strcmp(className, "Vector256`1") == 0) || (strcmp(className, "Vector128`1") == 0) || (strcmp(className, "Vector64`1") == 0)) { assert(strcmp(namespaceName, "System.Runtime.Intrinsics") == 0); - - LOG((LF_JIT, LL_EVERYTHING, "%*s**** ClassifyEightBytesWithNativeLayout: struct %s is a SIMD intrinsic type; will not be enregistered\n", - nestingLevel * 5, "", this->GetDebugClassName())); - - return false; + structSize = pNativeLayoutInfo->GetSize(); + isSIMDType = true; + } + else if ((strcmp(className, "Vector`1") == 0) && (strcmp(namespaceName, "System.Numerics") == 0)) + { + structSize = pNativeLayoutInfo->GetSize(); + isSIMDType = true; } - if ((strcmp(className, "Vector`1") == 0) && (strcmp(namespaceName, "System.Numerics") == 0)) + if (isSIMDType) { - LOG((LF_JIT, LL_EVERYTHING, "%*s**** ClassifyEightBytesWithNativeLayout: struct %s is a SIMD intrinsic type; will not be enregistered\n", - nestingLevel * 5, "", this->GetDebugClassName())); + // All callers (SystemVAmd64CheckForPassNativeStructInRegister) already filter out + // structs > CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS (16 bytes). + // Only Vector64 (8B) and Vector128 (16B) can reach here. Vector256/512 are handled + // by the JIT's handleAsSingleSimd path and never consult this classification. + _ASSERTE(structSize <= CLR_SYSTEMV_MAX_STRUCT_BYTES_TO_PASS_IN_REGISTERS); - return false; + // Directly classify each 8-byte chunk as SSE so the JIT passes them in XMM registers. + for (unsigned offset = 0; offset < structSize; offset += SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES) + { + unsigned eightByteSize = min(static_cast(SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES), structSize - offset); + unsigned normalizedOffset = offset + startOffsetOfStruct; + + helperPtr->fieldClassifications[helperPtr->currentUniqueOffsetField] = SystemVClassificationTypeSSE; + helperPtr->fieldSizes[helperPtr->currentUniqueOffsetField] = eightByteSize; + helperPtr->fieldOffsets[helperPtr->currentUniqueOffsetField] = normalizedOffset; + helperPtr->currentUniqueOffsetField++; + + LOG((LF_JIT, LL_EVERYTHING, "%*s**** ClassifyEightBytesWithNativeLayout: SIMD type %s eightbyte at offset %u classified as SSE\n", + nestingLevel * 5, "", this->GetDebugClassName(), normalizedOffset)); + } + + helperPtr->largestFieldOffset = (int)(startOffsetOfStruct + structSize - min(structSize, static_cast(SYSTEMV_EIGHT_BYTE_SIZE_IN_BYTES))); + + AssignClassifiedEightByteTypes(helperPtr, nestingLevel); + return true; } } diff --git a/src/coreclr/vm/vars.cpp b/src/coreclr/vm/vars.cpp index f6fd1eac774627..678a1989cf54c9 100644 --- a/src/coreclr/vm/vars.cpp +++ b/src/coreclr/vm/vars.cpp @@ -101,6 +101,11 @@ GVAL_IMPL_INIT(DWORD, g_TlsIndex, TLS_OUT_OF_INDEXES); GVAL_IMPL_INIT(DWORD, g_offsetOfCurrentThreadInfo, 0); GVAL_IMPL_INIT(DWORD, g_gcNotificationFlags, 0); +#if defined(TARGET_AMD64) && defined(UNIX_AMD64_ABI) +extern "C" uint8_t g_avxSupported = 0; +extern "C" uint8_t g_avx512Supported = 0; +#endif + MethodTable* g_pCastHelpers; diff --git a/src/coreclr/vm/vars.hpp b/src/coreclr/vm/vars.hpp index 246291fdeab900..b310e8d51d981f 100644 --- a/src/coreclr/vm/vars.hpp +++ b/src/coreclr/vm/vars.hpp @@ -365,6 +365,11 @@ GVAL_DECL(DWORD, g_TlsIndex); GVAL_DECL(DWORD, g_offsetOfCurrentThreadInfo); GVAL_DECL(DWORD, g_gcNotificationFlags); +#if defined(TARGET_AMD64) && 
defined(UNIX_AMD64_ABI) +extern "C" uint8_t g_avxSupported; +extern "C" uint8_t g_avx512Supported; +#endif + GPTR_DECL(MethodTable, g_pEHClass); GPTR_DECL(MethodTable, g_pExceptionServicesInternalCallsClass); GPTR_DECL(MethodTable, g_pStackFrameIteratorClass); diff --git a/src/tests/JIT/Directed/VectorABI/VectorMgdMgd256.cs b/src/tests/JIT/Directed/VectorABI/VectorMgdMgd256.cs new file mode 100644 index 00000000000000..77e20531c1b113 --- /dev/null +++ b/src/tests/JIT/Directed/VectorABI/VectorMgdMgd256.cs @@ -0,0 +1,266 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using System.Runtime.CompilerServices; +using Xunit; + +// Test passing and returning HVA (homogeneous vector aggregate) structs containing +// Vector256 and Vector512 elements. +// +// On System V x64: +// - A single Vector256/512 is passed in a single YMM/ZMM register via handleAsSingleSimd. +// - HVA structs containing multiple vectors are larger than 16 bytes and are passed on the stack +// per the System V ABI (structs > 2 eightbytes go on the stack). +// - This test verifies that both single-vector and multi-vector HVAs are handled correctly, +// and that values are not corrupted during argument/return value passing. + +public static class VectorMgdMgd256 +{ + private const int PASS = 100; + private const int FAIL = 0; + + public const int DefaultSeed = 20010415; + public static int Seed = Environment.GetEnvironmentVariable("CORECLR_SEED") switch + { + string seedStr when seedStr.Equals("random", StringComparison.OrdinalIgnoreCase) => new Random().Next(), + string seedStr when int.TryParse(seedStr, out int envSeed) => envSeed, + _ => DefaultSeed + }; + + static Random random = new Random(Seed); + + static bool isPassing = true; + + static void Check(string msg, bool condition) + { + if (!condition) + { + Console.WriteLine($"FAIL: {msg}"); + isPassing = false; + } + } + + // ======== HVA structs with Vector256 ======== + + public struct HVA256_01 { public Vector256 v0; } + public struct HVA256_02 { public Vector256 v0; public Vector256 v1; } + public struct HVA256_03 { public Vector256 v0; public Vector256 v1; public Vector256 v2; } + + // ======== HVA structs with Vector512 ======== + + public struct HVA512_01 { public Vector512 v0; } + public struct HVA512_02 { public Vector512 v0; public Vector512 v1; } + + // ======== Single Vector256/512 argument tests ======== + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector256 PassSingle256(Vector256 a) => a; + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector256 Add256(Vector256 a, Vector256 b) => a + b; + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector256 PassMany256(Vector256 a, Vector256 b, Vector256 c, Vector256 d) + => a + b + c + d; + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector256 Mixed256(int x, Vector256 v, long y) + => v + Vector256.Create(x + (int)y); + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector512 PassSingle512(Vector512 a) => a; + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector512 Add512(Vector512 a, Vector512 b) => a + b; + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector512 Mixed512(int x, Vector512 v, long y) + => v + Vector512.Create(x + (int)y); + + // ======== HVA argument tests (passed on stack on SysV x64) ======== + + [MethodImpl(MethodImplOptions.NoInlining)] + static HVA256_01 
PassHVA256_01(HVA256_01 h) => h; + + [MethodImpl(MethodImplOptions.NoInlining)] + static HVA256_02 PassHVA256_02(HVA256_02 h) => h; + + [MethodImpl(MethodImplOptions.NoInlining)] + static HVA256_03 PassHVA256_03(HVA256_03 h) => h; + + [MethodImpl(MethodImplOptions.NoInlining)] + static HVA512_01 PassHVA512_01(HVA512_01 h) => h; + + [MethodImpl(MethodImplOptions.NoInlining)] + static HVA512_02 PassHVA512_02(HVA512_02 h) => h; + + // ======== HVA return tests ======== + + [MethodImpl(MethodImplOptions.NoInlining)] + static HVA256_01 ReturnHVA256_01(Vector256 v0) => new HVA256_01 { v0 = v0 }; + + [MethodImpl(MethodImplOptions.NoInlining)] + static HVA256_02 ReturnHVA256_02(Vector256 v0, Vector256 v1) => new HVA256_02 { v0 = v0, v1 = v1 }; + + [MethodImpl(MethodImplOptions.NoInlining)] + static HVA512_01 ReturnHVA512_01(Vector512 v0) => new HVA512_01 { v0 = v0 }; + + // ======== Mixed HVA + scalar argument tests ======== + + [MethodImpl(MethodImplOptions.NoInlining)] + static int HVA256WithScalars(int a, HVA256_01 h, int b) + => a + b + h.v0.GetElement(0); + + [MethodImpl(MethodImplOptions.NoInlining)] + static int HVA512WithScalars(long a, HVA512_01 h, long b) + => (int)(a + b) + h.v0.GetElement(0); + + // ======== Reflection tests (forces real calling convention) ======== + + static void TestReflection256() + { + var v = Vector256.Create(42); + var h01 = new HVA256_01 { v0 = v }; + + var method = typeof(VectorMgdMgd256).GetMethod(nameof(PassHVA256_01), + System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Static); + var result = (HVA256_01)method.Invoke(null, new object[] { h01 }); + Check("Reflection PassHVA256_01.v0", result.v0 == v); + + var h02 = new HVA256_02 { v0 = Vector256.Create(10), v1 = Vector256.Create(20) }; + var method2 = typeof(VectorMgdMgd256).GetMethod(nameof(PassHVA256_02), + System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Static); + var result2 = (HVA256_02)method2.Invoke(null, new object[] { h02 }); + Check("Reflection PassHVA256_02.v0", result2.v0 == Vector256.Create(10)); + Check("Reflection PassHVA256_02.v1", result2.v1 == Vector256.Create(20)); + } + + static void TestReflection512() + { + var v = Vector512.Create(42); + var h01 = new HVA512_01 { v0 = v }; + + var method = typeof(VectorMgdMgd256).GetMethod(nameof(PassHVA512_01), + System.Reflection.BindingFlags.NonPublic | System.Reflection.BindingFlags.Static); + var result = (HVA512_01)method.Invoke(null, new object[] { h01 }); + Check("Reflection PassHVA512_01.v0", result.v0 == v); + } + + [Fact] + public static int TestEntryPoint() + { + Console.WriteLine($"Vector256.Count = {Vector256.Count}"); + Console.WriteLine($"Vector512.Count = {Vector512.Count}"); + + if (Avx.IsSupported) + { + // ---- Single Vector256 tests ---- + Console.WriteLine("=== Single Vector256 tests ==="); + + var v256 = Vector256.Create(1, 2, 3, 4, 5, 6, 7, 8); + Check("PassSingle256", PassSingle256(v256) == v256); + + Check("Add256", Add256(Vector256.Create(1), Vector256.Create(2)) == Vector256.Create(3)); + + Check("PassMany256", PassMany256( + Vector256.Create(1), Vector256.Create(2), Vector256.Create(3), Vector256.Create(4)) == Vector256.Create(10)); + + Check("Mixed256", Mixed256(3, Vector256.Create(10), 7L) == Vector256.Create(20)); + + // ---- HVA256 argument tests ---- + Console.WriteLine("=== HVA256 argument tests ==="); + + var hva256_01 = new HVA256_01 { v0 = Vector256.Create(random.Next(100)) }; + var r01 = PassHVA256_01(hva256_01); + Check("PassHVA256_01.v0", r01.v0 == hva256_01.v0); 
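+            // The two- and three-element HVAs below are larger than 16 bytes, so (per the header
+            // comment) they take the stack path; these checks guard against corruption there.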
+ + var hva256_02 = new HVA256_02 { v0 = Vector256.Create(random.Next(100)), v1 = Vector256.Create(random.Next(100)) }; + var r02 = PassHVA256_02(hva256_02); + Check("PassHVA256_02.v0", r02.v0 == hva256_02.v0); + Check("PassHVA256_02.v1", r02.v1 == hva256_02.v1); + + var hva256_03 = new HVA256_03 + { + v0 = Vector256.Create(random.Next(100)), + v1 = Vector256.Create(random.Next(100)), + v2 = Vector256.Create(random.Next(100)) + }; + var r03 = PassHVA256_03(hva256_03); + Check("PassHVA256_03.v0", r03.v0 == hva256_03.v0); + Check("PassHVA256_03.v1", r03.v1 == hva256_03.v1); + Check("PassHVA256_03.v2", r03.v2 == hva256_03.v2); + + // ---- HVA256 return tests ---- + Console.WriteLine("=== HVA return tests ==="); + + var retH01 = ReturnHVA256_01(Vector256.Create(77)); + Check("ReturnHVA256_01.v0", retH01.v0 == Vector256.Create(77)); + + var retH02 = ReturnHVA256_02(Vector256.Create(10), Vector256.Create(20)); + Check("ReturnHVA256_02.v0", retH02.v0 == Vector256.Create(10)); + Check("ReturnHVA256_02.v1", retH02.v1 == Vector256.Create(20)); + + // ---- Mixed scalar + HVA256 tests ---- + Console.WriteLine("=== Mixed scalar + HVA tests ==="); + + var hMixed = new HVA256_01 { v0 = Vector256.Create(100) }; + Check("HVA256WithScalars", HVA256WithScalars(1, hMixed, 2) == 103); + + // ---- Reflection256 tests ---- + Console.WriteLine("=== Reflection tests ==="); + + TestReflection256(); + } + else + { + Console.WriteLine("=== Skipping Vector256 tests: AVX not supported ==="); + } + + if (Avx512F.IsSupported) + { + // ---- Single Vector512 tests ---- + Console.WriteLine("=== Single Vector512 tests ==="); + + var v512 = Vector512.Create(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + Check("PassSingle512", PassSingle512(v512) == v512); + + Check("Add512", Add512(Vector512.Create(1), Vector512.Create(2)) == Vector512.Create(3)); + + Check("Mixed512", Mixed512(3, Vector512.Create(10), 7L) == Vector512.Create(20)); + + // ---- HVA512 argument tests ---- + Console.WriteLine("=== HVA512 argument tests ==="); + + var hva512_01 = new HVA512_01 { v0 = Vector512.Create(random.Next(100)) }; + var r512_01 = PassHVA512_01(hva512_01); + Check("PassHVA512_01.v0", r512_01.v0 == hva512_01.v0); + + var hva512_02 = new HVA512_02 { v0 = Vector512.Create(random.Next(100)), v1 = Vector512.Create(random.Next(100)) }; + var r512_02 = PassHVA512_02(hva512_02); + Check("PassHVA512_02.v0", r512_02.v0 == hva512_02.v0); + Check("PassHVA512_02.v1", r512_02.v1 == hva512_02.v1); + + // ---- HVA512 return tests ---- + var retH512 = ReturnHVA512_01(Vector512.Create(99)); + Check("ReturnHVA512_01.v0", retH512.v0 == Vector512.Create(99)); + + // ---- Mixed scalar + HVA512 tests ---- + var hMixed512 = new HVA512_01 { v0 = Vector512.Create(200) }; + Check("HVA512WithScalars", HVA512WithScalars(1L, hMixed512, 2L) == 203); + + // ---- Reflection512 tests ---- + TestReflection512(); + } + else + { + Console.WriteLine("=== Skipping Vector512 tests: AVX-512 not supported ==="); + } + + Console.WriteLine(isPassing ? "Test Passed" : "Test FAILED"); + + return isPassing ? 
PASS : FAIL; + } +} diff --git a/src/tests/JIT/Directed/VectorABI/VectorMgdMgd256_ro.csproj b/src/tests/JIT/Directed/VectorABI/VectorMgdMgd256_ro.csproj new file mode 100644 index 00000000000000..347e499430f708 --- /dev/null +++ b/src/tests/JIT/Directed/VectorABI/VectorMgdMgd256_ro.csproj @@ -0,0 +1,9 @@ + + + True + True + + + + + diff --git a/src/tests/JIT/Directed/VectorABI/VectorNumericsRegPass.cs b/src/tests/JIT/Directed/VectorABI/VectorNumericsRegPass.cs new file mode 100644 index 00000000000000..5642198760cc7d --- /dev/null +++ b/src/tests/JIT/Directed/VectorABI/VectorNumericsRegPass.cs @@ -0,0 +1,132 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using Xunit; + +// Test that System.Numerics.Vector types are passed in registers on Unix AMD64 +// for the active hardware-dependent vector size (for example, 16 bytes with SSE or +// 32 bytes on AVX-capable machines) according to the System V ABI. + +public static class VectorNumericsRegPass +{ + private const int PASS = 100; + private const int FAIL = 0; + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector AddVectors(Vector a, Vector b) + { + return a + b; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector MultiplyVectors(Vector a, Vector b) + { + return a * b; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector SubtractVectors(Vector a, Vector b) + { + return a - b; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static float SumVector(Vector v) + { + float sum = 0; + for (int i = 0; i < Vector.Count; i++) + { + sum += v[i]; + } + return sum; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector PassMultipleVectors(Vector a, Vector b, Vector c, int scalar) + { + return (a + b) * c + new Vector(scalar); + } + + [Fact] + public static int TestEntryPoint() + { + Console.WriteLine($"Vector.Count = {Vector.Count}"); + Console.WriteLine($"Vector size = {Vector.Count * sizeof(float)} bytes"); + Console.WriteLine($"Vector.Count = {Vector.Count}"); + Console.WriteLine($"Vector.Count = {Vector.Count}"); + + // Test with float vectors + var vf1 = new Vector(1.0f); + var vf2 = new Vector(2.0f); + var vfResult = AddVectors(vf1, vf2); + + for (int i = 0; i < Vector.Count; i++) + { + if (Math.Abs(vfResult[i] - 3.0f) > 0.001f) + { + Console.WriteLine($"FAIL: Float vector addition failed at index {i}: {vfResult[i]} != 3.0"); + return FAIL; + } + } + + // Test with int vectors + var vi1 = new Vector(5); + var vi2 = new Vector(3); + var viResult = MultiplyVectors(vi1, vi2); + + for (int i = 0; i < Vector.Count; i++) + { + if (viResult[i] != 15) + { + Console.WriteLine($"FAIL: Int vector multiplication failed at index {i}: {viResult[i]} != 15"); + return FAIL; + } + } + + // Test with double vectors + var vd1 = new Vector(10.0); + var vd2 = new Vector(3.0); + var vdResult = SubtractVectors(vd1, vd2); + + for (int i = 0; i < Vector.Count; i++) + { + if (Math.Abs(vdResult[i] - 7.0) > 0.001) + { + Console.WriteLine($"FAIL: Double vector subtraction failed at index {i}: {vdResult[i]} != 7.0"); + return FAIL; + } + } + + // Test sum operation + var vfSum = new Vector(4.0f); + float sum = SumVector(vfSum); + float expectedSum = 4.0f * Vector.Count; + if (Math.Abs(sum - expectedSum) > 0.001f) + { + Console.WriteLine($"FAIL: Sum operation failed: {sum} != {expectedSum}"); + return FAIL; + } + + // Test multiple vector parameters 
with scalar mixing + var va = new Vector(1.0f); + var vb = new Vector(2.0f); + var vc = new Vector(3.0f); + var vmResult = PassMultipleVectors(va, vb, vc, 5); + + // (1 + 2) * 3 + 5 = 14 + for (int i = 0; i < Vector.Count; i++) + { + if (Math.Abs(vmResult[i] - 14.0f) > 0.001f) + { + Console.WriteLine($"FAIL: Multiple parameter test failed at index {i}: {vmResult[i]} != 14.0"); + return FAIL; + } + } + + Console.WriteLine("PASS: All tests succeeded!"); + return PASS; + } +} diff --git a/src/tests/JIT/Directed/VectorABI/VectorNumericsRegPass.csproj b/src/tests/JIT/Directed/VectorABI/VectorNumericsRegPass.csproj new file mode 100644 index 00000000000000..297241bb63ea66 --- /dev/null +++ b/src/tests/JIT/Directed/VectorABI/VectorNumericsRegPass.csproj @@ -0,0 +1,9 @@ + + + True + True + + + + + diff --git a/src/tests/JIT/Directed/VectorABI/VectorRegPassSysV.cs b/src/tests/JIT/Directed/VectorABI/VectorRegPassSysV.cs new file mode 100644 index 00000000000000..34ddb8fbbf938f --- /dev/null +++ b/src/tests/JIT/Directed/VectorABI/VectorRegPassSysV.cs @@ -0,0 +1,309 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.X86; +using Xunit; + +// Test that Vector128, Vector256, and Vector512 are correctly passed as arguments +// and returned from methods on System V x64 (Linux), verifying the single-register +// SIMD passing path in the JIT's SysVX64Classifier. +// +// Vector128 (16B) -> XMM register +// Vector256 (32B) -> YMM register (requires AVX) +// Vector512 (64B) -> ZMM register (requires AVX-512) + +public static class VectorRegPassSysV +{ + private const int PASS = 100; + private const int FAIL = 0; + + // --- Vector128 tests --- + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AddVec128(Vector128 a, Vector128 b) + { + return a + b; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 PassManyVec128(Vector128 a, Vector128 b, Vector128 c, + Vector128 d, Vector128 e, int scalar) + { + return a + b + c + d + e + Vector128.Create(scalar); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 MixedArgsVec128(int x, Vector128 v, long y) + { + return v + Vector128.Create((float)(x + y)); + } + + // --- Vector256 tests (require AVX) --- + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector256 AddVec256(Vector256 a, Vector256 b) + { + return a + b; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector256 PassManyVec256(Vector256 a, Vector256 b, Vector256 c, + Vector256 d, int scalar) + { + return a + b + c + d + Vector256.Create(scalar); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector256 MixedArgsVec256(int x, Vector256 v, long y) + { + return v + Vector256.Create((float)(x + y)); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector256 ReturnVec256(int value) + { + return Vector256.Create(value); + } + + // --- Vector512 tests (require AVX-512) --- + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector512 AddVec512(Vector512 a, Vector512 b) + { + return a + b; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector512 ReturnVec512(int value) + { + return Vector512.Create(value); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector512 MixedArgsVec512(int x, Vector512 v, long y) + { + return v + Vector512.Create((float)(x + y)); + } + + // --- Vector64 tests --- 
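+    // Vector64<T> is 8 bytes: a single SSE-classified eightbyte, passed and returned in the low
+    // half of an XMM register.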
+ + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector64 AddVec64(Vector64 a, Vector64 b) + { + return a + b; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector64 ReturnVec64(float a, float b) + { + return Vector64.Create(a, b); + } + + // --- Chained return tests (return of one call feeds into the next) --- + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 ChainVec128(Vector128 a, Vector128 b, + Vector128 c, Vector128 d) + { + return AddVec128F(AddVec128F(a, b), AddVec128F(c, d)); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AddVec128F(Vector128 a, Vector128 b) + { + return a + b; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector256 ChainVec256(Vector256 a, Vector256 b, + Vector256 c, Vector256 d) + { + return AddVec256F(AddVec256F(a, b), AddVec256F(c, d)); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector256 AddVec256F(Vector256 a, Vector256 b) + { + return a + b; + } + + // --- Vector128 return tests --- + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 AddVec128D(Vector128 a, Vector128 b) + { + return a + b; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector256 AddVec256D(Vector256 a, Vector256 b) + { + return a + b; + } + + // --- Multi-size mixing: different vector widths in one call --- + + [MethodImpl(MethodImplOptions.NoInlining)] + static Vector128 MixedSizes(Vector128 a, Vector256 b, int scalar) + { + return a + b.GetLower() + Vector128.Create(scalar); + } + + // --- Return into struct field --- + + struct VectorPair128 + { + public Vector128 Lo; + public Vector128 Hi; + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static VectorPair128 ReturnPair128(Vector128 a, Vector128 b) + { + VectorPair128 result; + result.Lo = AddVec128(a, Vector128.Create(1)); + result.Hi = AddVec128(b, Vector128.Create(2)); + return result; + } + + // --- Test helpers --- + + static bool Check(T actual, T expected, string testName) where T : IEquatable + { + if (!actual.Equals(expected)) + { + Console.WriteLine($"FAIL: {testName}: expected {expected}, got {actual}"); + return false; + } + return true; + } + + [Fact] + public static int TestEntryPoint() + { + bool pass = true; + + // --- Vector64 tests --- + Console.WriteLine("=== Vector64 tests ==="); + + pass &= Check(AddVec64(Vector64.Create(1, 2), Vector64.Create(10, 20)), + Vector64.Create(11, 22), "AddVec64"); + + pass &= Check(ReturnVec64(3.0f, 4.0f), Vector64.Create(3.0f, 4.0f), "ReturnVec64"); + + // --- Vector128 tests (always available on x64) --- + Console.WriteLine("=== Vector128 tests ==="); + + var v128a = Vector128.Create(1, 2, 3, 4); + var v128b = Vector128.Create(10, 20, 30, 40); + pass &= Check(AddVec128(v128a, v128b), Vector128.Create(11, 22, 33, 44), "AddVec128"); + + pass &= Check( + PassManyVec128( + Vector128.Create(1), Vector128.Create(2), Vector128.Create(3), + Vector128.Create(4), Vector128.Create(5), 100), + Vector128.Create(115), + "PassManyVec128"); + + pass &= Check(MixedArgsVec128(3, Vector128.Create(10.0f), 7L), + Vector128.Create(20.0f), "MixedArgsVec128"); + + // --- Vector128 tests --- + pass &= Check(AddVec128D(Vector128.Create(1.5, 2.5), Vector128.Create(3.0, 4.0)), + Vector128.Create(4.5, 6.5), "AddVec128D"); + + // --- Vector128 chained return tests --- + // ChainVec128: (1+2) + (0.5+0.1) = 3.6 + pass &= Check( + ChainVec128(Vector128.Create(1.0f), Vector128.Create(2.0f), + Vector128.Create(0.5f), Vector128.Create(0.1f)), + Vector128.Create(3.6f), "ChainVec128"); + + // --- 
Vector256 tests --- + Console.WriteLine("=== Vector256 tests ==="); + Console.WriteLine($" Avx.IsSupported = {Avx.IsSupported}"); + + if (Avx.IsSupported) + { + var v256a = Vector256.Create(1, 2, 3, 4, 5, 6, 7, 8); + var v256b = Vector256.Create(10, 20, 30, 40, 50, 60, 70, 80); + pass &= Check(AddVec256(v256a, v256b), + Vector256.Create(11, 22, 33, 44, 55, 66, 77, 88), "AddVec256"); + + pass &= Check( + PassManyVec256( + Vector256.Create(1), Vector256.Create(2), Vector256.Create(3), + Vector256.Create(4), 100), + Vector256.Create(110), + "PassManyVec256"); + + pass &= Check(MixedArgsVec256(3, Vector256.Create(10.0f), 7L), + Vector256.Create(20.0f), "MixedArgsVec256"); + + pass &= Check(ReturnVec256(42), Vector256.Create(42), "ReturnVec256"); + + // --- Vector256 tests --- + pass &= Check(AddVec256D(Vector256.Create(1.0), Vector256.Create(2.0)), + Vector256.Create(3.0), "AddVec256D"); + + // --- Vector256 chained return tests --- + pass &= Check( + ChainVec256(Vector256.Create(1.0f), Vector256.Create(2.0f), + Vector256.Create(0.5f), Vector256.Create(0.1f)), + Vector256.Create(3.6f), "ChainVec256"); + } + else + { + Console.WriteLine(" Skipping Vector256 tests because AVX is not supported."); + } + + // --- Vector512 tests --- + Console.WriteLine("=== Vector512 tests ==="); + Console.WriteLine($" Avx512F.IsSupported = {Avx512F.IsSupported}"); + + if (Avx512F.IsSupported) + { + var v512a = Vector512.Create(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + var v512b = Vector512.Create(10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, 120, 130, 140, 150, 160); + pass &= Check(AddVec512(v512a, v512b), + Vector512.Create(11, 22, 33, 44, 55, 66, 77, 88, 99, 110, 121, 132, 143, 154, 165, 176), + "AddVec512"); + + pass &= Check(ReturnVec512(99), Vector512.Create(99), "ReturnVec512"); + + pass &= Check(MixedArgsVec512(3, Vector512.Create(10.0f), 7L), + Vector512.Create(20.0f), "MixedArgsVec512"); + } + else + { + Console.WriteLine(" Skipping Vector512 tests because AVX-512 is not supported."); + } + + // --- Multi-size mixing tests --- + Console.WriteLine("=== Multi-size mixing tests ==="); + + if (Avx.IsSupported) + { + pass &= Check( + MixedSizes(Vector128.Create(1), Vector256.Create(10), 5), + Vector128.Create(16), "MixedSizes_128_256"); + } + else + { + Console.WriteLine(" Skipping multi-size mixing tests because AVX is not supported."); + } + + // --- Return into struct field tests --- + Console.WriteLine("=== Return into struct tests ==="); + + var pair = ReturnPair128(Vector128.Create(10), Vector128.Create(20)); + pass &= Check(pair.Lo, Vector128.Create(11), "ReturnPair128.Lo"); + pass &= Check(pair.Hi, Vector128.Create(22), "ReturnPair128.Hi"); + + Console.WriteLine(pass ? "PASS" : "FAIL"); + return pass ? 
PASS : FAIL; + } +} diff --git a/src/tests/JIT/Directed/VectorABI/VectorRegPassSysV.csproj b/src/tests/JIT/Directed/VectorABI/VectorRegPassSysV.csproj new file mode 100644 index 00000000000000..fc2728afce92c6 --- /dev/null +++ b/src/tests/JIT/Directed/VectorABI/VectorRegPassSysV.csproj @@ -0,0 +1,9 @@ + + + True + True + + + + + diff --git a/src/tests/JIT/Stress/ABI/ABIs.cs b/src/tests/JIT/Stress/ABI/ABIs.cs index fbe547f14a2d7b..0c593c8f8b2a87 100644 --- a/src/tests/JIT/Stress/ABI/ABIs.cs +++ b/src/tests/JIT/Stress/ABI/ABIs.cs @@ -48,7 +48,7 @@ internal class Win86Abi : IAbi { typeof(byte), typeof(short), typeof(int), typeof(long), typeof(float), typeof(double), typeof(Int128), - typeof(Vector), typeof(Vector128), typeof(Vector256), + typeof(Vector), typeof(Vector128), typeof(Vector256), typeof(Vector512), typeof(S1P), typeof(S2P), typeof(S2U), typeof(S3U), typeof(S4P), typeof(S4U), typeof(S5U), typeof(S6U), typeof(S7U), typeof(S8P), typeof(S8U), typeof(S9U), @@ -106,7 +106,7 @@ internal class SysVAbi : IAbi { typeof(byte), typeof(short), typeof(int), typeof(long), typeof(float), typeof(double), - typeof(Vector), typeof(Vector128), typeof(Vector256), + typeof(Vector), typeof(Vector128), typeof(Vector256), typeof(Vector512), typeof(Int128), typeof(S1P), typeof(S2P), typeof(S2U), typeof(S3U), typeof(S4P), typeof(S4U), typeof(S5U), typeof(S6U),