dotnet · tannergooding · Feb 15, 2023 · Feb 9, 2023 · Feb 10, 2023 · Feb 10, 2023
@@ -7005,6 +7005,13 @@ encode_patch (MonoAotCompile *acfg, MonoJumpInfo *patch_info, guint8 *buf, guint
 		encode_value (((guint32 *)patch_info->data.target) [MINI_LS_WORD_IDX], p, &p);
 		encode_value (((guint32 *)patch_info->data.target) [MINI_MS_WORD_IDX], p, &p);
 		break;
+	case MONO_PATCH_INFO_X128:
+	case MONO_PATCH_INFO_X128_GOT:
+		encode_value (((guint32 *)patch_info->data.target) [(MINI_LS_WORD_IDX * 2) + MINI_LS_WORD_IDX], p, &p);
+		encode_value (((guint32 *)patch_info->data.target) [(MINI_LS_WORD_IDX * 2) + MINI_MS_WORD_IDX], p, &p);
+		encode_value (((guint32 *)patch_info->data.target) [(MINI_MS_WORD_IDX * 2) + MINI_LS_WORD_IDX], p, &p);
+		encode_value (((guint32 *)patch_info->data.target) [(MINI_MS_WORD_IDX * 2) + MINI_MS_WORD_IDX], p, &p);
+		break;
 	case MONO_PATCH_INFO_VTABLE:
 	case MONO_PATCH_INFO_CLASS:
 	case MONO_PATCH_INFO_IID:

@@ -3842,6 +3842,26 @@ decode_patch (MonoAotModule *aot_module, MonoMemPool *mp, MonoJumpInfo *ji, guin
 		*(double*)ji->data.target = *(double*)&v;
 		break;
 	}
+	case MONO_PATCH_INFO_X128:
+	case MONO_PATCH_INFO_X128_GOT: {
+		guint32 val [4];
+		guint64 v[2];
+
+		// FIXME: Align to 16 bytes ?
+		ji->data.target = mono_mem_manager_alloc0 (mem_manager, 16);
+
+		val [0] = decode_value (p, &p);
+		val [1] = decode_value (p, &p);
+		val [2] = decode_value (p, &p);
+		val [3] = decode_value (p, &p);
+
+		v[0] = ((guint64)val [1] << 32) | ((guint64)val [0]);
+		((guint64*)ji->data.target)[0] = v[0];
+
+		v[1] = ((guint64)val [3] << 32) | ((guint64)val [2]);
+		((guint64*)ji->data.target)[1] = v[1];
+		break;
+	}
 	case MONO_PATCH_INFO_LDSTR:
 		image = load_image (aot_module, decode_value (p, &p), error);
 		mono_error_cleanup (error); /* FIXME don't swallow the error */

@@ -300,10 +300,15 @@ mono_constant_fold_ins (MonoCompile *cfg, MonoInst *ins, MonoInst *arg1, MonoIns
 		}
 		break;
 	case OP_XMOVE:
-		if (arg1->opcode == OP_XZERO) {
+		if ((arg1->opcode == OP_XZERO) || (arg1->opcode == OP_XONES)) {
 			ALLOC_DEST (cfg, dest, ins);
-			dest->opcode = OP_XZERO;
+			dest->opcode = arg1->opcode;
 			dest->sreg1 = -1;
+		} else if (arg1->opcode == OP_XCONST) {
+			ALLOC_DEST (cfg, dest, ins);
+			dest->opcode = arg1->opcode;
+			dest->sreg1 = -1;
+			dest->inst_p0 = arg1->inst_p0;
 		}
 		break;
 	case OP_COMPARE:

@@ -769,6 +769,7 @@ cvttps2dq: dest:x src1:x len:5 clob:1
 xmove: dest:x src1:x len:5
 xzero: dest:x len:5
 xones: dest:x len:5
+xconst: dest:x len:12
 
 iconv_to_x: dest:x src1:i len:5
 extract_i4: dest:i src1:x len:5

@@ -678,7 +678,7 @@ optimize_initlocals (MonoCompile *cfg)
 				//printf ("DEAD: "); mono_print_ins (ins);
 				if (cfg->disable_initlocals_opt_refs && var->type == STACK_OBJ)
 					continue;
-				if ((ins->opcode == OP_ICONST) || (ins->opcode == OP_I8CONST) || (ins->opcode == OP_R8CONST) || (ins->opcode == OP_R4CONST)) {
+				if ((ins->opcode == OP_ICONST) || (ins->opcode == OP_I8CONST) || (ins->opcode == OP_R8CONST) || (ins->opcode == OP_R4CONST) || (ins->opcode == OP_XCONST)) {
 					NULLIFY_INS (ins);
 					MONO_VARINFO (cfg, var->inst_c0)->spill_costs -= 1;
 					/*
@@ -874,7 +874,7 @@ update_liveness2 (MonoCompile *cfg, MonoInst *ins, gboolean set_volatile, int in
 			}
 			else {
 				/* Try dead code elimination */
-				if (!cfg->disable_deadce_vars && (var != cfg->ret) && !(var->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT)) && ((ins->opcode == OP_ICONST) || (ins->opcode == OP_I8CONST) || (ins->opcode == OP_R8CONST)) && !(var->flags & MONO_INST_VOLATILE)) {
+				if (!cfg->disable_deadce_vars && (var != cfg->ret) && !(var->flags & (MONO_INST_VOLATILE|MONO_INST_INDIRECT)) && ((ins->opcode == OP_ICONST) || (ins->opcode == OP_I8CONST) || (ins->opcode == OP_R8CONST) || (ins->opcode == OP_XCONST)) && !(var->flags & MONO_INST_VOLATILE)) {
 					LIVENESS_DEBUG (printf ("\tdead def of R%d, eliminated\n", ins->dreg));
 					NULLIFY_INS (ins);
 					return;

@@ -12640,6 +12640,7 @@ mono_op_no_side_effects (int opcode)
 	case OP_VZERO:
 	case OP_XZERO:
 	case OP_XONES:
+	case OP_XCONST:
 	case OP_ICONST:
 	case OP_I8CONST:
 	case OP_ADD_IMM:

@@ -7296,6 +7296,17 @@ mono_arch_output_basic_block (MonoCompile *cfg, MonoBasicBlock *bb)
 		case OP_XONES:
 			amd64_sse_pcmpeqb_reg_reg (code, ins->dreg, ins->dreg);
 			break;
+		case OP_XCONST: {
+			if (cfg->compile_aot && cfg->code_exec_only) {
+				mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_X128_GOT, ins->inst_p0);
+				amd64_mov_reg_membase (code, AMD64_R11, AMD64_RIP, 0, sizeof(gpointer));
+				amd64_sse_movups_reg_membase (code, ins->dreg, AMD64_R11, 0);
+			} else {
+				mono_add_patch_info (cfg, offset, MONO_PATCH_INFO_X128, ins->inst_p0);
+				amd64_sse_movups_reg_membase (code, ins->dreg, AMD64_RIP, 0);
+			}
+			break;
+		}
 		case OP_ICONV_TO_R4_RAW:
 			amd64_movd_xreg_reg_size (code, ins->dreg, ins->sreg1, 4);
 			if (!cfg->r4fp)
@@ -8140,11 +8151,13 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
 	for (patch_info = cfg->patch_info; patch_info; patch_info = patch_info->next) {
 		if (patch_info->type == MONO_PATCH_INFO_EXC)
 			code_size += 40;
-		if (patch_info->type == MONO_PATCH_INFO_R8)
+		else if (patch_info->type == MONO_PATCH_INFO_X128)
+			code_size += 16 + 15; /* sizeof (Vector128<T>) + alignment */
+		else if (patch_info->type == MONO_PATCH_INFO_R8)
 			code_size += 8 + 15; /* sizeof (double) + alignment */
-		if (patch_info->type == MONO_PATCH_INFO_R4)
+		else if (patch_info->type == MONO_PATCH_INFO_R4)
 			code_size += 4 + 15; /* sizeof (float) + alignment */
-		if (patch_info->type == MONO_PATCH_INFO_GC_CARD_TABLE_ADDR)
+		else if (patch_info->type == MONO_PATCH_INFO_GC_CARD_TABLE_ADDR)
 			code_size += 8 + 7; /*sizeof (void*) + alignment */
 	}
 
@@ -8218,6 +8231,10 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
 			guint8 *pos, *patch_pos;
 			guint32 target_pos;
 
+			// FIXME: R8 and R4 only need 8 and 4 byte alignment, respectively
+			// They should be handled separately with anything needing 16 byte
+			// alignment using X128
+
 			/* The SSE opcodes require a 16 byte alignment */
 			code = (guint8*)ALIGN_TO (code, 16);
 
@@ -8244,6 +8261,31 @@ mono_arch_emit_exceptions (MonoCompile *cfg)
 			remove = TRUE;
 			break;
 		}
+		case MONO_PATCH_INFO_X128: {
+			guint8 *pos, *patch_pos;
+			guint32 target_pos;
+
+			/* The SSE opcodes require a 16 byte alignment */
+			code = (guint8*)ALIGN_TO (code, 16);
+
+			pos = cfg->native_code + patch_info->ip.i;
+			if (IS_REX (pos [0])) {
+				patch_pos = pos + 4;
+				target_pos = GPTRDIFF_TO_UINT32 (code - pos - 8);
+			}
+			else {
+				patch_pos = pos + 3;
+				target_pos = GPTRDIFF_TO_UINT32 (code - pos - 7);
+			}
+
+			memcpy (code, patch_info->data.target, 16);
+			code += 16;
+
+			*(guint32*)(patch_pos) = target_pos;
+
+			remove = TRUE;
+			break;
+		}
 		case MONO_PATCH_INFO_GC_CARD_TABLE_ADDR: {
 			guint8 *pos;
 

@@ -7844,6 +7844,94 @@ MONO_RESTORE_WARNING
 			values [ins->dreg] = LLVMConstAllOnes (type_to_llvm_type (ctx, m_class_get_byval_arg (ins->klass)));
 			break;
 		}
+		case OP_XCONST: {
+			MonoTypeEnum etype;
+			int ecount;
+
+			const char *class_name = m_class_get_name (ins->klass);
+
+			if (!strcmp(class_name, "Vector`1") || !strcmp(class_name, "Vector64`1") || !strcmp (class_name, "Vector128`1") || !strcmp (class_name, "Vector256`1") || !strcmp (class_name, "Vector512`1")) {
+				MonoType *element_type = mono_class_get_context (ins->klass)->class_inst->type_argv [0];
+				etype = element_type->type;
+				ecount = mono_class_value_size (ins->klass, NULL) / mono_class_value_size (mono_class_from_mono_type_internal(element_type), NULL);
+			} else if (!strcmp(class_name, "Vector4") || !strcmp(class_name, "Plane") || !strcmp(class_name, "Quaternion")) {
+				etype = MONO_TYPE_R4;
+				ecount = 4;
+			} else {
+				g_assert_not_reached ();
+			}
+
+			LLVMTypeRef llvm_type = primitive_type_to_llvm_type(etype);
+			LLVMValueRef vals [64];
+
+			switch (etype) {
+			case MONO_TYPE_I1:
+			case MONO_TYPE_U1: {
+				guint8* v = (guint8*)ins->inst_p0;
+
+				for (int i = 0; i < ecount; ++i) {
+					vals [i] = LLVMConstInt (llvm_type, v[i], FALSE);
+				}
+				break;
+			}
+			case MONO_TYPE_I2:
+			case MONO_TYPE_U2: {
+				guint16* v = (guint16*)ins->inst_p0;
+
+				for (int i = 0; i < ecount; ++i) {
+					vals [i] = LLVMConstInt (llvm_type, v[i], FALSE);
+				}
+				break;
+			}
+#if TARGET_SIZEOF_VOID_P == 4
+			case MONO_TYPE_I:
+			case MONO_TYPE_U:
+#endif
+			case MONO_TYPE_I4:
+			case MONO_TYPE_U4: {
+				guint32* v = (guint32*)ins->inst_p0;
+
+				for (int i = 0; i < ecount; ++i) {
+					vals [i] = LLVMConstInt (llvm_type, v[i], FALSE);
+				}
+				break;
+			}
+#if TARGET_SIZEOF_VOID_P == 8
+			case MONO_TYPE_I:
+			case MONO_TYPE_U:
+#endif
+			case MONO_TYPE_I8:
+			case MONO_TYPE_U8: {
+				guint64* v = (guint64*)ins->inst_p0;
+
+				for (int i = 0; i < ecount; ++i) {
+					vals [i] = LLVMConstInt (llvm_type, v[i], FALSE);
+				}
+				break;
+			}
+			case MONO_TYPE_R4: {
+				float* v = (float*)ins->inst_p0;
+
+				for (int i = 0; i < ecount; ++i) {
+					vals [i] = LLVMConstReal (llvm_type, v[i]);
+				}
+				break;
+			}
+			case MONO_TYPE_R8: {
+				double* v = (double*)ins->inst_p0;
+
+				for (int i = 0; i < ecount; ++i) {
+					vals [i] = LLVMConstReal (llvm_type, v[i]);
+				}
+				break;
+			}
+			default:
+				g_assert_not_reached ();
+			}
+
+			values [ins->dreg] = LLVMConstVector (vals, ecount);
+			break;
+		}
 		case OP_LOADX_MEMBASE: {
 			LLVMTypeRef t = type_to_llvm_type (ctx, m_class_get_byval_arg (ins->klass));
 			LLVMValueRef src;

@@ -1170,6 +1170,7 @@ MINI_OP(OP_CREATE_SCALAR, "create_scalar", XREG, XREG, NONE)
 MINI_OP(OP_XMOVE,   "xmove", XREG, XREG, NONE)
 MINI_OP(OP_XZERO,   "xzero", XREG, NONE, NONE)
 MINI_OP(OP_XONES,   "xones", XREG, NONE, NONE)
+MINI_OP(OP_XCONST,  "xconst", XREG, NONE, NONE)
 MINI_OP(OP_XPHI,	"xphi", XREG, NONE, NONE)
 
 /*

@@ -1281,6 +1281,8 @@ mono_patch_info_hash (gconstpointer data)
 	}
 	case MONO_PATCH_INFO_GSHAREDVT_IN_WRAPPER:
 		return hash | mono_signature_hash (ji->data.sig);
+	case MONO_PATCH_INFO_X128_GOT:
+		return hash | (guint32)*(double*)ji->data.target;
 	case MONO_PATCH_INFO_R8_GOT:
 		return hash | (guint32)*(double*)ji->data.target;
 	case MONO_PATCH_INFO_R4_GOT:
@@ -1605,6 +1607,8 @@ mono_resolve_patch_target_ext (MonoMemoryManager *mem_manager, MonoMethod *metho
 	case MONO_PATCH_INFO_R4_GOT:
 	case MONO_PATCH_INFO_R8:
 	case MONO_PATCH_INFO_R8_GOT:
+	case MONO_PATCH_INFO_X128:
+	case MONO_PATCH_INFO_X128_GOT:
 		target = patch_info->data.target;
 		break;
 	case MONO_PATCH_INFO_EXC_NAME:

@@ -19,6 +19,7 @@ PATCH_INFO(LDTOKEN, "ldtoken")
 PATCH_INFO(TYPE_FROM_HANDLE, "type_from_handle")
 PATCH_INFO(R4, "r4")
 PATCH_INFO(R8, "r8")
+PATCH_INFO(X128, "x128")
 PATCH_INFO(IP, "ip")
 PATCH_INFO(IID, "iid")
 PATCH_INFO(ADJUSTED_IID, "adjusted_iid")
@@ -76,12 +77,13 @@ PATCH_INFO(SPECIFIC_TRAMPOLINES, "specific_trampolines")
 PATCH_INFO(SPECIFIC_TRAMPOLINES_GOT_SLOTS_BASE, "specific_trampolines_got_slots_base")
 
 /*
- * PATCH_INFO_R4/R8 is handled by mono_arch_emit_exceptions () by emitting the data
- * into the text segment after the method body. These patches emit the data
+ * PATCH_INFO_* is handled by mono_arch_emit_exceptions () by emitting the data
+ * into the text segment after the method body. PATCH_INFO_*_GOT emit the data
  * elsewhere.
  */
 PATCH_INFO(R8_GOT, "r8_got")
 PATCH_INFO(R4_GOT, "r4_got")
+PATCH_INFO(X128_GOT, "x128_got")
 
 /* MonoMethod* -> the address of a memory slot which is used to cache the pinvoke address */
 PATCH_INFO(METHOD_PINVOKE_ADDR_CACHE, "pinvoke_addr_cache")