Skip to content

Commit

Permalink
Add x86 complex encodings for SIMD load-extend instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
abrown committed Apr 30, 2020
1 parent aa78811 commit 9e9250f
Show file tree
Hide file tree
Showing 3 changed files with 127 additions and 2 deletions.
35 changes: 35 additions & 0 deletions cranelift/codegen/meta/src/isa/x86/encodings.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1601,8 +1601,11 @@ fn define_simd(
let sadd_sat = shared.by_name("sadd_sat");
let scalar_to_vector = shared.by_name("scalar_to_vector");
let sload8x8 = shared.by_name("sload8x8");
let sload8x8_complex = shared.by_name("sload8x8_complex");
let sload16x4 = shared.by_name("sload16x4");
let sload16x4_complex = shared.by_name("sload16x4_complex");
let sload32x2 = shared.by_name("sload32x2");
let sload32x2_complex = shared.by_name("sload32x2_complex");
let spill = shared.by_name("spill");
let sqrt = shared.by_name("sqrt");
let sshr_imm = shared.by_name("sshr_imm");
Expand All @@ -1611,8 +1614,11 @@ fn define_simd(
let store_complex = shared.by_name("store_complex");
let uadd_sat = shared.by_name("uadd_sat");
let uload8x8 = shared.by_name("uload8x8");
let uload8x8_complex = shared.by_name("uload8x8_complex");
let uload16x4 = shared.by_name("uload16x4");
let uload16x4_complex = shared.by_name("uload16x4_complex");
let uload32x2 = shared.by_name("uload32x2");
let uload32x2_complex = shared.by_name("uload32x2_complex");
let ushr_imm = shared.by_name("ushr_imm");
let usub_sat = shared.by_name("usub_sat");
let vconst = shared.by_name("vconst");
Expand Down Expand Up @@ -1977,6 +1983,35 @@ fn define_simd(
}
}

// SIMD load extend (complex addressing)
// Registers x86 encodings for the six `*load*_complex` SIMD load-extend instructions. Each
// instruction is paired with one PMOVZX*/PMOVSX* opcode constant (zero-extending `uload*` with
// PMOVZX*, sign-extending `sload*` with PMOVSX*) and is encoded once per `fldWithIndex*` recipe.
//
// Instruction predicate: only match `load_complex` instructions whose length equals two.
// NOTE(review): presumably "length" is the number of address operands in `args` (base + index)
// -- confirm against `InstructionPredicate::new_length_equals`.
let is_load_complex_length_two =
InstructionPredicate::new_length_equals(&*formats.load_complex, 2);
for (inst, opcodes) in &[
(uload8x8_complex, &PMOVZXBW),
(uload16x4_complex, &PMOVZXWD),
(uload32x2_complex, &PMOVZXDQ),
(sload8x8_complex, &PMOVSXBW),
(sload16x4_complex, &PMOVSXWD),
(sload32x2_complex, &PMOVSXDQ),
] {
// NOTE(review): the recipe names suggest index addressing with no displacement, an 8-bit
// displacement, and a 32-bit displacement respectively -- confirm in the recipe definitions.
for recipe in &[
rec_fldWithIndex,
rec_fldWithIndexDisp8,
rec_fldWithIndexDisp32,
] {
let template = recipe.opcodes(*opcodes);
// Shared by all three registrations below: requires the `use_sse41_simd` ISA predicate and
// the two-element `load_complex` instruction predicate.
let predicate = |encoding: EncodingBuilder| {
encoding
.isa_predicate(use_sse41_simd)
.inst_predicate(is_load_complex_length_two.clone())
};
// `template` is cloned here because it is used again for the two 64-bit registrations.
e.enc32_func(inst.clone(), template.clone(), predicate);
// No infer_rex calculator for these recipes; place REX version first as in enc_x86_64.
e.enc64_func(inst.clone(), template.rex(), predicate);
// Non-REX variant of the same template, registered after the REX one.
e.enc64_func(inst.clone(), template, predicate);
}
}

// SIMD integer addition
for (ty, opcodes) in &[(I8, &PADDB), (I16, &PADDW), (I32, &PADDD), (I64, &PADDQ)] {
let iadd = iadd.bind(vector(*ty, sse_vector_size));
Expand Down
86 changes: 85 additions & 1 deletion cranelift/codegen/meta/src/shared/instructions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1172,6 +1172,20 @@ pub(crate) fn define(
.can_load(true),
);

// Defines the `uload8x8_complex` instruction with the `load_complex` format: inputs are
// `MemFlags`, `args` and `Offset`, the single result is `a`, and it is marked as a load.
ig.push(
Inst::new(
"uload8x8_complex",
r#"
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
i16x8 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);

ig.push(
Inst::new(
"sload8x8",
Expand All @@ -1186,6 +1200,20 @@ pub(crate) fn define(
.can_load(true),
);

// Defines the `sload8x8_complex` instruction with the `load_complex` format: inputs are
// `MemFlags`, `args` and `Offset`, the single result is `a`, and it is marked as a load.
ig.push(
Inst::new(
"sload8x8_complex",
r#"
Load an 8x8 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
i16x8 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);

let I32x4 = &TypeVar::new(
"I32x4",
"A SIMD vector with exactly 4 lanes of 32-bit values",
Expand All @@ -1201,7 +1229,7 @@ pub(crate) fn define(
Inst::new(
"uload16x4",
r#"
Load an 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
Load a 16x4 vector (64 bits) from memory at ``p + Offset`` and zero-extend into an i32x4
vector.
"#,
&formats.load,
Expand All @@ -1211,6 +1239,20 @@ pub(crate) fn define(
.can_load(true),
);

// Defines the `uload16x4_complex` instruction with the `load_complex` format: inputs are
// `MemFlags`, `args` and `Offset`, the single result is `a`, and it is marked as a load.
ig.push(
Inst::new(
"uload16x4_complex",
r#"
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
i32x4 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);

ig.push(
Inst::new(
"sload16x4",
Expand All @@ -1225,6 +1267,20 @@ pub(crate) fn define(
.can_load(true),
);

// Defines the `sload16x4_complex` instruction with the `load_complex` format: inputs are
// `MemFlags`, `args` and `Offset`, the single result is `a`, and it is marked as a load.
ig.push(
Inst::new(
"sload16x4_complex",
r#"
Load a 16x4 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
i32x4 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);

let I64x2 = &TypeVar::new(
"I64x2",
"A SIMD vector with exactly 2 lanes of 64-bit values",
Expand All @@ -1250,6 +1306,20 @@ pub(crate) fn define(
.can_load(true),
);

// Defines the `uload32x2_complex` instruction with the `load_complex` format: inputs are
// `MemFlags`, `args` and `Offset`, the single result is `a`, and it is marked as a load.
ig.push(
Inst::new(
"uload32x2_complex",
r#"
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and zero-extend into an
i64x2 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);

ig.push(
Inst::new(
"sload32x2",
Expand All @@ -1264,6 +1334,20 @@ pub(crate) fn define(
.can_load(true),
);

// Defines the `sload32x2_complex` instruction with the `load_complex` format: inputs are
// `MemFlags`, `args` and `Offset`, the single result is `a`, and it is marked as a load.
ig.push(
Inst::new(
"sload32x2_complex",
r#"
Load a 32x2 vector (64 bits) from memory at ``sum(args) + Offset`` and sign-extend into an
i64x2 vector.
"#,
&formats.load_complex,
)
.operands_in(vec![MemFlags, args, Offset])
.operands_out(vec![a])
.can_load(true),
);

let x = &Operand::new("x", Mem).with_doc("Value to be stored");
let a = &Operand::new("a", Mem).with_doc("Value loaded");
let Offset =
Expand Down
8 changes: 7 additions & 1 deletion cranelift/codegen/src/isa/aarch64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2051,11 +2051,17 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(ctx: &mut C, insn: IRInst) {
| Opcode::ScalarToVector
| Opcode::Swizzle
| Opcode::Uload8x8
| Opcode::Uload8x8Complex
| Opcode::Sload8x8
| Opcode::Sload8x8Complex
| Opcode::Uload16x4
| Opcode::Uload16x4Complex
| Opcode::Sload16x4
| Opcode::Sload16x4Complex
| Opcode::Uload32x2
| Opcode::Sload32x2 => {
| Opcode::Uload32x2Complex
| Opcode::Sload32x2
| Opcode::Sload32x2Complex => {
// TODO
panic!("Vector ops not implemented.");
}
Expand Down

0 comments on commit 9e9250f

Please sign in to comment.