-
Notifications
You must be signed in to change notification settings - Fork 1.3k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
x64: Add AVX support for some more float-related instructions (#6092)
* x64: Add AVX encodings of `vcvt{ss2sd,sd2ss}` Additionally update the instruction helpers to take an `XmmMem` argument to allow load sinking into the instruction. * x64: Add AVX encoding of `sqrts{s,d}` * x64: Add AVX support for `rounds{s,d}`
- Loading branch information
1 parent
afb4179
commit 0b0ac3f
Showing
8 changed files
with
543 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
test compile precise-output | ||
set enable_simd | ||
target x86_64 has_avx | ||
|
||
function %f1(f32) -> f32 { | ||
block0(v0: f32): | ||
v1 = ceil v0 | ||
return v1 | ||
} | ||
|
||
; VCode: | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block0: | ||
; vroundss $2, %xmm0, %xmm0 | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; ret | ||
; | ||
; Disassembled: | ||
; block0: ; offset 0x0 | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block1: ; offset 0x4 | ||
; vroundss $2, %xmm0, %xmm0, %xmm0 | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; retq | ||
|
||
function %f2(f64) -> f64 { | ||
block0(v0: f64): | ||
v1 = ceil v0 | ||
return v1 | ||
} | ||
|
||
; VCode: | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block0: | ||
; vroundsd $2, %xmm0, %xmm0 | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; ret | ||
; | ||
; Disassembled: | ||
; block0: ; offset 0x0 | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block1: ; offset 0x4 | ||
; vroundsd $2, %xmm0, %xmm0, %xmm0 | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; retq | ||
|
||
function %f4(f32x4) -> f32x4 { | ||
block0(v0: f32x4): | ||
v1 = ceil v0 | ||
return v1 | ||
} | ||
|
||
; VCode: | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block0: | ||
; vroundps $2, %xmm0, %xmm0 | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; ret | ||
; | ||
; Disassembled: | ||
; block0: ; offset 0x0 | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block1: ; offset 0x4 | ||
; vroundps $2, %xmm0, %xmm0 | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; retq | ||
|
||
function %f4(f64x2) -> f64x2 { | ||
block0(v0: f64x2): | ||
v1 = ceil v0 | ||
return v1 | ||
} | ||
|
||
; VCode: | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block0: | ||
; vroundpd $2, %xmm0, %xmm0 | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; ret | ||
; | ||
; Disassembled: | ||
; block0: ; offset 0x0 | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block1: ; offset 0x4 | ||
; vroundpd $2, %xmm0, %xmm0 | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; retq | ||
|
130 changes: 130 additions & 0 deletions
130
cranelift/filetests/filetests/isa/x64/fpromote-demote-avx.clif
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
test compile precise-output | ||
set enable_simd | ||
target x86_64 has_avx | ||
|
||
function %fpromote(f32) -> f64 { | ||
block0(v0: f32): | ||
v1 = fpromote.f64 v0 | ||
return v1 | ||
} | ||
|
||
; VCode: | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block0: | ||
; vcvtss2sd %xmm0, %xmm0 | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; ret | ||
; | ||
; Disassembled: | ||
; block0: ; offset 0x0 | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block1: ; offset 0x4 | ||
; vcvtss2sd %xmm0, %xmm0, %xmm0 | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; retq | ||
|
||
function %fpromote_load(i64, f32) -> f64 { | ||
ss0 = explicit_slot 16 | ||
|
||
block0(v1: i64, v2: f32): | ||
v3 = stack_addr.i64 ss0 | ||
store.f32 v2, v3 | ||
v4 = load.f32 v3 | ||
v5 = fpromote.f64 v4 | ||
return v5 | ||
} | ||
|
||
; VCode: | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; subq %rsp, $16, %rsp | ||
; block0: | ||
; lea rsp(0 + virtual offset), %rdx | ||
; vmovss %xmm0, 0(%rdx) | ||
; vcvtss2sd 0(%rdx), %xmm0 | ||
; addq %rsp, $16, %rsp | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; ret | ||
; | ||
; Disassembled: | ||
; block0: ; offset 0x0 | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; subq $0x10, %rsp | ||
; block1: ; offset 0x8 | ||
; leaq (%rsp), %rdx | ||
; vmovss %xmm0, (%rdx) ; trap: heap_oob | ||
; vcvtss2sd (%rdx), %xmm0, %xmm0 ; trap: heap_oob | ||
; addq $0x10, %rsp | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; retq | ||
|
||
function %fdemote(f64) -> f32 { | ||
block0(v0: f64): | ||
v1 = fdemote.f32 v0 | ||
return v1 | ||
} | ||
|
||
; VCode: | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block0: | ||
; vcvtsd2ss %xmm0, %xmm0 | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; ret | ||
; | ||
; Disassembled: | ||
; block0: ; offset 0x0 | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; block1: ; offset 0x4 | ||
; vcvtsd2ss %xmm0, %xmm0, %xmm0 | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; retq | ||
|
||
function %fdemote_load(i64, f64) -> f32 { | ||
ss0 = explicit_slot 16 | ||
|
||
block0(v1: i64, v2: f64): | ||
v3 = stack_addr.i64 ss0 | ||
store.f64 v2, v3 | ||
v4 = load.f64 v3 | ||
v5 = fdemote.f32 v4 | ||
return v5 | ||
} | ||
|
||
; VCode: | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; subq %rsp, $16, %rsp | ||
; block0: | ||
; lea rsp(0 + virtual offset), %rdx | ||
; vmovsd %xmm0, 0(%rdx) | ||
; vcvtsd2ss 0(%rdx), %xmm0 | ||
; addq %rsp, $16, %rsp | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; ret | ||
; | ||
; Disassembled: | ||
; block0: ; offset 0x0 | ||
; pushq %rbp | ||
; movq %rsp, %rbp | ||
; subq $0x10, %rsp | ||
; block1: ; offset 0x8 | ||
; leaq (%rsp), %rdx | ||
; vmovsd %xmm0, (%rdx) ; trap: heap_oob | ||
; vcvtsd2ss (%rdx), %xmm0, %xmm0 ; trap: heap_oob | ||
; addq $0x10, %rsp | ||
; movq %rbp, %rsp | ||
; popq %rbp | ||
; retq | ||
|
Oops, something went wrong.