Skip to content

Commit f3a3065

Browse files
committed
[dev.simd] simd, cmd/compile: add float -> float conversions
This should mark the end of the conversion table, except for float16 which does not exist on Go yet. The rounding logic documentation of float64 -> float32 is based on abi-internal default MXCSR: | RC | 14/13 | 0 (RN) | Round to nearest | Change-Id: I27a86560e8d74d20f21350bf78314b4eada20ec0 Reviewed-on: https://go-review.googlesource.com/c/go/+/724440 LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com> Reviewed-by: David Chase <drchase@google.com>
1 parent d6564ed commit f3a3065

File tree

11 files changed

+881
-3
lines changed

11 files changed

+881
-3
lines changed

src/cmd/compile/internal/amd64/simdssa.go

Lines changed: 32 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/_gen/simdAMD64.rules

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,9 @@
249249
(ConcatShiftBytesRightUint8x16 ...) => (VPALIGNR128 ...)
250250
(ConcatShiftBytesRightGroupedUint8x32 ...) => (VPALIGNR256 ...)
251251
(ConcatShiftBytesRightGroupedUint8x64 ...) => (VPALIGNR512 ...)
252+
(ConvertToFloat32Float64x2 ...) => (VCVTPD2PSX128 ...)
253+
(ConvertToFloat32Float64x4 ...) => (VCVTPD2PSY128 ...)
254+
(ConvertToFloat32Float64x8 ...) => (VCVTPD2PS256 ...)
252255
(ConvertToFloat32Int32x4 ...) => (VCVTDQ2PS128 ...)
253256
(ConvertToFloat32Int32x8 ...) => (VCVTDQ2PS256 ...)
254257
(ConvertToFloat32Int32x16 ...) => (VCVTDQ2PS512 ...)
@@ -261,6 +264,8 @@
261264
(ConvertToFloat32Uint64x2 ...) => (VCVTUQQ2PSX128 ...)
262265
(ConvertToFloat32Uint64x4 ...) => (VCVTUQQ2PSY128 ...)
263266
(ConvertToFloat32Uint64x8 ...) => (VCVTUQQ2PS256 ...)
267+
(ConvertToFloat64Float32x4 ...) => (VCVTPS2PD256 ...)
268+
(ConvertToFloat64Float32x8 ...) => (VCVTPS2PD512 ...)
264269
(ConvertToFloat64Int32x4 ...) => (VCVTDQ2PD256 ...)
265270
(ConvertToFloat64Int32x8 ...) => (VCVTDQ2PD512 ...)
266271
(ConvertToFloat64Int64x2 ...) => (VCVTQQ2PD128 ...)
@@ -1478,6 +1483,9 @@
14781483
(VMOVDQU8Masked256 (VPALIGNR256 [a] x y) mask) => (VPALIGNRMasked256 [a] x y mask)
14791484
(VMOVDQU8Masked512 (VPALIGNR512 [a] x y) mask) => (VPALIGNRMasked512 [a] x y mask)
14801485
(VMOVDQU8Masked128 (VPALIGNR128 [a] x y) mask) => (VPALIGNRMasked128 [a] x y mask)
1486+
(VMOVDQU64Masked128 (VCVTPD2PSX128 x) mask) => (VCVTPD2PSXMasked128 x mask)
1487+
(VMOVDQU64Masked128 (VCVTPD2PSY128 x) mask) => (VCVTPD2PSYMasked128 x mask)
1488+
(VMOVDQU64Masked256 (VCVTPD2PS256 x) mask) => (VCVTPD2PSMasked256 x mask)
14811489
(VMOVDQU32Masked128 (VCVTDQ2PS128 x) mask) => (VCVTDQ2PSMasked128 x mask)
14821490
(VMOVDQU32Masked256 (VCVTDQ2PS256 x) mask) => (VCVTDQ2PSMasked256 x mask)
14831491
(VMOVDQU32Masked512 (VCVTDQ2PS512 x) mask) => (VCVTDQ2PSMasked512 x mask)
@@ -1490,6 +1498,8 @@
14901498
(VMOVDQU64Masked128 (VCVTUQQ2PSX128 x) mask) => (VCVTUQQ2PSXMasked128 x mask)
14911499
(VMOVDQU64Masked128 (VCVTUQQ2PSY128 x) mask) => (VCVTUQQ2PSYMasked128 x mask)
14921500
(VMOVDQU64Masked256 (VCVTUQQ2PS256 x) mask) => (VCVTUQQ2PSMasked256 x mask)
1501+
(VMOVDQU32Masked256 (VCVTPS2PD256 x) mask) => (VCVTPS2PDMasked256 x mask)
1502+
(VMOVDQU32Masked512 (VCVTPS2PD512 x) mask) => (VCVTPS2PDMasked512 x mask)
14931503
(VMOVDQU32Masked256 (VCVTDQ2PD256 x) mask) => (VCVTDQ2PDMasked256 x mask)
14941504
(VMOVDQU32Masked512 (VCVTDQ2PD512 x) mask) => (VCVTDQ2PDMasked512 x mask)
14951505
(VMOVDQU64Masked128 (VCVTQQ2PD128 x) mask) => (VCVTQQ2PDMasked128 x mask)
@@ -2031,6 +2041,7 @@
20312041
(VPBLENDMDMasked512 dst (VSQRTPS512 x) mask) => (VSQRTPSMasked512Merging dst x mask)
20322042
(VPBLENDMDMasked512 dst (VSUBPS512 x y) mask) => (VSUBPSMasked512Merging dst x y mask)
20332043
(VPBLENDMQMasked512 dst (VADDPD512 x y) mask) => (VADDPDMasked512Merging dst x y mask)
2044+
(VPBLENDMQMasked512 dst (VCVTPD2PS256 x) mask) => (VCVTPD2PSMasked256Merging dst x mask)
20342045
(VPBLENDMQMasked512 dst (VCVTQQ2PD512 x) mask) => (VCVTQQ2PDMasked512Merging dst x mask)
20352046
(VPBLENDMQMasked512 dst (VCVTQQ2PS256 x) mask) => (VCVTQQ2PSMasked256Merging dst x mask)
20362047
(VPBLENDMQMasked512 dst (VCVTTPD2DQ256 x) mask) => (VCVTTPD2DQMasked256Merging dst x mask)
@@ -2121,6 +2132,8 @@
21212132
(VPBLENDVB128 dst (VBROADCASTSS512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VBROADCASTSSMasked512Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
21222133
(VPBLENDVB128 dst (VCVTDQ2PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTDQ2PDMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
21232134
(VPBLENDVB128 dst (VCVTDQ2PS128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTDQ2PSMasked128Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
2135+
(VPBLENDVB128 dst (VCVTPD2PSX128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPD2PSXMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
2136+
(VPBLENDVB128 dst (VCVTPS2PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2PDMasked256Merging dst x (VPMOVVec32x4ToM <types.TypeMask> mask))
21242137
(VPBLENDVB128 dst (VCVTQQ2PD128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTQQ2PDMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
21252138
(VPBLENDVB128 dst (VCVTQQ2PSX128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTQQ2PSXMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
21262139
(VPBLENDVB128 dst (VCVTTPD2DQX128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPD2DQXMasked128Merging dst x (VPMOVVec64x2ToM <types.TypeMask> mask))
@@ -2304,6 +2317,8 @@
23042317
(VPBLENDVB256 dst (VADDPS256 x y) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VADDPSMasked256Merging dst x y (VPMOVVec32x8ToM <types.TypeMask> mask))
23052318
(VPBLENDVB256 dst (VCVTDQ2PD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTDQ2PDMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
23062319
(VPBLENDVB256 dst (VCVTDQ2PS256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTDQ2PSMasked256Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
2320+
(VPBLENDVB256 dst (VCVTPD2PSY128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPD2PSYMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
2321+
(VPBLENDVB256 dst (VCVTPS2PD512 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTPS2PDMasked512Merging dst x (VPMOVVec32x8ToM <types.TypeMask> mask))
23072322
(VPBLENDVB256 dst (VCVTQQ2PD256 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTQQ2PDMasked256Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
23082323
(VPBLENDVB256 dst (VCVTQQ2PSY128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTQQ2PSYMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
23092324
(VPBLENDVB256 dst (VCVTTPD2DQY128 x) mask) && v.Block.CPUfeatures.hasFeature(CPUavx512) => (VCVTTPD2DQYMasked128Merging dst x (VPMOVVec64x4ToM <types.TypeMask> mask))
@@ -2541,6 +2556,7 @@
25412556
(VPERMI2QMasked256 x y l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked256load {sym} [off] x y ptr mask mem)
25422557
(VPERMI2PDMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2PDMasked512load {sym} [off] x y ptr mask mem)
25432558
(VPERMI2QMasked512 x y l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VPERMI2QMasked512load {sym} [off] x y ptr mask mem)
2559+
(VCVTPD2PS256 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPD2PS256load {sym} [off] ptr mem)
25442560
(VCVTDQ2PS512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTDQ2PS512load {sym} [off] ptr mem)
25452561
(VCVTQQ2PSX128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTQQ2PSX128load {sym} [off] ptr mem)
25462562
(VCVTQQ2PSY128 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTQQ2PSY128load {sym} [off] ptr mem)
@@ -2551,6 +2567,9 @@
25512567
(VCVTUQQ2PSX128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PSX128load {sym} [off] ptr mem)
25522568
(VCVTUQQ2PSY128 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PSY128load {sym} [off] ptr mem)
25532569
(VCVTUQQ2PS256 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PS256load {sym} [off] ptr mem)
2570+
(VCVTPD2PSXMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTPD2PSXMasked128load {sym} [off] ptr mask mem)
2571+
(VCVTPD2PSYMasked128 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTPD2PSYMasked128load {sym} [off] ptr mask mem)
2572+
(VCVTPD2PSMasked256 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTPD2PSMasked256load {sym} [off] ptr mask mem)
25542573
(VCVTDQ2PSMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTDQ2PSMasked128load {sym} [off] ptr mask mem)
25552574
(VCVTDQ2PSMasked256 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTDQ2PSMasked256load {sym} [off] ptr mask mem)
25562575
(VCVTDQ2PSMasked512 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTDQ2PSMasked512load {sym} [off] ptr mask mem)
@@ -2563,6 +2582,7 @@
25632582
(VCVTUQQ2PSXMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PSXMasked128load {sym} [off] ptr mask mem)
25642583
(VCVTUQQ2PSYMasked128 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PSYMasked128load {sym} [off] ptr mask mem)
25652584
(VCVTUQQ2PSMasked256 l:(VMOVDQUload512 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PSMasked256load {sym} [off] ptr mask mem)
2585+
(VCVTPS2PD512 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2PD512load {sym} [off] ptr mem)
25662586
(VCVTDQ2PD512 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTDQ2PD512load {sym} [off] ptr mem)
25672587
(VCVTQQ2PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTQQ2PD128load {sym} [off] ptr mem)
25682588
(VCVTQQ2PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTQQ2PD256load {sym} [off] ptr mem)
@@ -2572,6 +2592,8 @@
25722592
(VCVTUQQ2PD128 l:(VMOVDQUload128 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PD128load {sym} [off] ptr mem)
25732593
(VCVTUQQ2PD256 l:(VMOVDQUload256 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PD256load {sym} [off] ptr mem)
25742594
(VCVTUQQ2PD512 l:(VMOVDQUload512 {sym} [off] ptr mem)) && canMergeLoad(v, l) && clobber(l) => (VCVTUQQ2PD512load {sym} [off] ptr mem)
2595+
(VCVTPS2PDMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2PDMasked256load {sym} [off] ptr mask mem)
2596+
(VCVTPS2PDMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTPS2PDMasked512load {sym} [off] ptr mask mem)
25752597
(VCVTDQ2PDMasked256 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTDQ2PDMasked256load {sym} [off] ptr mask mem)
25762598
(VCVTDQ2PDMasked512 l:(VMOVDQUload256 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTDQ2PDMasked512load {sym} [off] ptr mask mem)
25772599
(VCVTQQ2PDMasked128 l:(VMOVDQUload128 {sym} [off] ptr mem) mask) && canMergeLoad(v, l) && clobber(l) => (VCVTQQ2PDMasked128load {sym} [off] ptr mask mem)

0 commit comments

Comments
 (0)