@@ -1474,6 +1474,189 @@ entry:
14741474 ret <2 x double > %out
14751475}
14761476
; shuffle4_f64: two-input shuffle that widens <2 x double> operands into a
; <4 x double> result. Mask indices address the concatenation src1 ++ src2
; (0-1 = src1, 2-3 = src2), so mask <3, 1, 2, 0> yields
; [src2[1], src1[1], src2[0], src1[0]].
; The CHECK lines below pin the exact expected instruction sequence.
1477+ define arm_aapcs_vfpcc <4 x double > @shuffle4_f64 (<2 x double > %src1 , <2 x double > %src2 ) {
1478+ ; CHECK-LABEL: shuffle4_f64:
1479+ ; CHECK: @ %bb.0: @ %entry
1480+ ; CHECK-NEXT: vmov.f32 s8, s6
1481+ ; CHECK-NEXT: vmov.f32 s6, s0
1482+ ; CHECK-NEXT: vmov.f32 s9, s7
1483+ ; CHECK-NEXT: vmov.f32 s7, s1
1484+ ; CHECK-NEXT: vmov.f32 s10, s2
1485+ ; CHECK-NEXT: vmov.f32 s11, s3
1486+ ; CHECK-NEXT: vmov q0, q2
1487+ ; CHECK-NEXT: bx lr
1488+ entry:
1489+ %out = shufflevector <2 x double > %src1 , <2 x double > %src2 , <4 x i32 > <i32 3 , i32 1 , i32 2 , i32 0 >
1490+ ret <4 x double > %out
1491+ }
; shuffle5_f64: full reversal of the concatenation of two <2 x double>
; operands. Mask <3, 2, 1, 0> (0-1 = src1, 2-3 = src2) produces the
; <4 x double> value [src2[1], src2[0], src1[1], src1[0]].
; The CHECK lines below pin the exact expected instruction sequence.
1492+ define arm_aapcs_vfpcc <4 x double > @shuffle5_f64 (<2 x double > %src1 , <2 x double > %src2 ) {
1493+ ; CHECK-LABEL: shuffle5_f64:
1494+ ; CHECK: @ %bb.0: @ %entry
1495+ ; CHECK-NEXT: vmov.f32 s8, s6
1496+ ; CHECK-NEXT: vmov.f32 s10, s4
1497+ ; CHECK-NEXT: vmov.f32 s4, s2
1498+ ; CHECK-NEXT: vmov.f32 s6, s0
1499+ ; CHECK-NEXT: vmov.f32 s9, s7
1500+ ; CHECK-NEXT: vmov.f32 s11, s5
1501+ ; CHECK-NEXT: vmov.f32 s5, s3
1502+ ; CHECK-NEXT: vmov.f32 s7, s1
1503+ ; CHECK-NEXT: vmov q0, q2
1504+ ; CHECK-NEXT: bx lr
1505+ entry:
1506+ %out = shufflevector <2 x double > %src1 , <2 x double > %src2 , <4 x i32 > <i32 3 , i32 2 , i32 1 , i32 0 >
1507+ ret <4 x double > %out
1508+ }
; shuffle6_f64: same-width blend of two <2 x double> operands. Mask <0, 3>
; (0-1 = src1, 2-3 = src2) keeps lane 0 of src1 and takes lane 1 from src2,
; giving [src1[0], src2[1]]. The expected code is just one pair of
; vmov.f32 moves followed by the return.
1509+ define arm_aapcs_vfpcc <2 x double > @shuffle6_f64 (<2 x double > %src1 , <2 x double > %src2 ) {
1510+ ; CHECK-LABEL: shuffle6_f64:
1511+ ; CHECK: @ %bb.0: @ %entry
1512+ ; CHECK-NEXT: vmov.f32 s2, s6
1513+ ; CHECK-NEXT: vmov.f32 s3, s7
1514+ ; CHECK-NEXT: bx lr
1515+ entry:
1516+ %out = shufflevector <2 x double > %src1 , <2 x double > %src2 , <2 x i32 > <i32 0 , i32 3 >
1517+ ret <2 x double > %out
1518+ }
; shuffle7_f64: same-width blend of two <2 x double> operands. Mask <3, 1>
; (0-1 = src1, 2-3 = src2) places lane 1 of src2 in result lane 0 and keeps
; lane 1 of src1, giving [src2[1], src1[1]]. The expected code is one pair
; of vmov.f32 moves followed by the return.
1519+ define arm_aapcs_vfpcc <2 x double > @shuffle7_f64 (<2 x double > %src1 , <2 x double > %src2 ) {
1520+ ; CHECK-LABEL: shuffle7_f64:
1521+ ; CHECK: @ %bb.0: @ %entry
1522+ ; CHECK-NEXT: vmov.f32 s0, s6
1523+ ; CHECK-NEXT: vmov.f32 s1, s7
1524+ ; CHECK-NEXT: bx lr
1525+ entry:
1526+ %out = shufflevector <2 x double > %src1 , <2 x double > %src2 , <2 x i32 > <i32 3 , i32 1 >
1527+ ret <2 x double > %out
1528+ }
; shuffle8_f64: same-width blend of two <2 x double> operands. Mask <2, 1>
; (0-1 = src1, 2-3 = src2) takes lane 0 from src2 and lane 1 from src1,
; giving [src2[0], src1[1]]. The expected code builds the result in q1 and
; then copies it to q0 before returning.
1529+ define arm_aapcs_vfpcc <2 x double > @shuffle8_f64 (<2 x double > %src1 , <2 x double > %src2 ) {
1530+ ; CHECK-LABEL: shuffle8_f64:
1531+ ; CHECK: @ %bb.0: @ %entry
1532+ ; CHECK-NEXT: vmov.f32 s6, s2
1533+ ; CHECK-NEXT: vmov.f32 s7, s3
1534+ ; CHECK-NEXT: vmov q0, q1
1535+ ; CHECK-NEXT: bx lr
1536+ entry:
1537+ %out = shufflevector <2 x double > %src1 , <2 x double > %src2 , <2 x i32 > <i32 2 , i32 1 >
1538+ ret <2 x double > %out
1539+ }
; shuffle9_f64: lane-wise interleave of two <4 x double> operands into an
; <8 x double> result. Mask indices address src1 ++ src2 (0-3 = src1,
; 4-7 = src2), so <0, 4, 1, 5, 2, 6, 3, 7> yields
; [src1[0], src2[0], src1[1], src2[1], src1[2], src2[2], src1[3], src2[3]].
; The expected code uses q4 and q5 as temporaries and brackets the body with
; vpush/vpop of d8-d11 (presumably because those registers are callee-saved
; under this calling convention - confirm against the AAPCS).
1540+ define arm_aapcs_vfpcc <8 x double > @shuffle9_f64 (<4 x double > %src1 , <4 x double > %src2 ) {
1541+ ; CHECK-LABEL: shuffle9_f64:
1542+ ; CHECK: @ %bb.0: @ %entry
1543+ ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
1544+ ; CHECK-NEXT: vpush {d8, d9, d10, d11}
1545+ ; CHECK-NEXT: vmov q5, q2
1546+ ; CHECK-NEXT: vmov.f32 s16, s0
1547+ ; CHECK-NEXT: vmov.f32 s18, s20
1548+ ; CHECK-NEXT: vmov.f32 s20, s2
1549+ ; CHECK-NEXT: vmov.f32 s10, s12
1550+ ; CHECK-NEXT: vmov.f32 s19, s21
1551+ ; CHECK-NEXT: vmov.f32 s8, s4
1552+ ; CHECK-NEXT: vmov.f32 s17, s1
1553+ ; CHECK-NEXT: vmov.f32 s21, s3
1554+ ; CHECK-NEXT: vmov q0, q4
1555+ ; CHECK-NEXT: vmov.f32 s12, s6
1556+ ; CHECK-NEXT: vmov.f32 s11, s13
1557+ ; CHECK-NEXT: vmov.f32 s9, s5
1558+ ; CHECK-NEXT: vmov.f32 s13, s7
1559+ ; CHECK-NEXT: vmov q1, q5
1560+ ; CHECK-NEXT: vpop {d8, d9, d10, d11}
1561+ ; CHECK-NEXT: bx lr
1562+ entry:
1563+ %out = shufflevector <4 x double > %src1 , <4 x double > %src2 , <8 x i32 > <i32 0 , i32 4 , i32 1 , i32 5 , i32 2 , i32 6 , i32 3 , i32 7 >
1564+ ret <8 x double > %out
1565+ }
1566+
1567+
1568+
1569+
; shuffle4_i64: two-input shuffle that widens <2 x i64> operands into a
; <4 x i64> result. Mask indices address src1 ++ src2 (0-1 = src1,
; 2-3 = src2), so <3, 1, 2, 0> yields [src2[1], src1[1], src2[0], src1[0]].
; Note that the expected moves are vmov.f32 on S registers even though the
; element type is an integer.
1570+ define arm_aapcs_vfpcc <4 x i64 > @shuffle4_i64 (<2 x i64 > %src1 , <2 x i64 > %src2 ) {
1571+ ; CHECK-LABEL: shuffle4_i64:
1572+ ; CHECK: @ %bb.0: @ %entry
1573+ ; CHECK-NEXT: vmov.f32 s8, s6
1574+ ; CHECK-NEXT: vmov.f32 s6, s0
1575+ ; CHECK-NEXT: vmov.f32 s9, s7
1576+ ; CHECK-NEXT: vmov.f32 s7, s1
1577+ ; CHECK-NEXT: vmov.f32 s10, s2
1578+ ; CHECK-NEXT: vmov.f32 s11, s3
1579+ ; CHECK-NEXT: vmov q0, q2
1580+ ; CHECK-NEXT: bx lr
1581+ entry:
1582+ %out = shufflevector <2 x i64 > %src1 , <2 x i64 > %src2 , <4 x i32 > <i32 3 , i32 1 , i32 2 , i32 0 >
1583+ ret <4 x i64 > %out
1584+ }
; shuffle5_i64: full reversal of the concatenation of two <2 x i64>
; operands. Mask <3, 2, 1, 0> (0-1 = src1, 2-3 = src2) produces the
; <4 x i64> value [src2[1], src2[0], src1[1], src1[0]].
; Note that the expected moves are vmov.f32 on S registers even though the
; element type is an integer.
1585+ define arm_aapcs_vfpcc <4 x i64 > @shuffle5_i64 (<2 x i64 > %src1 , <2 x i64 > %src2 ) {
1586+ ; CHECK-LABEL: shuffle5_i64:
1587+ ; CHECK: @ %bb.0: @ %entry
1588+ ; CHECK-NEXT: vmov.f32 s8, s6
1589+ ; CHECK-NEXT: vmov.f32 s10, s4
1590+ ; CHECK-NEXT: vmov.f32 s4, s2
1591+ ; CHECK-NEXT: vmov.f32 s6, s0
1592+ ; CHECK-NEXT: vmov.f32 s9, s7
1593+ ; CHECK-NEXT: vmov.f32 s11, s5
1594+ ; CHECK-NEXT: vmov.f32 s5, s3
1595+ ; CHECK-NEXT: vmov.f32 s7, s1
1596+ ; CHECK-NEXT: vmov q0, q2
1597+ ; CHECK-NEXT: bx lr
1598+ entry:
1599+ %out = shufflevector <2 x i64 > %src1 , <2 x i64 > %src2 , <4 x i32 > <i32 3 , i32 2 , i32 1 , i32 0 >
1600+ ret <4 x i64 > %out
1601+ }
; shuffle6_i64: same-width blend of two <2 x i64> operands. Mask <0, 3>
; (0-1 = src1, 2-3 = src2) keeps lane 0 of src1 and takes lane 1 from src2,
; giving [src1[0], src2[1]]. The expected code is one pair of vmov.f32
; moves followed by the return.
1602+ define arm_aapcs_vfpcc <2 x i64 > @shuffle6_i64 (<2 x i64 > %src1 , <2 x i64 > %src2 ) {
1603+ ; CHECK-LABEL: shuffle6_i64:
1604+ ; CHECK: @ %bb.0: @ %entry
1605+ ; CHECK-NEXT: vmov.f32 s2, s6
1606+ ; CHECK-NEXT: vmov.f32 s3, s7
1607+ ; CHECK-NEXT: bx lr
1608+ entry:
1609+ %out = shufflevector <2 x i64 > %src1 , <2 x i64 > %src2 , <2 x i32 > <i32 0 , i32 3 >
1610+ ret <2 x i64 > %out
1611+ }
; shuffle7_i64: same-width blend of two <2 x i64> operands. Mask <3, 1>
; (0-1 = src1, 2-3 = src2) places lane 1 of src2 in result lane 0 and keeps
; lane 1 of src1, giving [src2[1], src1[1]]. The expected code is one pair
; of vmov.f32 moves followed by the return.
1612+ define arm_aapcs_vfpcc <2 x i64 > @shuffle7_i64 (<2 x i64 > %src1 , <2 x i64 > %src2 ) {
1613+ ; CHECK-LABEL: shuffle7_i64:
1614+ ; CHECK: @ %bb.0: @ %entry
1615+ ; CHECK-NEXT: vmov.f32 s0, s6
1616+ ; CHECK-NEXT: vmov.f32 s1, s7
1617+ ; CHECK-NEXT: bx lr
1618+ entry:
1619+ %out = shufflevector <2 x i64 > %src1 , <2 x i64 > %src2 , <2 x i32 > <i32 3 , i32 1 >
1620+ ret <2 x i64 > %out
1621+ }
; shuffle8_i64: same-width blend of two <2 x i64> operands. Mask <2, 1>
; (0-1 = src1, 2-3 = src2) takes lane 0 from src2 and lane 1 from src1,
; giving [src2[0], src1[1]]. The expected code builds the result in q1 and
; then copies it to q0 before returning.
1622+ define arm_aapcs_vfpcc <2 x i64 > @shuffle8_i64 (<2 x i64 > %src1 , <2 x i64 > %src2 ) {
1623+ ; CHECK-LABEL: shuffle8_i64:
1624+ ; CHECK: @ %bb.0: @ %entry
1625+ ; CHECK-NEXT: vmov.f32 s6, s2
1626+ ; CHECK-NEXT: vmov.f32 s7, s3
1627+ ; CHECK-NEXT: vmov q0, q1
1628+ ; CHECK-NEXT: bx lr
1629+ entry:
1630+ %out = shufflevector <2 x i64 > %src1 , <2 x i64 > %src2 , <2 x i32 > <i32 2 , i32 1 >
1631+ ret <2 x i64 > %out
1632+ }
; shuffle9_i64: lane-wise interleave of two <4 x i64> operands into an
; <8 x i64> result. Mask indices address src1 ++ src2 (0-3 = src1,
; 4-7 = src2), so <0, 4, 1, 5, 2, 6, 3, 7> yields
; [src1[0], src2[0], src1[1], src2[1], src1[2], src2[2], src1[3], src2[3]].
; The expected code uses q4 and q5 as temporaries and brackets the body with
; vpush/vpop of d8-d11 (presumably because those registers are callee-saved
; under this calling convention - confirm against the AAPCS).
1633+ define arm_aapcs_vfpcc <8 x i64 > @shuffle9_i64 (<4 x i64 > %src1 , <4 x i64 > %src2 ) {
1634+ ; CHECK-LABEL: shuffle9_i64:
1635+ ; CHECK: @ %bb.0: @ %entry
1636+ ; CHECK-NEXT: .vsave {d8, d9, d10, d11}
1637+ ; CHECK-NEXT: vpush {d8, d9, d10, d11}
1638+ ; CHECK-NEXT: vmov q5, q2
1639+ ; CHECK-NEXT: vmov.f32 s16, s0
1640+ ; CHECK-NEXT: vmov.f32 s18, s20
1641+ ; CHECK-NEXT: vmov.f32 s20, s2
1642+ ; CHECK-NEXT: vmov.f32 s10, s12
1643+ ; CHECK-NEXT: vmov.f32 s19, s21
1644+ ; CHECK-NEXT: vmov.f32 s8, s4
1645+ ; CHECK-NEXT: vmov.f32 s17, s1
1646+ ; CHECK-NEXT: vmov.f32 s21, s3
1647+ ; CHECK-NEXT: vmov q0, q4
1648+ ; CHECK-NEXT: vmov.f32 s12, s6
1649+ ; CHECK-NEXT: vmov.f32 s11, s13
1650+ ; CHECK-NEXT: vmov.f32 s9, s5
1651+ ; CHECK-NEXT: vmov.f32 s13, s7
1652+ ; CHECK-NEXT: vmov q1, q5
1653+ ; CHECK-NEXT: vpop {d8, d9, d10, d11}
1654+ ; CHECK-NEXT: bx lr
1655+ entry:
1656+ %out = shufflevector <4 x i64 > %src1 , <4 x i64 > %src2 , <8 x i32 > <i32 0 , i32 4 , i32 1 , i32 5 , i32 2 , i32 6 , i32 3 , i32 7 >
1657+ ret <8 x i64 > %out
1658+ }
1659+
14771660
14781661define arm_aapcs_vfpcc <4 x i32 > @insert_i32 (i32 %a ) {
14791662; CHECK-LABEL: insert_i32:
@@ -1548,7 +1731,7 @@ define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
15481731; CHECK: @ %bb.0: @ %entry
15491732; CHECK-NEXT: .pad #8
15501733; CHECK-NEXT: sub sp, #8
1551- ; CHECK-NEXT: adr r2, .LCPI76_0
1734+ ; CHECK-NEXT: adr r2, .LCPI88_0
15521735; CHECK-NEXT: vmov.u16 r0, q0[0]
15531736; CHECK-NEXT: vldrw.u32 q0, [r2]
15541737; CHECK-NEXT: mov r1, sp
@@ -1558,7 +1741,7 @@ define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) {
15581741; CHECK-NEXT: bx lr
15591742; CHECK-NEXT: .p2align 4
15601743; CHECK-NEXT: @ %bb.1:
1561- ; CHECK-NEXT: .LCPI76_0 :
1744+ ; CHECK-NEXT: .LCPI88_0 :
15621745; CHECK-NEXT: .zero 4
15631746; CHECK-NEXT: .long 7 @ 0x7
15641747; CHECK-NEXT: .long 1 @ 0x1
0 commit comments