@@ -48,11 +48,6 @@ void SIRegisterInfo::classifyPressureSet(unsigned PSetID, unsigned Reg,
   }
 }
 
-static cl::opt<bool> EnableSpillSGPRToSMEM(
-  "amdgpu-spill-sgpr-to-smem",
-  cl::desc("Use scalar stores to spill SGPRs if supported by subtarget"),
-  cl::init(false));
-
 static cl::opt<bool> EnableSpillSGPRToVGPR(
   "amdgpu-spill-sgpr-to-vgpr",
   cl::desc("Enable spilling VGPRs to SGPRs"),
@@ -65,14 +60,8 @@ SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) :
   SGPRPressureSets(getNumRegPressureSets()),
   VGPRPressureSets(getNumRegPressureSets()),
   AGPRPressureSets(getNumRegPressureSets()),
-  SpillSGPRToVGPR(false),
-  SpillSGPRToSMEM(false),
+  SpillSGPRToVGPR(EnableSpillSGPRToVGPR),
   isWave32(ST.isWave32()) {
-  if (EnableSpillSGPRToSMEM && ST.hasScalarStores())
-    SpillSGPRToSMEM = true;
-  else if (EnableSpillSGPRToVGPR)
-    SpillSGPRToVGPR = true;
-
   unsigned NumRegPressureSets = getNumRegPressureSets();
 
   SGPRSetID = NumRegPressureSets;
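Taken together, these first two hunks delete both the `amdgpu-spill-sgpr-to-smem` flag and the constructor logic that consumed it, leaving `SpillSGPRToVGPR` initialized directly from its own flag. A minimal sketch of the pattern being removed follows; the flag name and the feature query are hypothetical stand-ins, not the real AMDGPU API:

```cpp
#include "llvm/Support/CommandLine.h"
using namespace llvm;

// Hypothetical boolean option, shaped like the deleted declaration.
static cl::opt<bool> EnableFeatureX(
  "amdgpu-enable-feature-x",   // stand-in option name
  cl::desc("Use feature X if supported by subtarget"),
  cl::init(false));            // opt-in, off by default

// Mirrors the deleted constructor logic: the feature is used only when
// the flag is set *and* the subtarget actually supports it.
static bool shouldUseFeatureX(bool SubtargetSupportsX) {
  return EnableFeatureX && SubtargetSupportsX;
}
```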
@@ -759,22 +748,6 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
   }
 }
 
-static std::pair<unsigned, unsigned> getSpillEltSize(unsigned SuperRegSize,
-                                                     bool Store) {
-  if (SuperRegSize % 16 == 0) {
-    return { 16, Store ? AMDGPU::S_BUFFER_STORE_DWORDX4_SGPR :
-                         AMDGPU::S_BUFFER_LOAD_DWORDX4_SGPR };
-  }
-
-  if (SuperRegSize % 8 == 0) {
-    return { 8, Store ? AMDGPU::S_BUFFER_STORE_DWORDX2_SGPR :
-                        AMDGPU::S_BUFFER_LOAD_DWORDX2_SGPR };
-  }
-
-  return { 4, Store ? AMDGPU::S_BUFFER_STORE_DWORD_SGPR :
-                      AMDGPU::S_BUFFER_LOAD_DWORD_SGPR };
-}
-
 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
                                int Index,
                                RegScavenger *RS,
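The deleted `getSpillEltSize` helper picked the widest scalar buffer access whose size evenly divides the register being spilled. A self-contained sketch of that selection logic, with plain enumerators standing in for the real `AMDGPU::S_BUFFER_*` opcodes:

```cpp
#include <cstdio>
#include <utility>

// Stand-ins for the AMDGPU::S_BUFFER_{STORE,LOAD}_* opcodes.
enum Opcode { DWORD, DWORDX2, DWORDX4 };

// Pick the widest access (16, 8, or 4 bytes) that evenly divides the
// super-register size in bytes, exactly as the deleted helper did.
static std::pair<unsigned, Opcode> pickSpillWidth(unsigned SuperRegSize) {
  if (SuperRegSize % 16 == 0)
    return {16, DWORDX4};
  if (SuperRegSize % 8 == 0)
    return {8, DWORDX2};
  return {4, DWORD};
}

int main() {
  // A 64-byte SGPR tuple (i.e. a 512-bit register) splits into four
  // 16-byte DWORDX4 accesses.
  auto [EltBytes, Op] = pickSpillWidth(64);
  std::printf("elt=%u parts=%u op=%d\n", EltBytes, 64 / EltBytes, (int)Op);
}
```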
@@ -799,38 +772,16 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
 
   MachineFrameInfo &FrameInfo = MF->getFrameInfo();
 
-  bool SpillToSMEM = spillSGPRToSMEM();
-  if (SpillToSMEM && OnlyToVGPR)
-    return false;
-
-  Register FrameReg = getFrameRegister(*MF);
-
   assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() &&
                          SuperReg != MFI->getFrameOffsetReg() &&
                          SuperReg != MFI->getScratchWaveOffsetReg()));
 
   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
 
-  unsigned OffsetReg = AMDGPU::M0;
   unsigned M0CopyReg = AMDGPU::NoRegister;
 
-  if (SpillToSMEM) {
-    if (RS->isRegUsed(AMDGPU::M0)) {
-      M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
-      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
-        .addReg(AMDGPU::M0);
-    }
-  }
-
-  unsigned ScalarStoreOp;
   unsigned EltSize = 4;
   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
-  if (SpillToSMEM && isSGPRClass(RC)) {
-    // XXX - if private_element_size is larger than 4 it might be useful to be
-    // able to spill wider vmem spills.
-    std::tie(EltSize, ScalarStoreOp) =
-      getSpillEltSize(getRegSizeInBits(*RC) / 8, true);
-  }
 
   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
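Context for the M0 handling above: the SMEM path addressed scratch through M0, so before clobbering it the code checked whether M0 was live and parked its value in a scavenged SGPR. A sketch of that save/restore pattern, reusing the calls visible in this hunk; the copy-back at the end is inferred from `M0CopyReg` (it is not shown in this hunk), and the surrounding objects (`MBB`, `MI`, `DL`, `TII`, `RS`) are assumed in scope as in `spillSGPR`:

```cpp
unsigned M0CopyReg = AMDGPU::NoRegister;
if (RS->isRegUsed(AMDGPU::M0)) {
  // Grab a spare 32-bit SGPR (excluding M0 itself) to park the old value.
  M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
      .addReg(AMDGPU::M0);
}

// ... M0 is then clobbered as the SMEM soffset register ...

if (M0CopyReg != AMDGPU::NoRegister)
  BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), AMDGPU::M0)
      .addReg(M0CopyReg, RegState::Kill); // restore the saved value
```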
@@ -845,47 +796,6 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
     Register SubReg =
         NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
 
-    if (SpillToSMEM) {
-      int64_t FrOffset = FrameInfo.getObjectOffset(Index);
-
-      // The allocated memory size is really the wavefront size * the frame
-      // index size. The widest register class is 64 bytes, so a 4-byte scratch
-      // allocation is enough to spill this in a single stack object.
-      //
-      // FIXME: Frame size/offsets are computed earlier than this, so the extra
-      // space is still unnecessarily allocated.
-
-      unsigned Align = FrameInfo.getObjectAlignment(Index);
-      MachinePointerInfo PtrInfo
-        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
-      MachineMemOperand *MMO
-        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
-                                   EltSize, MinAlign(Align, EltSize * i));
-
-      // SMEM instructions only support a single offset, so increment the wave
-      // offset.
-
-      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
-      if (Offset != 0) {
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
-          .addReg(FrameReg)
-          .addImm(Offset);
-      } else {
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addReg(FrameReg);
-      }
-
-      BuildMI(*MBB, MI, DL, TII->get(ScalarStoreOp))
-        .addReg(SubReg, getKillRegState(IsKill)) // sdata
-        .addReg(MFI->getScratchRSrcReg())        // sbase
-        .addReg(OffsetReg, RegState::Kill)       // soff
-        .addImm(0)                               // glc
-        .addImm(0)                               // dlc
-        .addMemOperand(MMO);
-
-      continue;
-    }
-
     if (SpillToVGPR) {
       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
 
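The address computation in the deleted store loop deserves a worked example: scratch is allocated per lane, so the frame-index byte offset is scaled by the wavefront size before the per-subregister offset is added. A standalone sketch of the same expression:

```cpp
#include <cstdint>
#include <cstdio>

// Byte offset for subregister i of a spill at frame offset FrOffset,
// matching the deleted expression:
//   (ST.getWavefrontSize() * FrOffset) + (EltSize * i)
static int64_t smemSpillOffset(int64_t FrOffset, unsigned WavefrontSize,
                               unsigned EltSize, unsigned i) {
  return int64_t(WavefrontSize) * FrOffset + int64_t(EltSize) * i;
}

int main() {
  // Wave64, frame offset 8 bytes, 4-byte elements, third subregister:
  // 64 * 8 + 4 * 2 == 520 bytes past the frame register.
  std::printf("%lld\n", (long long)smemSpillOffset(8, 64, 4, 2));
}
```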
@@ -914,10 +824,8 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI,
       return false;
 
     // Spill SGPR to a frame index.
-    // TODO: Should VI try to spill to VGPR and then spill to SMEM?
     if (!TmpVGPR.isValid())
       TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
-    // TODO: Should VI try to spill to VGPR and then spill to SMEM?
 
     MachineInstrBuilder Mov
       = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
@@ -979,82 +887,24 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI,
   const DebugLoc &DL = MI->getDebugLoc();
 
   Register SuperReg = MI->getOperand(0).getReg();
-  bool SpillToSMEM = spillSGPRToSMEM();
-  if (SpillToSMEM && OnlyToVGPR)
-    return false;
 
   assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
 
-  unsigned OffsetReg = AMDGPU::M0;
   unsigned M0CopyReg = AMDGPU::NoRegister;
 
-  if (SpillToSMEM) {
-    if (RS->isRegUsed(AMDGPU::M0)) {
-      M0CopyReg = RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
-      BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), M0CopyReg)
-        .addReg(AMDGPU::M0);
-    }
-  }
-
   unsigned EltSize = 4;
-  unsigned ScalarLoadOp;
-
-  Register FrameReg = getFrameRegister(*MF);
 
   const TargetRegisterClass *RC = getPhysRegClass(SuperReg);
-  if (SpillToSMEM && isSGPRClass(RC)) {
-    // XXX - if private_element_size is larger than 4 it might be useful to be
-    // able to spill wider vmem spills.
-    std::tie(EltSize, ScalarLoadOp) =
-      getSpillEltSize(getRegSizeInBits(*RC) / 8, false);
-  }
 
   ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize);
   unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
 
-  // SubReg carries the "Kill" flag when SubReg == SuperReg.
-  int64_t FrOffset = FrameInfo.getObjectOffset(Index);
-
   Register TmpVGPR;
 
   for (unsigned i = 0, e = NumSubRegs; i < e; ++i) {
     Register SubReg =
         NumSubRegs == 1 ? SuperReg : getSubReg(SuperReg, SplitParts[i]);
 
-    if (SpillToSMEM) {
-      // FIXME: Size may be > 4 but extra bytes wasted.
-      unsigned Align = FrameInfo.getObjectAlignment(Index);
-      MachinePointerInfo PtrInfo
-        = MachinePointerInfo::getFixedStack(*MF, Index, EltSize * i);
-      MachineMemOperand *MMO
-        = MF->getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
-                                   EltSize, MinAlign(Align, EltSize * i));
-
-      // Add i * 4 offset
-      int64_t Offset = (ST.getWavefrontSize() * FrOffset) + (EltSize * i);
-      if (Offset != 0) {
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_U32), OffsetReg)
-          .addReg(FrameReg)
-          .addImm(Offset);
-      } else {
-        BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
-          .addReg(FrameReg);
-      }
-
-      auto MIB =
-        BuildMI(*MBB, MI, DL, TII->get(ScalarLoadOp), SubReg)
-        .addReg(MFI->getScratchRSrcReg())  // sbase
-        .addReg(OffsetReg, RegState::Kill) // soff
-        .addImm(0)                         // glc
-        .addImm(0)                         // dlc
-        .addMemOperand(MMO);
-
-      if (NumSubRegs > 1 && i == 0)
-        MIB.addReg(SuperReg, RegState::ImplicitDefine);
-
-      continue;
-    }
-
     if (SpillToVGPR) {
       SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i];
       auto MIB =
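One detail worth preserving from the deleted restore loop: when a wide register is reloaded one subregister at a time, the first partial load also carries an implicit def of the full super-register, so liveness tracking sees the whole value as defined from that instruction onward rather than treating later lanes as reads of an undefined register. A sketch of the idiom, lifted from the deleted lines; `LoadOp` is a stand-in opcode and the surrounding objects are assumed as in `restoreSGPR`:

```cpp
auto MIB = BuildMI(*MBB, MI, DL, TII->get(LoadOp), SubReg) // LoadOp: stand-in
               .addMemOperand(MMO);
// Only the first of several partial reloads implicitly defines SuperReg.
if (NumSubRegs > 1 && i == 0)
  MIB.addReg(SuperReg, RegState::ImplicitDefine);
```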