diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 531 |
1 files changed, 319 insertions, 212 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 55c1ce3da47f..caafc0c8d443 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -43,6 +43,231 @@ std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable; static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9}; +namespace llvm { + +// A temporary struct to spill SGPRs. +// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits +// just v_writelane and v_readlane. +// +// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR +// is saved to scratch (or the other way around for loads). +// For this, a VGPR is required where the needed lanes can be clobbered. The +// RegScavenger can provide a VGPR where currently active lanes can be +// clobbered, but we still need to save inactive lanes. +// The high-level steps are: +// - Try to scavenge SGPR(s) to save exec +// - Try to scavenge VGPR +// - Save needed, all or inactive lanes of a TmpVGPR +// - Spill/Restore SGPRs using TmpVGPR +// - Restore TmpVGPR +// +// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we +// cannot scavenge temporary SGPRs to save exec, we use the following code: +// buffer_store_dword TmpVGPR ; only if active lanes need to be saved +// s_not exec, exec +// buffer_store_dword TmpVGPR ; save inactive lanes +// s_not exec, exec +struct SGPRSpillBuilder { + struct PerVGPRData { + unsigned PerVGPR; + unsigned NumVGPRs; + int64_t VGPRLanes; + }; + + // The SGPR to save + Register SuperReg; + MachineBasicBlock::iterator MI; + ArrayRef<int16_t> SplitParts; + unsigned NumSubRegs; + bool IsKill; + const DebugLoc &DL; + + /* When spilling to stack */ + // The SGPRs are written into this VGPR, which is then written to scratch + // (or vice versa for loads). 
+ Register TmpVGPR = AMDGPU::NoRegister; + // Temporary spill slot to save TmpVGPR to. + int TmpVGPRIndex = 0; + // True if TmpVGPR is live before the spill (v0 fallback), false if it was scavenged. + bool TmpVGPRLive = false; + // Scavenged SGPR to save EXEC. + Register SavedExecReg = AMDGPU::NoRegister; + // Stack index to write the SGPRs to. + int Index; + unsigned EltSize = 4; + + RegScavenger &RS; + MachineBasicBlock &MBB; + MachineFunction &MF; + SIMachineFunctionInfo &MFI; + const SIInstrInfo &TII; + const SIRegisterInfo &TRI; + bool IsWave32; + Register ExecReg; + unsigned MovOpc; + unsigned NotOpc; + + SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, + bool IsWave32, MachineBasicBlock::iterator MI, int Index, + RegScavenger &RS) + : SuperReg(MI->getOperand(0).getReg()), MI(MI), + IsKill(MI->getOperand(0).isKill()), DL(MI->getDebugLoc()), Index(Index), + RS(RS), MBB(*MI->getParent()), MF(*MBB.getParent()), + MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI), + IsWave32(IsWave32) { + const TargetRegisterClass *RC = TRI.getPhysRegClass(SuperReg); + SplitParts = TRI.getRegSplitParts(RC, EltSize); + NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); + + if (IsWave32) { + ExecReg = AMDGPU::EXEC_LO; + MovOpc = AMDGPU::S_MOV_B32; + NotOpc = AMDGPU::S_NOT_B32; + } else { + ExecReg = AMDGPU::EXEC; + MovOpc = AMDGPU::S_MOV_B64; + NotOpc = AMDGPU::S_NOT_B64; + } + + assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); + assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI && + SuperReg != AMDGPU::EXEC && "exec should never spill"); + } + + PerVGPRData getPerVGPRData() { + PerVGPRData Data; + Data.PerVGPR = IsWave32 ? 32 : 64; + Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR; + Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL; + return Data; + } + + // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is + // free. 
+ // Writes these instructions if an SGPR can be scavenged: + // s_mov_b64 s[6:7], exec ; Save exec + // s_mov_b64 exec, 3 ; Wanted lanemask + // buffer_store_dword v1 ; Write scavenged VGPR to emergency slot + // + // Writes these instructions if no SGPR can be scavenged: + // buffer_store_dword v0 ; Only if no free VGPR was found + // s_not_b64 exec, exec + // buffer_store_dword v0 ; Save inactive lanes + // ; exec stays inverted, it is flipped back in + // ; restore. + void prepare() { + // Scavenged temporary VGPR to use. It must be scavenged once for any number + // of spilled subregs. + // FIXME: The liveness analysis is limited and does not tell if a register + // is in use in lanes that are currently inactive. We can never be sure if + // a register is actually in use in another lane, so we need to save all + // used lanes of the chosen VGPR. + TmpVGPR = RS.scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false); + + // Reserve temporary stack slot + TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI); + if (TmpVGPR) { + // Found a register that is dead in the currently active lanes, we only + // need to spill inactive lanes. + TmpVGPRLive = false; + } else { + // Pick v0 because it doesn't make a difference. + TmpVGPR = AMDGPU::VGPR0; + TmpVGPRLive = true; + } + + // Try to scavenge SGPRs to save exec + assert(!SavedExecReg && "Exec is already saved, refuse to save again"); + const TargetRegisterClass &RC = + IsWave32 ? 
AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass; + RS.setRegUsed(SuperReg); + SavedExecReg = RS.scavengeRegister(&RC, MI, 0, false); + + int64_t VGPRLanes = getPerVGPRData().VGPRLanes; + + if (SavedExecReg) { + // Save exec, then set it to the needed lanes + BuildMI(MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg); + auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes); + if (!TmpVGPRLive) + I.addReg(TmpVGPR, RegState::ImplicitDefine); + // Spill needed lanes + TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false); + } else { + // Spill active lanes + if (TmpVGPRLive) + TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false, + /*IsKill*/ false); + // Spill inactive lanes + auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); + if (!TmpVGPRLive) + I.addReg(TmpVGPR, RegState::ImplicitDefine); + TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false); + } + } + + // Writes these instructions if an SGPR can be scavenged: + // buffer_load_dword v1 ; Reload scavenged VGPR from emergency slot + // s_waitcnt vmcnt(0) ; If a free VGPR was found + // s_mov_b64 exec, s[6:7] ; Restore exec + // + // Writes these instructions if no SGPR can be scavenged: + // buffer_load_dword v0 ; Restore inactive lanes + // s_waitcnt vmcnt(0) ; If a free VGPR was found + // s_not_b64 exec, exec + // buffer_load_dword v0 ; Only if no free VGPR was found + void restore() { + if (SavedExecReg) { + // Restore used lanes + TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true, + /*IsKill*/ false); + // Restore exec + auto I = BuildMI(MBB, MI, DL, TII.get(MovOpc), ExecReg) + .addReg(SavedExecReg, RegState::Kill); + // Add an implicit use of the load so it is not dead. 
+ // FIXME This inserts an unnecessary waitcnt + if (!TmpVGPRLive) { + I.addReg(TmpVGPR, RegState::Implicit); + } + } else { + // Restore inactive lanes + TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true, + /*IsKill*/ false); + auto I = BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); + if (!TmpVGPRLive) { + I.addReg(TmpVGPR, RegState::Implicit); + } + // Restore active lanes + if (TmpVGPRLive) + TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true); + } + } + + // Write TmpVGPR to memory or read TmpVGPR from memory. + // Either using a single buffer_load/store if exec is set to the needed mask + // or using + // buffer_load + // s_not exec, exec + // buffer_load + // s_not exec, exec + void readWriteTmpVGPR(unsigned Offset, bool IsLoad) { + if (SavedExecReg) { + // Spill needed lanes + TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad); + } else { + // Spill active lanes + TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad, + /*IsKill*/ false); + // Spill inactive lanes + BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); + TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad); + BuildMI(MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg); + } + } +}; + +} // namespace llvm + SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST) : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST), SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) { @@ -1039,120 +1264,36 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI, } } -// Generate a VMEM access which loads or stores the VGPR containing an SGPR -// spill such that all the lanes set in VGPRLanes are loaded or stored. -// This generates exec mask manipulation and will use SGPRs available in MI -// or VGPR lanes in the VGPR to save and restore the exec mask. 
-void SIRegisterInfo::buildSGPRSpillLoadStore(MachineBasicBlock::iterator MI, - int Index, int Offset, - unsigned EltSize, Register VGPR, - int64_t VGPRLanes, - RegScavenger *RS, - bool IsLoad) const { - MachineBasicBlock *MBB = MI->getParent(); - MachineFunction *MF = MBB->getParent(); - SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); - const SIInstrInfo *TII = ST.getInstrInfo(); - - Register SuperReg = MI->getOperand(0).getReg(); - const TargetRegisterClass *RC = getPhysRegClass(SuperReg); - ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize); - unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); - unsigned FirstPart = Offset * 32; - unsigned ExecLane = 0; - - bool IsKill = MI->getOperand(0).isKill(); - const DebugLoc &DL = MI->getDebugLoc(); - - // Cannot handle load/store to EXEC - assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI && - SuperReg != AMDGPU::EXEC && "exec should never spill"); - - // On Wave32 only handle EXEC_LO. - // On Wave64 only update EXEC_HI if there is sufficent space for a copy. - bool OnlyExecLo = isWave32 || NumSubRegs == 1 || SuperReg == AMDGPU::EXEC_HI; - - unsigned ExecMovOpc = OnlyExecLo ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64; - Register ExecReg = OnlyExecLo ? AMDGPU::EXEC_LO : AMDGPU::EXEC; - Register SavedExecReg; - - // Backup EXEC - if (OnlyExecLo) { - SavedExecReg = - NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[FirstPart + ExecLane])); - } else { - // If src/dst is an odd size it is possible subreg0 is not aligned. 
- for (; ExecLane < (NumSubRegs - 1); ++ExecLane) { - SavedExecReg = getMatchingSuperReg( - getSubReg(SuperReg, SplitParts[FirstPart + ExecLane]), AMDGPU::sub0, - &AMDGPU::SReg_64_XEXECRegClass); - if (SavedExecReg) - break; - } - } - assert(SavedExecReg); - BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), SavedExecReg).addReg(ExecReg); - - // Setup EXEC - BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg).addImm(VGPRLanes); - +void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, + int Offset, bool IsLoad, + bool IsKill) const { // Load/store VGPR - MachineFrameInfo &FrameInfo = MF->getFrameInfo(); + MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo(); assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill); - Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF) - ? getBaseRegister() - : getFrameRegister(*MF); + Register FrameReg = + FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF) + ? getBaseRegister() + : getFrameRegister(SB.MF); Align Alignment = FrameInfo.getObjectAlign(Index); - MachinePointerInfo PtrInfo = - MachinePointerInfo::getFixedStack(*MF, Index); - MachineMemOperand *MMO = MF->getMachineMemOperand( + MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index); + MachineMemOperand *MMO = SB.MF.getMachineMemOperand( PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore, - EltSize, Alignment); + SB.EltSize, Alignment); if (IsLoad) { unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR : AMDGPU::BUFFER_LOAD_DWORD_OFFSET; - buildSpillLoadStore(MI, Opc, - Index, - VGPR, false, - FrameReg, - Offset * EltSize, MMO, - RS); + buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, false, FrameReg, + Offset * SB.EltSize, MMO, &SB.RS); } else { unsigned Opc = ST.enableFlatScratch() ? 
AMDGPU::SCRATCH_STORE_DWORD_SADDR : AMDGPU::BUFFER_STORE_DWORD_OFFSET; - buildSpillLoadStore(MI, Opc, Index, VGPR, - IsKill, FrameReg, - Offset * EltSize, MMO, RS); + buildSpillLoadStore(SB.MI, Opc, Index, SB.TmpVGPR, IsKill, FrameReg, + Offset * SB.EltSize, MMO, &SB.RS); // This only ever adds one VGPR spill - MFI->addToSpilledVGPRs(1); - } - - // Restore EXEC - BuildMI(*MBB, MI, DL, TII->get(ExecMovOpc), ExecReg) - .addReg(SavedExecReg, getKillRegState(IsLoad || IsKill)); - - // Restore clobbered SGPRs - if (IsLoad) { - // Nothing to do; register will be overwritten - } else if (!IsKill) { - // Restore SGPRs from appropriate VGPR lanes - if (!OnlyExecLo) { - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), - getSubReg(SuperReg, SplitParts[FirstPart + ExecLane + 1])) - .addReg(VGPR) - .addImm(ExecLane + 1); - } - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), - NumSubRegs == 1 ? SavedExecReg - : Register(getSubReg( - SuperReg, SplitParts[FirstPart + ExecLane]))) - .addReg(VGPR, RegState::Kill) - .addImm(ExecLane); + SB.MFI.addToSpilledVGPRs(1); } } @@ -1160,115 +1301,97 @@ bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index, RegScavenger *RS, bool OnlyToVGPR) const { - MachineBasicBlock *MBB = MI->getParent(); - MachineFunction *MF = MBB->getParent(); - SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); + SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, *RS); - ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills - = MFI->getSGPRToVGPRSpills(Index); + ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills = + SB.MFI.getSGPRToVGPRSpills(Index); bool SpillToVGPR = !VGPRSpills.empty(); if (OnlyToVGPR && !SpillToVGPR) return false; - const SIInstrInfo *TII = ST.getInstrInfo(); - - Register SuperReg = MI->getOperand(0).getReg(); - bool IsKill = MI->getOperand(0).isKill(); - const DebugLoc &DL = MI->getDebugLoc(); - - assert(SpillToVGPR || (SuperReg != MFI->getStackPtrOffsetReg() && - SuperReg != 
MFI->getFrameOffsetReg())); - - assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); - assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI && - SuperReg != AMDGPU::EXEC && "exec should never spill"); - - unsigned EltSize = 4; - const TargetRegisterClass *RC = getPhysRegClass(SuperReg); - - ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize); - unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); + assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() && + SB.SuperReg != SB.MFI.getFrameOffsetReg())); if (SpillToVGPR) { - for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { - Register SubReg = NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[i])); + for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) { + Register SubReg = + SB.NumSubRegs == 1 + ? SB.SuperReg + : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; - bool UseKill = IsKill && i == NumSubRegs - 1; + bool UseKill = SB.IsKill && i == SB.NumSubRegs - 1; // Mark the "old value of vgpr" input undef only if this is the first sgpr // spill to this specific vgpr in the first basic block. - auto MIB = - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), Spill.VGPR) - .addReg(SubReg, getKillRegState(UseKill)) - .addImm(Spill.Lane) - .addReg(Spill.VGPR); + auto MIB = BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32), + Spill.VGPR) + .addReg(SubReg, getKillRegState(UseKill)) + .addImm(Spill.Lane) + .addReg(Spill.VGPR); - if (i == 0 && NumSubRegs > 1) { + if (i == 0 && SB.NumSubRegs > 1) { // We may be spilling a super-register which is only partially defined, // and need to ensure later spills think the value is defined. 
- MIB.addReg(SuperReg, RegState::ImplicitDefine); + MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); } - if (NumSubRegs > 1) - MIB.addReg(SuperReg, getKillRegState(UseKill) | RegState::Implicit); + if (SB.NumSubRegs > 1) + MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit); // FIXME: Since this spills to another register instead of an actual // frame index, we should delete the frame index when all references to // it are fixed. } } else { - // Scavenged temporary VGPR to use. It must be scavenged once for any number - // of spilled subregs. - Register TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); - RS->setRegUsed(TmpVGPR); + SB.prepare(); - // SubReg carries the "Kill" flag when SubReg == SuperReg. - unsigned SubKillState = getKillRegState((NumSubRegs == 1) && IsKill); + // SubReg carries the "Kill" flag when SubReg == SB.SuperReg. + unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill); - unsigned PerVGPR = 32; - unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR; - int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL; + // Per VGPR helper data + auto PVD = SB.getPerVGPRData(); - for (unsigned Offset = 0; Offset < NumVGPRs; ++Offset) { + for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) { unsigned TmpVGPRFlags = RegState::Undef; // Write sub registers into the VGPR - for (unsigned i = Offset * PerVGPR, - e = std::min((Offset + 1) * PerVGPR, NumSubRegs); + for (unsigned i = Offset * PVD.PerVGPR, + e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs); i < e; ++i) { - Register SubReg = NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[i])); + Register SubReg = + SB.NumSubRegs == 1 + ? 
SB.SuperReg + : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); MachineInstrBuilder WriteLane = - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_WRITELANE_B32), TmpVGPR) + BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32), + SB.TmpVGPR) .addReg(SubReg, SubKillState) - .addImm(i % PerVGPR) - .addReg(TmpVGPR, TmpVGPRFlags); + .addImm(i % PVD.PerVGPR) + .addReg(SB.TmpVGPR, TmpVGPRFlags); TmpVGPRFlags = 0; // There could be undef components of a spilled super register. // TODO: Can we detect this and skip the spill? - if (NumSubRegs > 1) { - // The last implicit use of the SuperReg carries the "Kill" flag. + if (SB.NumSubRegs > 1) { + // The last implicit use of the SB.SuperReg carries the "Kill" flag. unsigned SuperKillState = 0; - if (i + 1 == NumSubRegs) - SuperKillState |= getKillRegState(IsKill); - WriteLane.addReg(SuperReg, RegState::Implicit | SuperKillState); + if (i + 1 == SB.NumSubRegs) + SuperKillState |= getKillRegState(SB.IsKill); + WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState); } } // Write out VGPR - buildSGPRSpillLoadStore(MI, Index, Offset, EltSize, TmpVGPR, VGPRLanes, - RS, false); + SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false); } + + SB.restore(); } MI->eraseFromParent(); - MFI->addToSpilledSGPRs(NumSubRegs); + SB.MFI.addToSpilledSGPRs(SB.NumSubRegs); return true; } @@ -1276,75 +1399,59 @@ bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index, RegScavenger *RS, bool OnlyToVGPR) const { - MachineFunction *MF = MI->getParent()->getParent(); - MachineBasicBlock *MBB = MI->getParent(); - SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>(); + SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, *RS); - ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills - = MFI->getSGPRToVGPRSpills(Index); + ArrayRef<SIMachineFunctionInfo::SpilledReg> VGPRSpills = + SB.MFI.getSGPRToVGPRSpills(Index); bool SpillToVGPR = !VGPRSpills.empty(); if (OnlyToVGPR && !SpillToVGPR) return false; - 
const SIInstrInfo *TII = ST.getInstrInfo(); - const DebugLoc &DL = MI->getDebugLoc(); - - Register SuperReg = MI->getOperand(0).getReg(); - - assert(SuperReg != AMDGPU::M0 && "m0 should never spill"); - assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI && - SuperReg != AMDGPU::EXEC && "exec should never spill"); - - unsigned EltSize = 4; - - const TargetRegisterClass *RC = getPhysRegClass(SuperReg); - - ArrayRef<int16_t> SplitParts = getRegSplitParts(RC, EltSize); - unsigned NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size(); - if (SpillToVGPR) { - for (unsigned i = 0, e = NumSubRegs; i < e; ++i) { - Register SubReg = NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[i])); + for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) { + Register SubReg = + SB.NumSubRegs == 1 + ? SB.SuperReg + : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); SIMachineFunctionInfo::SpilledReg Spill = VGPRSpills[i]; - auto MIB = BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg) - .addReg(Spill.VGPR) - .addImm(Spill.Lane); - if (NumSubRegs > 1 && i == 0) - MIB.addReg(SuperReg, RegState::ImplicitDefine); + auto MIB = + BuildMI(SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32), SubReg) + .addReg(Spill.VGPR) + .addImm(Spill.Lane); + if (SB.NumSubRegs > 1 && i == 0) + MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); } } else { - Register TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0); - RS->setRegUsed(TmpVGPR); + SB.prepare(); - unsigned PerVGPR = 32; - unsigned NumVGPRs = (NumSubRegs + (PerVGPR - 1)) / PerVGPR; - int64_t VGPRLanes = (1LL << std::min(PerVGPR, NumSubRegs)) - 1LL; + // Per VGPR helper data + auto PVD = SB.getPerVGPRData(); - for (unsigned Offset = 0; Offset < NumVGPRs; ++Offset) { + for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) { // Load in VGPR data - buildSGPRSpillLoadStore(MI, Index, Offset, EltSize, TmpVGPR, VGPRLanes, - RS, true); + SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true); 
// Unpack lanes - for (unsigned i = Offset * PerVGPR, - e = std::min((Offset + 1) * PerVGPR, NumSubRegs); + for (unsigned i = Offset * PVD.PerVGPR, + e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs); i < e; ++i) { - Register SubReg = NumSubRegs == 1 - ? SuperReg - : Register(getSubReg(SuperReg, SplitParts[i])); + Register SubReg = + SB.NumSubRegs == 1 + ? SB.SuperReg + : Register(getSubReg(SB.SuperReg, SB.SplitParts[i])); bool LastSubReg = (i + 1 == e); - auto MIB = - BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READLANE_B32), SubReg) - .addReg(TmpVGPR, getKillRegState(LastSubReg)) - .addImm(i); - if (NumSubRegs > 1 && i == 0) - MIB.addReg(SuperReg, RegState::ImplicitDefine); + auto MIB = BuildMI(SB.MBB, MI, SB.DL, + SB.TII.get(AMDGPU::V_READLANE_B32), SubReg) + .addReg(SB.TmpVGPR, getKillRegState(LastSubReg)) + .addImm(i); + if (SB.NumSubRegs > 1 && i == 0) + MIB.addReg(SB.SuperReg, RegState::ImplicitDefine); } } + + SB.restore(); } MI->eraseFromParent(); |