You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
222 lines
6.1 KiB
222 lines
6.1 KiB
4 months ago
|
//===- SIPreAllocateWWMRegs.cpp - WWM Register Pre-allocation -------------===//
|
||
|
//
|
||
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
||
|
// See https://llvm.org/LICENSE.txt for license information.
|
||
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
//
|
||
|
/// \file
|
||
|
/// Pass to pre-allocated WWM registers
|
||
|
//
|
||
|
//===----------------------------------------------------------------------===//
|
||
|
|
||
|
#include "AMDGPU.h"
|
||
|
#include "AMDGPUSubtarget.h"
|
||
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
||
|
#include "SIInstrInfo.h"
|
||
|
#include "SIMachineFunctionInfo.h"
|
||
|
#include "SIRegisterInfo.h"
|
||
|
#include "llvm/ADT/PostOrderIterator.h"
|
||
|
#include "llvm/CodeGen/LiveInterval.h"
|
||
|
#include "llvm/CodeGen/LiveIntervals.h"
|
||
|
#include "llvm/CodeGen/LiveRegMatrix.h"
|
||
|
#include "llvm/CodeGen/MachineDominators.h"
|
||
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||
|
#include "llvm/CodeGen/RegisterClassInfo.h"
|
||
|
#include "llvm/CodeGen/VirtRegMap.h"
|
||
|
#include "llvm/InitializePasses.h"
|
||
|
|
||
|
using namespace llvm;
|
||
|
|
||
|
#define DEBUG_TYPE "si-pre-allocate-wwm-regs"
|
||
|
|
||
|
namespace {
|
||
|
|
||
|
class SIPreAllocateWWMRegs : public MachineFunctionPass {
|
||
|
private:
|
||
|
const SIInstrInfo *TII;
|
||
|
const SIRegisterInfo *TRI;
|
||
|
MachineRegisterInfo *MRI;
|
||
|
LiveIntervals *LIS;
|
||
|
LiveRegMatrix *Matrix;
|
||
|
VirtRegMap *VRM;
|
||
|
RegisterClassInfo RegClassInfo;
|
||
|
|
||
|
std::vector<unsigned> RegsToRewrite;
|
||
|
|
||
|
public:
|
||
|
static char ID;
|
||
|
|
||
|
SIPreAllocateWWMRegs() : MachineFunctionPass(ID) {
|
||
|
initializeSIPreAllocateWWMRegsPass(*PassRegistry::getPassRegistry());
|
||
|
}
|
||
|
|
||
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
||
|
|
||
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||
|
AU.addRequired<LiveIntervals>();
|
||
|
AU.addPreserved<LiveIntervals>();
|
||
|
AU.addRequired<VirtRegMap>();
|
||
|
AU.addRequired<LiveRegMatrix>();
|
||
|
AU.addPreserved<SlotIndexes>();
|
||
|
AU.setPreservesCFG();
|
||
|
MachineFunctionPass::getAnalysisUsage(AU);
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
bool processDef(MachineOperand &MO);
|
||
|
void rewriteRegs(MachineFunction &MF);
|
||
|
};
|
||
|
|
||
|
} // End anonymous namespace.
|
||
|
|
||
|
INITIALIZE_PASS_BEGIN(SIPreAllocateWWMRegs, DEBUG_TYPE,
|
||
|
"SI Pre-allocate WWM Registers", false, false)
|
||
|
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
|
||
|
INITIALIZE_PASS_DEPENDENCY(VirtRegMap)
|
||
|
INITIALIZE_PASS_DEPENDENCY(LiveRegMatrix)
|
||
|
INITIALIZE_PASS_END(SIPreAllocateWWMRegs, DEBUG_TYPE,
|
||
|
"SI Pre-allocate WWM Registers", false, false)
|
||
|
|
||
|
char SIPreAllocateWWMRegs::ID = 0;
|
||
|
|
||
|
char &llvm::SIPreAllocateWWMRegsID = SIPreAllocateWWMRegs::ID;
|
||
|
|
||
|
FunctionPass *llvm::createSIPreAllocateWWMRegsPass() {
|
||
|
return new SIPreAllocateWWMRegs();
|
||
|
}
|
||
|
|
||
|
bool SIPreAllocateWWMRegs::processDef(MachineOperand &MO) {
|
||
|
if (!MO.isReg())
|
||
|
return false;
|
||
|
|
||
|
Register Reg = MO.getReg();
|
||
|
if (Reg.isPhysical())
|
||
|
return false;
|
||
|
|
||
|
if (!TRI->isVGPR(*MRI, Reg))
|
||
|
return false;
|
||
|
|
||
|
if (VRM->hasPhys(Reg))
|
||
|
return false;
|
||
|
|
||
|
LiveInterval &LI = LIS->getInterval(Reg);
|
||
|
|
||
|
for (MCRegister PhysReg : RegClassInfo.getOrder(MRI->getRegClass(Reg))) {
|
||
|
if (!MRI->isPhysRegUsed(PhysReg) &&
|
||
|
Matrix->checkInterference(LI, PhysReg) == LiveRegMatrix::IK_Free) {
|
||
|
Matrix->assign(LI, PhysReg);
|
||
|
assert(PhysReg != 0);
|
||
|
RegsToRewrite.push_back(Reg);
|
||
|
return true;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
llvm_unreachable("physreg not found for WWM expression");
|
||
|
return false;
|
||
|
}
|
||
|
|
||
|
void SIPreAllocateWWMRegs::rewriteRegs(MachineFunction &MF) {
|
||
|
for (MachineBasicBlock &MBB : MF) {
|
||
|
for (MachineInstr &MI : MBB) {
|
||
|
for (MachineOperand &MO : MI.operands()) {
|
||
|
if (!MO.isReg())
|
||
|
continue;
|
||
|
|
||
|
const Register VirtReg = MO.getReg();
|
||
|
if (VirtReg.isPhysical())
|
||
|
continue;
|
||
|
|
||
|
if (!VRM->hasPhys(VirtReg))
|
||
|
continue;
|
||
|
|
||
|
Register PhysReg = VRM->getPhys(VirtReg);
|
||
|
const unsigned SubReg = MO.getSubReg();
|
||
|
if (SubReg != 0) {
|
||
|
PhysReg = TRI->getSubReg(PhysReg, SubReg);
|
||
|
MO.setSubReg(0);
|
||
|
}
|
||
|
|
||
|
MO.setReg(PhysReg);
|
||
|
MO.setIsRenamable(false);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
|
||
|
|
||
|
for (unsigned Reg : RegsToRewrite) {
|
||
|
LIS->removeInterval(Reg);
|
||
|
|
||
|
const Register PhysReg = VRM->getPhys(Reg);
|
||
|
assert(PhysReg != 0);
|
||
|
MFI->ReserveWWMRegister(PhysReg);
|
||
|
}
|
||
|
|
||
|
RegsToRewrite.clear();
|
||
|
|
||
|
// Update the set of reserved registers to include WWM ones.
|
||
|
MRI->freezeReservedRegs(MF);
|
||
|
}
|
||
|
|
||
|
bool SIPreAllocateWWMRegs::runOnMachineFunction(MachineFunction &MF) {
|
||
|
LLVM_DEBUG(dbgs() << "SIPreAllocateWWMRegs: function " << MF.getName() << "\n");
|
||
|
|
||
|
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
|
||
|
|
||
|
TII = ST.getInstrInfo();
|
||
|
TRI = &TII->getRegisterInfo();
|
||
|
MRI = &MF.getRegInfo();
|
||
|
|
||
|
LIS = &getAnalysis<LiveIntervals>();
|
||
|
Matrix = &getAnalysis<LiveRegMatrix>();
|
||
|
VRM = &getAnalysis<VirtRegMap>();
|
||
|
|
||
|
RegClassInfo.runOnMachineFunction(MF);
|
||
|
|
||
|
bool RegsAssigned = false;
|
||
|
|
||
|
// We use a reverse post-order traversal of the control-flow graph to
|
||
|
// guarantee that we visit definitions in dominance order. Since WWM
|
||
|
// expressions are guaranteed to never involve phi nodes, and we can only
|
||
|
// escape WWM through the special WWM instruction, this means that this is a
|
||
|
// perfect elimination order, so we can never do any better.
|
||
|
ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
|
||
|
|
||
|
for (MachineBasicBlock *MBB : RPOT) {
|
||
|
bool InWWM = false;
|
||
|
for (MachineInstr &MI : *MBB) {
|
||
|
if (MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B32 ||
|
||
|
MI.getOpcode() == AMDGPU::V_SET_INACTIVE_B64)
|
||
|
RegsAssigned |= processDef(MI.getOperand(0));
|
||
|
|
||
|
if (MI.getOpcode() == AMDGPU::ENTER_WWM) {
|
||
|
LLVM_DEBUG(dbgs() << "entering WWM region: " << MI << "\n");
|
||
|
InWWM = true;
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
if (MI.getOpcode() == AMDGPU::EXIT_WWM) {
|
||
|
LLVM_DEBUG(dbgs() << "exiting WWM region: " << MI << "\n");
|
||
|
InWWM = false;
|
||
|
}
|
||
|
|
||
|
if (!InWWM)
|
||
|
continue;
|
||
|
|
||
|
LLVM_DEBUG(dbgs() << "processing " << MI << "\n");
|
||
|
|
||
|
for (MachineOperand &DefOpnd : MI.defs()) {
|
||
|
RegsAssigned |= processDef(DefOpnd);
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if (!RegsAssigned)
|
||
|
return false;
|
||
|
|
||
|
rewriteRegs(MF);
|
||
|
return true;
|
||
|
}
|