You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
149 lines
4.5 KiB
149 lines
4.5 KiB
//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering -----------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
/// \file This file contains a DAG scheduling mutation to cluster shader
|
|
/// exports.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "AMDGPUExportClustering.h"
|
|
#include "AMDGPUSubtarget.h"
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
#include "SIInstrInfo.h"
|
|
|
|
using namespace llvm;
|
|
|
|
namespace {
|
|
|
|
class ExportClustering : public ScheduleDAGMutation {
|
|
public:
|
|
ExportClustering() {}
|
|
void apply(ScheduleDAGInstrs *DAG) override;
|
|
};
|
|
|
|
static bool isExport(const SUnit &SU) {
|
|
return SIInstrInfo::isEXP(*SU.getInstr());
|
|
}
|
|
|
|
static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
|
|
const MachineInstr *MI = SU->getInstr();
|
|
int Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
|
|
return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST;
|
|
}
|
|
|
|
static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
|
|
unsigned PosCount) {
|
|
if (!PosCount || PosCount == Chain.size())
|
|
return;
|
|
|
|
// Position exports should occur as soon as possible in the shader
|
|
// for optimal performance. This moves position exports before
|
|
// other exports while preserving the order within different export
|
|
// types (pos or other).
|
|
SmallVector<SUnit *, 8> Copy(Chain);
|
|
unsigned PosIdx = 0;
|
|
unsigned OtherIdx = PosCount;
|
|
for (SUnit *SU : Copy) {
|
|
if (isPositionExport(TII, SU))
|
|
Chain[PosIdx++] = SU;
|
|
else
|
|
Chain[OtherIdx++] = SU;
|
|
}
|
|
}
|
|
|
|
/// Links the exports in \p Exports into a single ordered cluster by adding
/// edges to \p DAG.
///
/// For every adjacent pair (Earlier, Later) in the list:
///  - each non-weak predecessor of Later that is not itself an export is
///    added as an artificial predecessor of the first export in the list;
///  - a barrier edge and a cluster edge are added from Earlier to Later.
///
/// \p Exports must not be empty; its first element is read unconditionally.
static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
  SUnit *Head = Exports.front();

  // Walk adjacent pairs and wire them together with new edges.
  for (unsigned Next = 1, Count = Exports.size(); Next < Count; ++Next) {
    SUnit *Earlier = Exports[Next - 1];
    SUnit *Later = Exports[Next];

    // Hoist the dependencies of the later export onto the head of the chain
    // so that no computation ends up scheduled inside the chain.
    for (const SDep &Dep : Later->Preds) {
      SUnit *Producer = Dep.getSUnit();
      if (Dep.isWeak() || isExport(*Producer))
        continue;
      DAG->addEdge(Head, SDep(Producer, SDep::Artificial));
    }

    // Barrier edge fixes the relative order of the two exports ...
    DAG->addEdge(Later, SDep(Earlier, SDep::Barrier));
    // ... and the cluster edge marks them as belonging together.
    DAG->addEdge(Later, SDep(Earlier, SDep::Cluster));
  }
}
|
|
|
|
/// Drops every barrier predecessor edge of \p SU whose source is an export.
///
/// When \p SU is not an export itself, each dropped edge is compensated for:
/// the barrier predecessors of the removed export that are not exports are
/// re-attached to \p SU as barrier edges so the ordering is maintained.
///
/// Edits are collected first and applied after the scan so that SU.Preds is
/// not modified while it is being iterated.
static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
  SmallVector<SDep, 2> ToAdd, ToRemove;

  for (const SDep &Dep : SU.Preds) {
    SUnit *Producer = Dep.getSUnit();
    if (!Dep.isBarrier() || !isExport(*Producer))
      continue;

    ToRemove.push_back(Dep);

    // Export-to-export ordering is rebuilt elsewhere; nothing to inherit.
    if (isExport(SU))
      continue;

    // Removing a barrier means the dependencies of the predecessor have to
    // be copied over to keep the order intact.
    for (const SDep &Inherited : Producer->Preds) {
      SUnit *Ancestor = Inherited.getSUnit();
      if (!Inherited.isBarrier() || isExport(*Ancestor))
        continue;
      ToAdd.emplace_back(Ancestor, SDep::Barrier);
    }
  }

  for (const SDep &Stale : ToRemove)
    SU.removePred(Stale);
  for (const SDep &Fresh : ToAdd)
    DAG->addEdge(&SU, Fresh);
}
|
|
|
|
/// Collects all exports in \p DAG, strips the barrier edges that tie other
/// nodes to them, and, when more than one export was found, reorders them
/// (position exports first) and chains them into a cluster.
void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
  const auto *TII = static_cast<const SIInstrInfo *>(DAG->TII);

  SmallVector<SUnit *, 8> Exports;
  unsigned NumPosExports = 0;

  // Single pass over the DAG: record every export and remove the barrier
  // edges that create dependencies on exports. Releasing exports from their
  // successor edges gives the scheduler more freedom, and nothing should be
  // order dependent on exports. The edges ordering the exports themselves
  // are added afterwards.
  for (SUnit &Node : DAG->SUnits) {
    if (isExport(Node)) {
      Exports.push_back(&Node);
      if (isPositionExport(TII, &Node))
        ++NumPosExports;

      removeExportDependencies(DAG, Node);

      // Iterate over a copy of the successor list; the calls below remove
      // predecessor edges from the successors while we walk it.
      const SmallVector<SDep, 4> Successors(Node.Succs);
      for (const SDep &Succ : Successors)
        removeExportDependencies(DAG, *Succ.getSUnit());
    }
  }

  // Clustering only makes sense with at least two exports.
  if (Exports.size() <= 1)
    return;

  sortChain(TII, Exports, NumPosExports);
  buildCluster(Exports, DAG);
}
|
|
|
|
} // end namespace
|
|
|
|
namespace llvm {
|
|
|
|
std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {
|
|
return std::make_unique<ExportClustering>();
|
|
}
|
|
|
|
} // end namespace llvm
|