You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
947 lines
27 KiB
947 lines
27 KiB
//==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
/// \file
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
|
|
#define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
|
|
|
|
#include "AMDGPUArgumentUsageInfo.h"
|
|
#include "AMDGPUMachineFunction.h"
|
|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
|
|
#include "SIInstrInfo.h"
|
|
#include "SIRegisterInfo.h"
|
|
#include "llvm/ADT/ArrayRef.h"
|
|
#include "llvm/ADT/DenseMap.h"
|
|
#include "llvm/ADT/Optional.h"
|
|
#include "llvm/ADT/STLExtras.h"
|
|
#include "llvm/ADT/SmallVector.h"
|
|
#include "llvm/ADT/SparseBitVector.h"
|
|
#include "llvm/CodeGen/MIRYamlMapping.h"
|
|
#include "llvm/CodeGen/PseudoSourceValue.h"
|
|
#include "llvm/CodeGen/TargetInstrInfo.h"
|
|
#include "llvm/MC/MCRegisterInfo.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
#include <array>
|
|
#include <cassert>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
namespace llvm {
|
|
|
|
class MachineFrameInfo;
|
|
class MachineFunction;
|
|
class TargetRegisterClass;
|
|
|
|
class AMDGPUPseudoSourceValue : public PseudoSourceValue {
|
|
public:
|
|
enum AMDGPUPSVKind : unsigned {
|
|
PSVBuffer = PseudoSourceValue::TargetCustom,
|
|
PSVImage,
|
|
GWSResource
|
|
};
|
|
|
|
protected:
|
|
AMDGPUPseudoSourceValue(unsigned Kind, const TargetInstrInfo &TII)
|
|
: PseudoSourceValue(Kind, TII) {}
|
|
|
|
public:
|
|
bool isConstant(const MachineFrameInfo *) const override {
|
|
// This should probably be true for most images, but we will start by being
|
|
// conservative.
|
|
return false;
|
|
}
|
|
|
|
bool isAliased(const MachineFrameInfo *) const override {
|
|
return true;
|
|
}
|
|
|
|
bool mayAlias(const MachineFrameInfo *) const override {
|
|
return true;
|
|
}
|
|
};
|
|
|
|
class AMDGPUBufferPseudoSourceValue final : public AMDGPUPseudoSourceValue {
|
|
public:
|
|
explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII)
|
|
: AMDGPUPseudoSourceValue(PSVBuffer, TII) {}
|
|
|
|
static bool classof(const PseudoSourceValue *V) {
|
|
return V->kind() == PSVBuffer;
|
|
}
|
|
};
|
|
|
|
class AMDGPUImagePseudoSourceValue final : public AMDGPUPseudoSourceValue {
|
|
public:
|
|
// TODO: Is the img rsrc useful?
|
|
explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII)
|
|
: AMDGPUPseudoSourceValue(PSVImage, TII) {}
|
|
|
|
static bool classof(const PseudoSourceValue *V) {
|
|
return V->kind() == PSVImage;
|
|
}
|
|
};
|
|
|
|
class AMDGPUGWSResourcePseudoSourceValue final : public AMDGPUPseudoSourceValue {
|
|
public:
|
|
explicit AMDGPUGWSResourcePseudoSourceValue(const TargetInstrInfo &TII)
|
|
: AMDGPUPseudoSourceValue(GWSResource, TII) {}
|
|
|
|
static bool classof(const PseudoSourceValue *V) {
|
|
return V->kind() == GWSResource;
|
|
}
|
|
|
|
// These are inaccessible memory from IR.
|
|
bool isAliased(const MachineFrameInfo *) const override {
|
|
return false;
|
|
}
|
|
|
|
// These are inaccessible memory from IR.
|
|
bool mayAlias(const MachineFrameInfo *) const override {
|
|
return false;
|
|
}
|
|
|
|
void printCustom(raw_ostream &OS) const override {
|
|
OS << "GWSResource";
|
|
}
|
|
};
|
|
|
|
namespace yaml {
|
|
|
|
struct SIArgument {
|
|
bool IsRegister;
|
|
union {
|
|
StringValue RegisterName;
|
|
unsigned StackOffset;
|
|
};
|
|
Optional<unsigned> Mask;
|
|
|
|
// Default constructor, which creates a stack argument.
|
|
SIArgument() : IsRegister(false), StackOffset(0) {}
|
|
SIArgument(const SIArgument &Other) {
|
|
IsRegister = Other.IsRegister;
|
|
if (IsRegister) {
|
|
::new ((void *)std::addressof(RegisterName))
|
|
StringValue(Other.RegisterName);
|
|
} else
|
|
StackOffset = Other.StackOffset;
|
|
Mask = Other.Mask;
|
|
}
|
|
SIArgument &operator=(const SIArgument &Other) {
|
|
IsRegister = Other.IsRegister;
|
|
if (IsRegister) {
|
|
::new ((void *)std::addressof(RegisterName))
|
|
StringValue(Other.RegisterName);
|
|
} else
|
|
StackOffset = Other.StackOffset;
|
|
Mask = Other.Mask;
|
|
return *this;
|
|
}
|
|
~SIArgument() {
|
|
if (IsRegister)
|
|
RegisterName.~StringValue();
|
|
}
|
|
|
|
// Helper to create a register or stack argument.
|
|
static inline SIArgument createArgument(bool IsReg) {
|
|
if (IsReg)
|
|
return SIArgument(IsReg);
|
|
return SIArgument();
|
|
}
|
|
|
|
private:
|
|
// Construct a register argument.
|
|
SIArgument(bool) : IsRegister(true), RegisterName() {}
|
|
};
|
|
|
|
template <> struct MappingTraits<SIArgument> {
|
|
static void mapping(IO &YamlIO, SIArgument &A) {
|
|
if (YamlIO.outputting()) {
|
|
if (A.IsRegister)
|
|
YamlIO.mapRequired("reg", A.RegisterName);
|
|
else
|
|
YamlIO.mapRequired("offset", A.StackOffset);
|
|
} else {
|
|
auto Keys = YamlIO.keys();
|
|
if (is_contained(Keys, "reg")) {
|
|
A = SIArgument::createArgument(true);
|
|
YamlIO.mapRequired("reg", A.RegisterName);
|
|
} else if (is_contained(Keys, "offset"))
|
|
YamlIO.mapRequired("offset", A.StackOffset);
|
|
else
|
|
YamlIO.setError("missing required key 'reg' or 'offset'");
|
|
}
|
|
YamlIO.mapOptional("mask", A.Mask);
|
|
}
|
|
static const bool flow = true;
|
|
};
|
|
|
|
struct SIArgumentInfo {
|
|
Optional<SIArgument> PrivateSegmentBuffer;
|
|
Optional<SIArgument> DispatchPtr;
|
|
Optional<SIArgument> QueuePtr;
|
|
Optional<SIArgument> KernargSegmentPtr;
|
|
Optional<SIArgument> DispatchID;
|
|
Optional<SIArgument> FlatScratchInit;
|
|
Optional<SIArgument> PrivateSegmentSize;
|
|
|
|
Optional<SIArgument> WorkGroupIDX;
|
|
Optional<SIArgument> WorkGroupIDY;
|
|
Optional<SIArgument> WorkGroupIDZ;
|
|
Optional<SIArgument> WorkGroupInfo;
|
|
Optional<SIArgument> PrivateSegmentWaveByteOffset;
|
|
|
|
Optional<SIArgument> ImplicitArgPtr;
|
|
Optional<SIArgument> ImplicitBufferPtr;
|
|
|
|
Optional<SIArgument> WorkItemIDX;
|
|
Optional<SIArgument> WorkItemIDY;
|
|
Optional<SIArgument> WorkItemIDZ;
|
|
};
|
|
|
|
template <> struct MappingTraits<SIArgumentInfo> {
|
|
static void mapping(IO &YamlIO, SIArgumentInfo &AI) {
|
|
YamlIO.mapOptional("privateSegmentBuffer", AI.PrivateSegmentBuffer);
|
|
YamlIO.mapOptional("dispatchPtr", AI.DispatchPtr);
|
|
YamlIO.mapOptional("queuePtr", AI.QueuePtr);
|
|
YamlIO.mapOptional("kernargSegmentPtr", AI.KernargSegmentPtr);
|
|
YamlIO.mapOptional("dispatchID", AI.DispatchID);
|
|
YamlIO.mapOptional("flatScratchInit", AI.FlatScratchInit);
|
|
YamlIO.mapOptional("privateSegmentSize", AI.PrivateSegmentSize);
|
|
|
|
YamlIO.mapOptional("workGroupIDX", AI.WorkGroupIDX);
|
|
YamlIO.mapOptional("workGroupIDY", AI.WorkGroupIDY);
|
|
YamlIO.mapOptional("workGroupIDZ", AI.WorkGroupIDZ);
|
|
YamlIO.mapOptional("workGroupInfo", AI.WorkGroupInfo);
|
|
YamlIO.mapOptional("privateSegmentWaveByteOffset",
|
|
AI.PrivateSegmentWaveByteOffset);
|
|
|
|
YamlIO.mapOptional("implicitArgPtr", AI.ImplicitArgPtr);
|
|
YamlIO.mapOptional("implicitBufferPtr", AI.ImplicitBufferPtr);
|
|
|
|
YamlIO.mapOptional("workItemIDX", AI.WorkItemIDX);
|
|
YamlIO.mapOptional("workItemIDY", AI.WorkItemIDY);
|
|
YamlIO.mapOptional("workItemIDZ", AI.WorkItemIDZ);
|
|
}
|
|
};
|
|
|
|
// Default to default mode for default calling convention.
|
|
struct SIMode {
|
|
bool IEEE = true;
|
|
bool DX10Clamp = true;
|
|
bool FP32InputDenormals = true;
|
|
bool FP32OutputDenormals = true;
|
|
bool FP64FP16InputDenormals = true;
|
|
bool FP64FP16OutputDenormals = true;
|
|
|
|
SIMode() = default;
|
|
|
|
SIMode(const AMDGPU::SIModeRegisterDefaults &Mode) {
|
|
IEEE = Mode.IEEE;
|
|
DX10Clamp = Mode.DX10Clamp;
|
|
FP32InputDenormals = Mode.FP32InputDenormals;
|
|
FP32OutputDenormals = Mode.FP32OutputDenormals;
|
|
FP64FP16InputDenormals = Mode.FP64FP16InputDenormals;
|
|
FP64FP16OutputDenormals = Mode.FP64FP16OutputDenormals;
|
|
}
|
|
|
|
bool operator ==(const SIMode Other) const {
|
|
return IEEE == Other.IEEE &&
|
|
DX10Clamp == Other.DX10Clamp &&
|
|
FP32InputDenormals == Other.FP32InputDenormals &&
|
|
FP32OutputDenormals == Other.FP32OutputDenormals &&
|
|
FP64FP16InputDenormals == Other.FP64FP16InputDenormals &&
|
|
FP64FP16OutputDenormals == Other.FP64FP16OutputDenormals;
|
|
}
|
|
};
|
|
|
|
template <> struct MappingTraits<SIMode> {
|
|
static void mapping(IO &YamlIO, SIMode &Mode) {
|
|
YamlIO.mapOptional("ieee", Mode.IEEE, true);
|
|
YamlIO.mapOptional("dx10-clamp", Mode.DX10Clamp, true);
|
|
YamlIO.mapOptional("fp32-input-denormals", Mode.FP32InputDenormals, true);
|
|
YamlIO.mapOptional("fp32-output-denormals", Mode.FP32OutputDenormals, true);
|
|
YamlIO.mapOptional("fp64-fp16-input-denormals", Mode.FP64FP16InputDenormals, true);
|
|
YamlIO.mapOptional("fp64-fp16-output-denormals", Mode.FP64FP16OutputDenormals, true);
|
|
}
|
|
};
|
|
|
|
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
|
|
uint64_t ExplicitKernArgSize = 0;
|
|
unsigned MaxKernArgAlign = 0;
|
|
unsigned LDSSize = 0;
|
|
Align DynLDSAlign;
|
|
bool IsEntryFunction = false;
|
|
bool NoSignedZerosFPMath = false;
|
|
bool MemoryBound = false;
|
|
bool WaveLimiter = false;
|
|
bool HasSpilledSGPRs = false;
|
|
bool HasSpilledVGPRs = false;
|
|
uint32_t HighBitsOf32BitAddress = 0;
|
|
|
|
StringValue ScratchRSrcReg = "$private_rsrc_reg";
|
|
StringValue FrameOffsetReg = "$fp_reg";
|
|
StringValue StackPtrOffsetReg = "$sp_reg";
|
|
|
|
Optional<SIArgumentInfo> ArgInfo;
|
|
SIMode Mode;
|
|
|
|
SIMachineFunctionInfo() = default;
|
|
SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
|
|
const TargetRegisterInfo &TRI);
|
|
|
|
void mappingImpl(yaml::IO &YamlIO) override;
|
|
~SIMachineFunctionInfo() = default;
|
|
};
|
|
|
|
template <> struct MappingTraits<SIMachineFunctionInfo> {
|
|
static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
|
|
YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
|
|
UINT64_C(0));
|
|
YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
|
|
YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
|
|
YamlIO.mapOptional("dynLDSAlign", MFI.DynLDSAlign, Align());
|
|
YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
|
|
YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
|
|
YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
|
|
YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
|
|
YamlIO.mapOptional("hasSpilledSGPRs", MFI.HasSpilledSGPRs, false);
|
|
YamlIO.mapOptional("hasSpilledVGPRs", MFI.HasSpilledVGPRs, false);
|
|
YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
|
|
StringValue("$private_rsrc_reg"));
|
|
YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
|
|
StringValue("$fp_reg"));
|
|
YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
|
|
StringValue("$sp_reg"));
|
|
YamlIO.mapOptional("argumentInfo", MFI.ArgInfo);
|
|
YamlIO.mapOptional("mode", MFI.Mode, SIMode());
|
|
YamlIO.mapOptional("highBitsOf32BitAddress",
|
|
MFI.HighBitsOf32BitAddress, 0u);
|
|
}
|
|
};
|
|
|
|
} // end namespace yaml
|
|
|
|
/// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
|
|
/// tells the hardware which interpolation parameters to load.
|
|
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
|
|
friend class GCNTargetMachine;
|
|
|
|
Register TIDReg = AMDGPU::NoRegister;
|
|
|
|
// Registers that may be reserved for spilling purposes. These may be the same
|
|
// as the input registers.
|
|
Register ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
|
|
|
|
// This is the the unswizzled offset from the current dispatch's scratch wave
|
|
// base to the beginning of the current function's frame.
|
|
Register FrameOffsetReg = AMDGPU::FP_REG;
|
|
|
|
// This is an ABI register used in the non-entry calling convention to
|
|
// communicate the unswizzled offset from the current dispatch's scratch wave
|
|
// base to the beginning of the new function's frame.
|
|
Register StackPtrOffsetReg = AMDGPU::SP_REG;
|
|
|
|
AMDGPUFunctionArgInfo ArgInfo;
|
|
|
|
// Graphics info.
|
|
unsigned PSInputAddr = 0;
|
|
unsigned PSInputEnable = 0;
|
|
|
|
/// Number of bytes of arguments this function has on the stack. If the callee
|
|
/// is expected to restore the argument stack this should be a multiple of 16,
|
|
/// all usable during a tail call.
|
|
///
|
|
/// The alternative would forbid tail call optimisation in some cases: if we
|
|
/// want to transfer control from a function with 8-bytes of stack-argument
|
|
/// space to a function with 16-bytes then misalignment of this value would
|
|
/// make a stack adjustment necessary, which could not be undone by the
|
|
/// callee.
|
|
unsigned BytesInStackArgArea = 0;
|
|
|
|
bool ReturnsVoid = true;
|
|
|
|
// A pair of default/requested minimum/maximum flat work group sizes.
|
|
// Minimum - first, maximum - second.
|
|
std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};
|
|
|
|
// A pair of default/requested minimum/maximum number of waves per execution
|
|
// unit. Minimum - first, maximum - second.
|
|
std::pair<unsigned, unsigned> WavesPerEU = {0, 0};
|
|
|
|
DenseMap<const Value *,
|
|
std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
|
|
DenseMap<const Value *,
|
|
std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;
|
|
std::unique_ptr<const AMDGPUGWSResourcePseudoSourceValue> GWSResourcePSV;
|
|
|
|
private:
|
|
unsigned LDSWaveSpillSize = 0;
|
|
unsigned NumUserSGPRs = 0;
|
|
unsigned NumSystemSGPRs = 0;
|
|
|
|
bool HasSpilledSGPRs = false;
|
|
bool HasSpilledVGPRs = false;
|
|
bool HasNonSpillStackObjects = false;
|
|
bool IsStackRealigned = false;
|
|
|
|
unsigned NumSpilledSGPRs = 0;
|
|
unsigned NumSpilledVGPRs = 0;
|
|
|
|
// Feature bits required for inputs passed in user SGPRs.
|
|
bool PrivateSegmentBuffer : 1;
|
|
bool DispatchPtr : 1;
|
|
bool QueuePtr : 1;
|
|
bool KernargSegmentPtr : 1;
|
|
bool DispatchID : 1;
|
|
bool FlatScratchInit : 1;
|
|
|
|
// Feature bits required for inputs passed in system SGPRs.
|
|
bool WorkGroupIDX : 1; // Always initialized.
|
|
bool WorkGroupIDY : 1;
|
|
bool WorkGroupIDZ : 1;
|
|
bool WorkGroupInfo : 1;
|
|
bool PrivateSegmentWaveByteOffset : 1;
|
|
|
|
bool WorkItemIDX : 1; // Always initialized.
|
|
bool WorkItemIDY : 1;
|
|
bool WorkItemIDZ : 1;
|
|
|
|
// Private memory buffer
|
|
// Compute directly in sgpr[0:1]
|
|
// Other shaders indirect 64-bits at sgpr[0:1]
|
|
bool ImplicitBufferPtr : 1;
|
|
|
|
// Pointer to where the ABI inserts special kernel arguments separate from the
|
|
// user arguments. This is an offset from the KernargSegmentPtr.
|
|
bool ImplicitArgPtr : 1;
|
|
|
|
// The hard-wired high half of the address of the global information table
|
|
// for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
|
|
// current hardware only allows a 16 bit value.
|
|
unsigned GITPtrHigh;
|
|
|
|
unsigned HighBitsOf32BitAddress;
|
|
unsigned GDSSize;
|
|
|
|
// Current recorded maximum possible occupancy.
|
|
unsigned Occupancy;
|
|
|
|
MCPhysReg getNextUserSGPR() const;
|
|
|
|
MCPhysReg getNextSystemSGPR() const;
|
|
|
|
public:
|
|
struct SpilledReg {
|
|
Register VGPR;
|
|
int Lane = -1;
|
|
|
|
SpilledReg() = default;
|
|
SpilledReg(Register R, int L) : VGPR (R), Lane (L) {}
|
|
|
|
bool hasLane() { return Lane != -1;}
|
|
bool hasReg() { return VGPR != 0;}
|
|
};
|
|
|
|
struct SGPRSpillVGPRCSR {
|
|
// VGPR used for SGPR spills
|
|
Register VGPR;
|
|
|
|
// If the VGPR is a CSR, the stack slot used to save/restore it in the
|
|
// prolog/epilog.
|
|
Optional<int> FI;
|
|
|
|
SGPRSpillVGPRCSR(Register V, Optional<int> F) : VGPR(V), FI(F) {}
|
|
};
|
|
|
|
struct VGPRSpillToAGPR {
|
|
SmallVector<MCPhysReg, 32> Lanes;
|
|
bool FullyAllocated = false;
|
|
};
|
|
|
|
SparseBitVector<> WWMReservedRegs;
|
|
|
|
void ReserveWWMRegister(Register Reg) { WWMReservedRegs.set(Reg); }
|
|
|
|
private:
|
|
// Track VGPR + wave index for each subregister of the SGPR spilled to
|
|
// frameindex key.
|
|
DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
|
|
unsigned NumVGPRSpillLanes = 0;
|
|
SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;
|
|
|
|
DenseMap<int, VGPRSpillToAGPR> VGPRToAGPRSpills;
|
|
|
|
// AGPRs used for VGPR spills.
|
|
SmallVector<MCPhysReg, 32> SpillAGPR;
|
|
|
|
// VGPRs used for AGPR spills.
|
|
SmallVector<MCPhysReg, 32> SpillVGPR;
|
|
|
|
public: // FIXME
|
|
/// If this is set, an SGPR used for save/restore of the register used for the
|
|
/// frame pointer.
|
|
Register SGPRForFPSaveRestoreCopy;
|
|
Optional<int> FramePointerSaveIndex;
|
|
|
|
/// If this is set, an SGPR used for save/restore of the register used for the
|
|
/// base pointer.
|
|
Register SGPRForBPSaveRestoreCopy;
|
|
Optional<int> BasePointerSaveIndex;
|
|
|
|
Register VGPRReservedForSGPRSpill;
|
|
bool isCalleeSavedReg(const MCPhysReg *CSRegs, MCPhysReg Reg);
|
|
|
|
public:
|
|
SIMachineFunctionInfo(const MachineFunction &MF);
|
|
|
|
bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);
|
|
|
|
ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
|
|
auto I = SGPRToVGPRSpills.find(FrameIndex);
|
|
return (I == SGPRToVGPRSpills.end()) ?
|
|
ArrayRef<SpilledReg>() : makeArrayRef(I->second);
|
|
}
|
|
|
|
ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
|
|
return SpillVGPRs;
|
|
}
|
|
|
|
void setSGPRSpillVGPRs(Register NewVGPR, Optional<int> newFI, int Index) {
|
|
SpillVGPRs[Index].VGPR = NewVGPR;
|
|
SpillVGPRs[Index].FI = newFI;
|
|
VGPRReservedForSGPRSpill = NewVGPR;
|
|
}
|
|
|
|
bool removeVGPRForSGPRSpill(Register ReservedVGPR, MachineFunction &MF);
|
|
|
|
ArrayRef<MCPhysReg> getAGPRSpillVGPRs() const {
|
|
return SpillAGPR;
|
|
}
|
|
|
|
ArrayRef<MCPhysReg> getVGPRSpillAGPRs() const {
|
|
return SpillVGPR;
|
|
}
|
|
|
|
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const {
|
|
auto I = VGPRToAGPRSpills.find(FrameIndex);
|
|
return (I == VGPRToAGPRSpills.end()) ? (MCPhysReg)AMDGPU::NoRegister
|
|
: I->second.Lanes[Lane];
|
|
}
|
|
|
|
bool haveFreeLanesForSGPRSpill(const MachineFunction &MF,
|
|
unsigned NumLane) const;
|
|
bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
|
|
bool reserveVGPRforSGPRSpills(MachineFunction &MF);
|
|
bool allocateVGPRSpillToAGPR(MachineFunction &MF, int FI, bool isAGPRtoVGPR);
|
|
void removeDeadFrameIndices(MachineFrameInfo &MFI);
|
|
|
|
bool hasCalculatedTID() const { return TIDReg != 0; };
|
|
Register getTIDReg() const { return TIDReg; };
|
|
void setTIDReg(Register Reg) { TIDReg = Reg; }
|
|
|
|
unsigned getBytesInStackArgArea() const {
|
|
return BytesInStackArgArea;
|
|
}
|
|
|
|
void setBytesInStackArgArea(unsigned Bytes) {
|
|
BytesInStackArgArea = Bytes;
|
|
}
|
|
|
|
// Add user SGPRs.
|
|
Register addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
|
|
Register addDispatchPtr(const SIRegisterInfo &TRI);
|
|
Register addQueuePtr(const SIRegisterInfo &TRI);
|
|
Register addKernargSegmentPtr(const SIRegisterInfo &TRI);
|
|
Register addDispatchID(const SIRegisterInfo &TRI);
|
|
Register addFlatScratchInit(const SIRegisterInfo &TRI);
|
|
Register addImplicitBufferPtr(const SIRegisterInfo &TRI);
|
|
|
|
// Add system SGPRs.
|
|
Register addWorkGroupIDX() {
|
|
ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
|
|
NumSystemSGPRs += 1;
|
|
return ArgInfo.WorkGroupIDX.getRegister();
|
|
}
|
|
|
|
Register addWorkGroupIDY() {
|
|
ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
|
|
NumSystemSGPRs += 1;
|
|
return ArgInfo.WorkGroupIDY.getRegister();
|
|
}
|
|
|
|
Register addWorkGroupIDZ() {
|
|
ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
|
|
NumSystemSGPRs += 1;
|
|
return ArgInfo.WorkGroupIDZ.getRegister();
|
|
}
|
|
|
|
Register addWorkGroupInfo() {
|
|
ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
|
|
NumSystemSGPRs += 1;
|
|
return ArgInfo.WorkGroupInfo.getRegister();
|
|
}
|
|
|
|
// Add special VGPR inputs
|
|
void setWorkItemIDX(ArgDescriptor Arg) {
|
|
ArgInfo.WorkItemIDX = Arg;
|
|
}
|
|
|
|
void setWorkItemIDY(ArgDescriptor Arg) {
|
|
ArgInfo.WorkItemIDY = Arg;
|
|
}
|
|
|
|
void setWorkItemIDZ(ArgDescriptor Arg) {
|
|
ArgInfo.WorkItemIDZ = Arg;
|
|
}
|
|
|
|
Register addPrivateSegmentWaveByteOffset() {
|
|
ArgInfo.PrivateSegmentWaveByteOffset
|
|
= ArgDescriptor::createRegister(getNextSystemSGPR());
|
|
NumSystemSGPRs += 1;
|
|
return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
|
|
}
|
|
|
|
void setPrivateSegmentWaveByteOffset(Register Reg) {
|
|
ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
|
|
}
|
|
|
|
bool hasPrivateSegmentBuffer() const {
|
|
return PrivateSegmentBuffer;
|
|
}
|
|
|
|
bool hasDispatchPtr() const {
|
|
return DispatchPtr;
|
|
}
|
|
|
|
bool hasQueuePtr() const {
|
|
return QueuePtr;
|
|
}
|
|
|
|
bool hasKernargSegmentPtr() const {
|
|
return KernargSegmentPtr;
|
|
}
|
|
|
|
bool hasDispatchID() const {
|
|
return DispatchID;
|
|
}
|
|
|
|
bool hasFlatScratchInit() const {
|
|
return FlatScratchInit;
|
|
}
|
|
|
|
bool hasWorkGroupIDX() const {
|
|
return WorkGroupIDX;
|
|
}
|
|
|
|
bool hasWorkGroupIDY() const {
|
|
return WorkGroupIDY;
|
|
}
|
|
|
|
bool hasWorkGroupIDZ() const {
|
|
return WorkGroupIDZ;
|
|
}
|
|
|
|
bool hasWorkGroupInfo() const {
|
|
return WorkGroupInfo;
|
|
}
|
|
|
|
bool hasPrivateSegmentWaveByteOffset() const {
|
|
return PrivateSegmentWaveByteOffset;
|
|
}
|
|
|
|
bool hasWorkItemIDX() const {
|
|
return WorkItemIDX;
|
|
}
|
|
|
|
bool hasWorkItemIDY() const {
|
|
return WorkItemIDY;
|
|
}
|
|
|
|
bool hasWorkItemIDZ() const {
|
|
return WorkItemIDZ;
|
|
}
|
|
|
|
bool hasImplicitArgPtr() const {
|
|
return ImplicitArgPtr;
|
|
}
|
|
|
|
bool hasImplicitBufferPtr() const {
|
|
return ImplicitBufferPtr;
|
|
}
|
|
|
|
AMDGPUFunctionArgInfo &getArgInfo() {
|
|
return ArgInfo;
|
|
}
|
|
|
|
const AMDGPUFunctionArgInfo &getArgInfo() const {
|
|
return ArgInfo;
|
|
}
|
|
|
|
std::tuple<const ArgDescriptor *, const TargetRegisterClass *, LLT>
|
|
getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
|
|
return ArgInfo.getPreloadedValue(Value);
|
|
}
|
|
|
|
MCRegister getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
|
|
auto Arg = std::get<0>(ArgInfo.getPreloadedValue(Value));
|
|
return Arg ? Arg->getRegister() : MCRegister();
|
|
}
|
|
|
|
unsigned getGITPtrHigh() const {
|
|
return GITPtrHigh;
|
|
}
|
|
|
|
Register getGITPtrLoReg(const MachineFunction &MF) const;
|
|
|
|
uint32_t get32BitAddressHighBits() const {
|
|
return HighBitsOf32BitAddress;
|
|
}
|
|
|
|
unsigned getGDSSize() const {
|
|
return GDSSize;
|
|
}
|
|
|
|
unsigned getNumUserSGPRs() const {
|
|
return NumUserSGPRs;
|
|
}
|
|
|
|
unsigned getNumPreloadedSGPRs() const {
|
|
return NumUserSGPRs + NumSystemSGPRs;
|
|
}
|
|
|
|
Register getPrivateSegmentWaveByteOffsetSystemSGPR() const {
|
|
return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
|
|
}
|
|
|
|
/// Returns the physical register reserved for use as the resource
|
|
/// descriptor for scratch accesses.
|
|
Register getScratchRSrcReg() const {
|
|
return ScratchRSrcReg;
|
|
}
|
|
|
|
void setScratchRSrcReg(Register Reg) {
|
|
assert(Reg != 0 && "Should never be unset");
|
|
ScratchRSrcReg = Reg;
|
|
}
|
|
|
|
Register getFrameOffsetReg() const {
|
|
return FrameOffsetReg;
|
|
}
|
|
|
|
void setFrameOffsetReg(Register Reg) {
|
|
assert(Reg != 0 && "Should never be unset");
|
|
FrameOffsetReg = Reg;
|
|
}
|
|
|
|
void setStackPtrOffsetReg(Register Reg) {
|
|
assert(Reg != 0 && "Should never be unset");
|
|
StackPtrOffsetReg = Reg;
|
|
}
|
|
|
|
// Note the unset value for this is AMDGPU::SP_REG rather than
|
|
// NoRegister. This is mostly a workaround for MIR tests where state that
|
|
// can't be directly computed from the function is not preserved in serialized
|
|
// MIR.
|
|
Register getStackPtrOffsetReg() const {
|
|
return StackPtrOffsetReg;
|
|
}
|
|
|
|
Register getQueuePtrUserSGPR() const {
|
|
return ArgInfo.QueuePtr.getRegister();
|
|
}
|
|
|
|
Register getImplicitBufferPtrUserSGPR() const {
|
|
return ArgInfo.ImplicitBufferPtr.getRegister();
|
|
}
|
|
|
|
bool hasSpilledSGPRs() const {
|
|
return HasSpilledSGPRs;
|
|
}
|
|
|
|
void setHasSpilledSGPRs(bool Spill = true) {
|
|
HasSpilledSGPRs = Spill;
|
|
}
|
|
|
|
bool hasSpilledVGPRs() const {
|
|
return HasSpilledVGPRs;
|
|
}
|
|
|
|
void setHasSpilledVGPRs(bool Spill = true) {
|
|
HasSpilledVGPRs = Spill;
|
|
}
|
|
|
|
bool hasNonSpillStackObjects() const {
|
|
return HasNonSpillStackObjects;
|
|
}
|
|
|
|
void setHasNonSpillStackObjects(bool StackObject = true) {
|
|
HasNonSpillStackObjects = StackObject;
|
|
}
|
|
|
|
bool isStackRealigned() const {
|
|
return IsStackRealigned;
|
|
}
|
|
|
|
void setIsStackRealigned(bool Realigned = true) {
|
|
IsStackRealigned = Realigned;
|
|
}
|
|
|
|
unsigned getNumSpilledSGPRs() const {
|
|
return NumSpilledSGPRs;
|
|
}
|
|
|
|
unsigned getNumSpilledVGPRs() const {
|
|
return NumSpilledVGPRs;
|
|
}
|
|
|
|
void addToSpilledSGPRs(unsigned num) {
|
|
NumSpilledSGPRs += num;
|
|
}
|
|
|
|
void addToSpilledVGPRs(unsigned num) {
|
|
NumSpilledVGPRs += num;
|
|
}
|
|
|
|
unsigned getPSInputAddr() const {
|
|
return PSInputAddr;
|
|
}
|
|
|
|
unsigned getPSInputEnable() const {
|
|
return PSInputEnable;
|
|
}
|
|
|
|
bool isPSInputAllocated(unsigned Index) const {
|
|
return PSInputAddr & (1 << Index);
|
|
}
|
|
|
|
void markPSInputAllocated(unsigned Index) {
|
|
PSInputAddr |= 1 << Index;
|
|
}
|
|
|
|
void markPSInputEnabled(unsigned Index) {
|
|
PSInputEnable |= 1 << Index;
|
|
}
|
|
|
|
bool returnsVoid() const {
|
|
return ReturnsVoid;
|
|
}
|
|
|
|
void setIfReturnsVoid(bool Value) {
|
|
ReturnsVoid = Value;
|
|
}
|
|
|
|
/// \returns A pair of default/requested minimum/maximum flat work group sizes
|
|
/// for this function.
|
|
std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
|
|
return FlatWorkGroupSizes;
|
|
}
|
|
|
|
/// \returns Default/requested minimum flat work group size for this function.
|
|
unsigned getMinFlatWorkGroupSize() const {
|
|
return FlatWorkGroupSizes.first;
|
|
}
|
|
|
|
/// \returns Default/requested maximum flat work group size for this function.
|
|
unsigned getMaxFlatWorkGroupSize() const {
|
|
return FlatWorkGroupSizes.second;
|
|
}
|
|
|
|
/// \returns A pair of default/requested minimum/maximum number of waves per
|
|
/// execution unit.
|
|
std::pair<unsigned, unsigned> getWavesPerEU() const {
|
|
return WavesPerEU;
|
|
}
|
|
|
|
/// \returns Default/requested minimum number of waves per execution unit.
|
|
unsigned getMinWavesPerEU() const {
|
|
return WavesPerEU.first;
|
|
}
|
|
|
|
/// \returns Default/requested maximum number of waves per execution unit.
|
|
unsigned getMaxWavesPerEU() const {
|
|
return WavesPerEU.second;
|
|
}
|
|
|
|
/// \returns SGPR used for \p Dim's work group ID.
|
|
Register getWorkGroupIDSGPR(unsigned Dim) const {
|
|
switch (Dim) {
|
|
case 0:
|
|
assert(hasWorkGroupIDX());
|
|
return ArgInfo.WorkGroupIDX.getRegister();
|
|
case 1:
|
|
assert(hasWorkGroupIDY());
|
|
return ArgInfo.WorkGroupIDY.getRegister();
|
|
case 2:
|
|
assert(hasWorkGroupIDZ());
|
|
return ArgInfo.WorkGroupIDZ.getRegister();
|
|
}
|
|
llvm_unreachable("unexpected dimension");
|
|
}
|
|
|
|
unsigned getLDSWaveSpillSize() const {
|
|
return LDSWaveSpillSize;
|
|
}
|
|
|
|
const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
|
|
const Value *BufferRsrc) {
|
|
assert(BufferRsrc);
|
|
auto PSV = BufferPSVs.try_emplace(
|
|
BufferRsrc,
|
|
std::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
|
|
return PSV.first->second.get();
|
|
}
|
|
|
|
const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
|
|
const Value *ImgRsrc) {
|
|
assert(ImgRsrc);
|
|
auto PSV = ImagePSVs.try_emplace(
|
|
ImgRsrc,
|
|
std::make_unique<AMDGPUImagePseudoSourceValue>(TII));
|
|
return PSV.first->second.get();
|
|
}
|
|
|
|
const AMDGPUGWSResourcePseudoSourceValue *getGWSPSV(const SIInstrInfo &TII) {
|
|
if (!GWSResourcePSV) {
|
|
GWSResourcePSV =
|
|
std::make_unique<AMDGPUGWSResourcePseudoSourceValue>(TII);
|
|
}
|
|
|
|
return GWSResourcePSV.get();
|
|
}
|
|
|
|
unsigned getOccupancy() const {
|
|
return Occupancy;
|
|
}
|
|
|
|
unsigned getMinAllowedOccupancy() const {
|
|
if (!isMemoryBound() && !needsWaveLimiter())
|
|
return Occupancy;
|
|
return (Occupancy < 4) ? Occupancy : 4;
|
|
}
|
|
|
|
void limitOccupancy(const MachineFunction &MF);
|
|
|
|
void limitOccupancy(unsigned Limit) {
|
|
if (Occupancy > Limit)
|
|
Occupancy = Limit;
|
|
}
|
|
|
|
void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
|
|
if (Occupancy < Limit)
|
|
Occupancy = Limit;
|
|
limitOccupancy(MF);
|
|
}
|
|
};
|
|
|
|
} // end namespace llvm
|
|
|
|
#endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
|