//===- ConvertKernelFuncToBlob.cpp - MLIR GPU lowering passes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass to convert gpu kernel functions into a
// corresponding binary blob that can be executed on a GPU. Currently
// only translates the function itself but no dependencies.
//
//===----------------------------------------------------------------------===//

#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
|
|
|
|
#include "mlir/Dialect/GPU/GPUDialect.h"
|
|
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
|
|
#include "mlir/IR/Attributes.h"
|
|
#include "mlir/IR/Builders.h"
|
|
#include "mlir/IR/BuiltinOps.h"
|
|
#include "mlir/Pass/Pass.h"
|
|
#include "mlir/Pass/PassRegistry.h"
|
|
#include "mlir/Support/LogicalResult.h"
|
|
|
|
#include "llvm/ADT/Optional.h"
|
|
#include "llvm/ADT/Twine.h"
|
|
#include "llvm/IR/Constants.h"
|
|
#include "llvm/IR/LegacyPassManager.h"
|
|
#include "llvm/IR/Module.h"
|
|
#include "llvm/Support/Error.h"
|
|
#include "llvm/Support/Mutex.h"
|
|
#include "llvm/Support/TargetRegistry.h"
|
|
#include "llvm/Support/TargetSelect.h"
|
|
#include "llvm/Target/TargetMachine.h"
|
|
|
|
using namespace mlir;

namespace {

/// A pass converting tagged kernel modules to a blob with target instructions.
///
/// If tagged as a kernel module, each contained function is translated to
/// user-specified IR. A user provided BlobGenerator then compiles the IR to
/// GPU binary code, which is then attached as an attribute to the GPU module.
/// The function body is erased.
class GpuKernelToBlobPass
    : public PassWrapper<GpuKernelToBlobPass, OperationPass<gpu::GPUModuleOp>> {
public:
  GpuKernelToBlobPass(LoweringCallback loweringCallback,
                      BlobGenerator blobGenerator, StringRef triple,
                      StringRef targetChip, StringRef features,
                      StringRef gpuBinaryAnnotation)
      : loweringCallback(loweringCallback), blobGenerator(blobGenerator),
        triple(triple), targetChip(targetChip), features(features),
        blobAnnotation(gpuBinaryAnnotation) {}

  void runOnOperation() override {
    gpu::GPUModuleOp module = getOperation();

    // Lower the module to an LLVM IR module using a separate context to enable
    // multi-threaded processing.
    llvm::LLVMContext llvmContext;
    std::unique_ptr<llvm::Module> llvmModule =
        loweringCallback(module, llvmContext, "LLVMDialectModule");
    if (!llvmModule)
      return signalPassFailure();

    // Translate the llvm module to a target blob and attach the result as
    // attribute to the module.
    if (auto blobAttr = translateGPUModuleToBinaryAnnotation(
            *llvmModule, module.getLoc(), module.getName()))
      module.setAttr(blobAnnotation, blobAttr);
    else
      signalPassFailure();
  }

private:
  std::string translateModuleToISA(llvm::Module &module,
                                   llvm::TargetMachine &targetMachine);

  /// Converts llvmModule to a blob with target instructions using the
  /// user-provided generator. Location is used for error reporting and name is
  /// forwarded to the blob generator to use in its logging mechanisms.
  OwnedBlob convertModuleToBlob(llvm::Module &llvmModule, Location loc,
                                StringRef name);

  /// Translates llvmModule to a blob with target instructions and returns the
  /// result as attribute.
  StringAttr translateGPUModuleToBinaryAnnotation(llvm::Module &llvmModule,
                                                  Location loc, StringRef name);

  LoweringCallback loweringCallback;
  BlobGenerator blobGenerator;
  llvm::Triple triple;
  StringRef targetChip;
  StringRef features;
  StringRef blobAnnotation;
};

} // anonymous namespace

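// For illustration only (the annotation name and blob contents below are
// assumed example values, not produced by this file): when configured with
// gpuBinaryAnnotation = "nvvm.cubin", a successful run leaves the kernel
// module annotated roughly as
//
//   gpu.module @kernels attributes {nvvm.cubin = "<device binary bytes>"} {
//     ...
//   }
//
// where the string attribute holds whatever bytes the BlobGenerator returned.
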
std::string
GpuKernelToBlobPass::translateModuleToISA(llvm::Module &module,
                                          llvm::TargetMachine &targetMachine) {
  std::string targetISA;
  {
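    // addPassesToEmitFile expects a raw_pwrite_stream, so the string stream is
    // wrapped in a buffer_ostream, which flushes into targetISA when this
    // scope ends.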
    llvm::raw_string_ostream stream(targetISA);
    llvm::buffer_ostream pstream(stream);
    llvm::legacy::PassManager codegenPasses;
    targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
                                      llvm::CGFT_AssemblyFile);
    codegenPasses.run(module);
  }

  return targetISA;
}

OwnedBlob GpuKernelToBlobPass::convertModuleToBlob(llvm::Module &llvmModule,
                                                   Location loc,
                                                   StringRef name) {
  std::unique_ptr<llvm::TargetMachine> targetMachine;
  {
    std::string error;
    const llvm::Target *target =
        llvm::TargetRegistry::lookupTarget("", triple, error);
    if (target == nullptr) {
      emitError(loc, "cannot initialize target triple");
      return {};
    }
    targetMachine.reset(target->createTargetMachine(triple.str(), targetChip,
                                                    features, {}, {}));
    if (targetMachine == nullptr) {
      emitError(loc, "cannot initialize target machine");
      return {};
    }
  }

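  // Make the module's data layout match the target so that codegen below uses
  // the target's type sizes and alignments.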
  llvmModule.setDataLayout(targetMachine->createDataLayout());

  auto targetISA = translateModuleToISA(llvmModule, *targetMachine);

  return blobGenerator(targetISA, loc, name);
}

StringAttr GpuKernelToBlobPass::translateGPUModuleToBinaryAnnotation(
    llvm::Module &llvmModule, Location loc, StringRef name) {
  auto blob = convertModuleToBlob(llvmModule, loc, name);
  if (!blob)
    return {};
  return StringAttr::get({blob->data(), blob->size()}, loc->getContext());
}

std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
mlir::createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback,
                                       BlobGenerator blobGenerator,
                                       StringRef triple, StringRef targetChip,
                                       StringRef features,
                                       StringRef gpuBinaryAnnotation) {
  return std::make_unique<GpuKernelToBlobPass>(loweringCallback, blobGenerator,
                                               triple, targetChip, features,
                                               gpuBinaryAnnotation);
}
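
// A minimal usage sketch (hypothetical values and callbacks, not part of this
// file): a target-specific pipeline would typically wire the pass up roughly
// like this, with `lowerToLLVMDialectAndTranslate` and `compileIsaToBinary`
// standing in for the real lowering callback and the ptxas/HSA-style assembly
// step:
//
//   pm.addPass(createConvertGPUKernelToBlobPass(
//       lowerToLLVMDialectAndTranslate, compileIsaToBinary,
//       "nvptx64-nvidia-cuda", "sm_70", "+ptx60", "nvvm.cubin"));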