You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

164 lines
6.2 KiB

//===- ConvertKernelFuncToBlob.cpp - MLIR GPU lowering passes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements a pass to convert gpu kernel functions into a
// corresponding binary blob that can be executed on a GPU. Currently
// only translates the function itself but no dependencies.
//
//===----------------------------------------------------------------------===//
#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"
#include "mlir/Dialect/GPU/GPUDialect.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/IR/Attributes.h"
#include "mlir/IR/Builders.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassRegistry.h"
#include "mlir/Support/LogicalResult.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Mutex.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include "llvm/Target/TargetMachine.h"
using namespace mlir;
namespace {
/// A pass converting tagged kernel modules to a blob with target instructions.
///
/// The pass runs on a gpu.module. The module is lowered to an LLVM IR module
/// through the user-provided LoweringCallback, translated to target ISA, and
/// handed to the user-provided BlobGenerator. The resulting binary is attached
/// to the gpu.module itself as a string attribute whose name is given by
/// `gpuBinaryAnnotation`.
class GpuKernelToBlobPass
    : public PassWrapper<GpuKernelToBlobPass, OperationPass<gpu::GPUModuleOp>> {
public:
  /// Creates the pass. The string arguments are copied into the pass so that
  /// the caller's buffers do not need to outlive the constructor call.
  GpuKernelToBlobPass(LoweringCallback loweringCallback,
                      BlobGenerator blobGenerator, StringRef triple,
                      StringRef targetChip, StringRef features,
                      StringRef gpuBinaryAnnotation)
      : loweringCallback(loweringCallback), blobGenerator(blobGenerator),
        triple(triple), targetChip(targetChip.str()), features(features.str()),
        blobAnnotation(gpuBinaryAnnotation.str()) {}

  void runOnOperation() override {
    gpu::GPUModuleOp module = getOperation();

    // Lower the module to an LLVM IR module using a separate context to enable
    // multi-threaded processing.
    llvm::LLVMContext llvmContext;
    std::unique_ptr<llvm::Module> llvmModule =
        loweringCallback(module, llvmContext, "LLVMDialectModule");
    if (!llvmModule)
      return signalPassFailure();

    // Translate the llvm module to a target blob and attach the result as
    // attribute to the module.
    if (auto blobAttr = translateGPUModuleToBinaryAnnotation(
            *llvmModule, module.getLoc(), module.getName()))
      module.setAttr(blobAnnotation, blobAttr);
    else
      signalPassFailure();
  }

private:
  /// Runs the code generation pipeline of `targetMachine` on `module` and
  /// returns the emitted assembly text.
  std::string translateModuleToISA(llvm::Module &module,
                                   llvm::TargetMachine &targetMachine);

  /// Converts llvmModule to a blob with target instructions using the
  /// user-provided generator. Location is used for error reporting and name is
  /// forwarded to the blob generator to use in its logging mechanisms.
  OwnedBlob convertModuleToBlob(llvm::Module &llvmModule, Location loc,
                                StringRef name);

  /// Translates llvmModule to a blob with target instructions and returns the
  /// result as attribute.
  StringAttr translateGPUModuleToBinaryAnnotation(llvm::Module &llvmModule,
                                                  Location loc, StringRef name);

  LoweringCallback loweringCallback;
  BlobGenerator blobGenerator;
  llvm::Triple triple;
  // Owning copies: the pass object outlives the StringRefs handed to the
  // constructor, so holding non-owning views here could dangle.
  std::string targetChip;
  std::string features;
  std::string blobAnnotation;
};
} // anonymous namespace
/// Runs the code generation pipeline of `targetMachine` on `module` and
/// returns the emitted assembly as a string. Returns an empty string when the
/// target machine reports that it cannot emit an assembly file.
std::string
GpuKernelToBlobPass::translateModuleToISA(llvm::Module &module,
                                          llvm::TargetMachine &targetMachine) {
  std::string targetISA;
  {
    // The streams live in their own scope so that they are destroyed, and
    // therefore flushed into `targetISA`, before the string is returned.
    llvm::raw_string_ostream stream(targetISA);
    llvm::buffer_ostream pstream(stream);
    llvm::legacy::PassManager codegenPasses;
    // addPassesToEmitFile returns true when emission of the requested file
    // type is not supported; running the pass manager in that state is
    // pointless, so bail out instead of ignoring the result.
    if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
                                          llvm::CGFT_AssemblyFile))
      return {};
    codegenPasses.run(module);
  }
  return targetISA;
}
/// Compiles `llvmModule` for the configured triple/chip/features and passes
/// the resulting ISA text to the user-provided blob generator.
///
/// `loc` is used to report errors, `name` is forwarded to the blob generator.
/// Returns a null blob (after emitting an error at `loc`) if the target or the
/// target machine cannot be created; otherwise returns whatever the blob
/// generator produces.
OwnedBlob GpuKernelToBlobPass::convertModuleToBlob(llvm::Module &llvmModule,
                                                   Location loc,
                                                   StringRef name) {
  // Look up the target for the requested triple. `lookupError` receives the
  // registry's explanation on failure and is surfaced in the diagnostic.
  std::string lookupError;
  const llvm::Target *target =
      llvm::TargetRegistry::lookupTarget("", triple, lookupError);
  if (target == nullptr) {
    emitError(loc,
              llvm::Twine("cannot initialize target triple: ") + lookupError);
    return {};
  }

  std::unique_ptr<llvm::TargetMachine> targetMachine(
      target->createTargetMachine(triple.str(), targetChip, features, {}, {}));
  if (targetMachine == nullptr) {
    emitError(loc, "cannot initialize target machine");
    return {};
  }

  // The module must carry the target's data layout before code generation.
  llvmModule.setDataLayout(targetMachine->createDataLayout());

  auto targetISA = translateModuleToISA(llvmModule, *targetMachine);
  return blobGenerator(targetISA, loc, name);
}
/// Converts `llvmModule` to a target blob and wraps the blob's bytes in a
/// StringAttr created in the context of `loc`. Returns a null attribute when
/// the conversion yields no blob.
StringAttr GpuKernelToBlobPass::translateGPUModuleToBinaryAnnotation(
    llvm::Module &llvmModule, Location loc, StringRef name) {
  OwnedBlob binary = convertModuleToBlob(llvmModule, loc, name);
  if (!binary)
    return StringAttr();

  // View the blob's storage as a string; the attribute is built from it.
  StringRef bytes(binary->data(), binary->size());
  return StringAttr::get(bytes, loc->getContext());
}
/// Factory for the kernel-to-blob pass: forwards all arguments unchanged to
/// the GpuKernelToBlobPass constructor and returns the pass through its
/// OperationPass<gpu::GPUModuleOp> interface.
std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
mlir::createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback,
                                       BlobGenerator blobGenerator,
                                       StringRef triple, StringRef targetChip,
                                       StringRef features,
                                       StringRef gpuBinaryAnnotation) {
  std::unique_ptr<OperationPass<gpu::GPUModuleOp>> pass =
      std::make_unique<GpuKernelToBlobPass>(loweringCallback, blobGenerator,
                                            triple, targetChip, features,
                                            gpuBinaryAnnotation);
  return pass;
}