You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
26 lines
992 B
26 lines
992 B
// RUN: mlir-opt --gpu-kernel-outlining --convert-gpu-to-nvvm %s | FileCheck %s

// Test input: one GPU kernel that applies two all-reduce operations ("add"
// and "mul") to the same loaded value. The FileCheck directives at the end of
// the function body expect the outlined kernel module to begin with two
// consecutive globals in address space 3, each of type array<32 x float>
// (presumably one scratch buffer per all-reduce; confirm against the
// lowering).
func @main() {
  // Input buffer read by the kernel, plus one output buffer per reduction.
  %data = alloc() : memref<2x6xf32>
  %sum = alloc() : memref<2xf32>
  %mul = alloc() : memref<2xf32>
  // Every grid and block dimension of the launch below is set to this value.
  %c1 = constant 1 : index

  // ADD + MUL
  gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %c1, %grid_y = %c1, %grid_z = %c1)
             threads(%tx, %ty, %tz) in (%block_x = %c1, %block_y = %c1, %block_z = %c1) {
    // Element selected by the block id (row) and thread id (column).
    %val = load %data[%bx, %tx] : memref<2x6xf32>
    // First reduction ("add"); its result is stored at index %bx of %sum.
    %reduced0 = "gpu.all_reduce"(%val) ({}) { op = "add" } : (f32) -> (f32)
    store %reduced0, %sum[%bx] : memref<2xf32>
    // Second reduction ("mul") over the same value, stored at index %bx of %mul.
    %reduced1 = "gpu.all_reduce"(%val) ({}) { op = "mul" } : (f32) -> (f32)
    store %reduced1, %mul[%bx] : memref<2xf32>
    gpu.terminator
  }

  // The directives below are matched against the tool output, not this file;
  // the two global lines must be adjacent and directly follow the module line.
  // CHECK: gpu.module @main_kernel {
  // CHECK-NEXT: llvm.mlir.global internal @{{.*}}() {addr_space = 3 : i32} : !llvm.array<32 x float>
  // CHECK-NEXT: llvm.mlir.global internal @{{.*}}() {addr_space = 3 : i32} : !llvm.array<32 x float>

  return
}