You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

683 lines
35 KiB

// NOTE: Assertions have been autogenerated by utils/generate-test-checks.py
// RUN: mlir-opt %s -test-sparsification | FileCheck %s
// Trait for a 1-D elementwise kernel with a single tensor operand `a` and
// output `x`; both carry the dense ("D") annotation. The scalar operand of
// the kernel is captured from the enclosing function, so it has no map here.
#trait_d = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // operand a
    affine_map<(i) -> (i)>   // result x
  ],
  sparse = [
    [ "D" ],  // operand a
    [ "D" ]   // result x
  ],
  iterator_types = ["parallel"],
  doc = "x(i) = a(i) OP b"
}
// CHECK-LABEL: func @add_d(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: f32) -> tensor<32xf32> {
// CHECK: %[[VAL_2:.*]] = constant 32 : index
// CHECK: %[[VAL_3:.*]] = constant 0 : index
// CHECK: %[[VAL_4:.*]] = constant 1 : index
// CHECK: %[[VAL_5:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_6:.*]] = alloca() : memref<32xf32>
// CHECK: scf.for %[[VAL_7:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] {
// CHECK: %[[VAL_8:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_7]]] : memref<32xf32>
// CHECK: %[[VAL_9:.*]] = addf %[[VAL_8]], %[[VAL_1]] : f32
// CHECK: store %[[VAL_9]], %[[VAL_6]]{{\[}}%[[VAL_7]]] : memref<32xf32>
// CHECK: }
// CHECK: %[[VAL_10:.*]] = tensor_load %[[VAL_6]] : memref<32xf32>
// CHECK: return %[[VAL_10]] : tensor<32xf32>
// CHECK: }
// Dense vector plus scalar, x(i) = a(i) + b, using #trait_d. The
// expectations above describe a single scf.for over all 32 elements.
func @add_d(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
  %res = linalg.generic #trait_d
    ins(%arga: tensor<32xf32>) {
      ^bb(%elem: f32):
        %sum = addf %elem, %argb : f32
        linalg.yield %sum : f32
  } -> tensor<32xf32>
  return %res : tensor<32xf32>
}
// CHECK-LABEL: func @mul_d(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: f32) -> tensor<32xf32> {
// CHECK: %[[VAL_2:.*]] = constant 32 : index
// CHECK: %[[VAL_3:.*]] = constant 0 : index
// CHECK: %[[VAL_4:.*]] = constant 1 : index
// CHECK: %[[VAL_5:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_6:.*]] = alloca() : memref<32xf32>
// CHECK: scf.for %[[VAL_7:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] {
// CHECK: %[[VAL_8:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_7]]] : memref<32xf32>
// CHECK: %[[VAL_9:.*]] = mulf %[[VAL_8]], %[[VAL_1]] : f32
// CHECK: store %[[VAL_9]], %[[VAL_6]]{{\[}}%[[VAL_7]]] : memref<32xf32>
// CHECK: }
// CHECK: %[[VAL_10:.*]] = tensor_load %[[VAL_6]] : memref<32xf32>
// CHECK: return %[[VAL_10]] : tensor<32xf32>
// CHECK: }
// Dense vector times scalar, x(i) = a(i) * b, using #trait_d. The
// expectations above describe a single scf.for over all 32 elements.
func @mul_d(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
  %res = linalg.generic #trait_d
    ins(%arga: tensor<32xf32>) {
      ^bb(%elem: f32):
        %prod = mulf %elem, %argb : f32
        linalg.yield %prod : f32
  } -> tensor<32xf32>
  return %res : tensor<32xf32>
}
// Trait for a 1-D elementwise kernel with a single tensor operand `a`
// annotated sparse ("S") and a dense ("D") output `x`.
#trait_s = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // operand a
    affine_map<(i) -> (i)>   // result x
  ],
  sparse = [
    [ "S" ],  // operand a
    [ "D" ]   // result x
  ],
  iterator_types = ["parallel"],
  doc = "x(i) = a(i) OP b"
}
// CHECK-LABEL: func @add_s(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: f32) -> tensor<32xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 32 : index
// CHECK: %[[VAL_4:.*]] = constant 0 : index
// CHECK: %[[VAL_5:.*]] = constant true
// CHECK: %[[VAL_6:.*]] = constant 1 : index
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_10:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_11:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_12:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK: %[[VAL_13:.*]]:2 = scf.while (%[[VAL_14:.*]] = %[[VAL_11]], %[[VAL_15:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_16:.*]] = cmpi "ult", %[[VAL_14]], %[[VAL_12]] : index
// CHECK: scf.condition(%[[VAL_16]]) %[[VAL_14]], %[[VAL_15]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_17:.*]]: index, %[[VAL_18:.*]]: index):
// CHECK: %[[VAL_19:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_17]]] : memref<?xindex>
// CHECK: %[[VAL_20:.*]] = cmpi "eq", %[[VAL_19]], %[[VAL_18]] : index
// CHECK: scf.if %[[VAL_20]] {
// CHECK: %[[VAL_21:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_17]]] : memref<?xf32>
// CHECK: %[[VAL_22:.*]] = addf %[[VAL_21]], %[[VAL_1]] : f32
// CHECK: store %[[VAL_22]], %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref<32xf32>
// CHECK: } else {
// CHECK: scf.if %[[VAL_5]] {
// CHECK: store %[[VAL_1]], %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref<32xf32>
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_23:.*]] = cmpi "eq", %[[VAL_19]], %[[VAL_18]] : index
// CHECK: %[[VAL_24:.*]] = addi %[[VAL_17]], %[[VAL_6]] : index
// CHECK: %[[VAL_25:.*]] = select %[[VAL_23]], %[[VAL_24]], %[[VAL_17]] : index
// CHECK: %[[VAL_26:.*]] = addi %[[VAL_18]], %[[VAL_6]] : index
// CHECK: scf.yield %[[VAL_25]], %[[VAL_26]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_27:.*]] = %[[VAL_28:.*]]#1 to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: store %[[VAL_1]], %[[VAL_10]]{{\[}}%[[VAL_27]]] : memref<32xf32>
// CHECK: }
// CHECK: %[[VAL_29:.*]] = tensor_load %[[VAL_10]] : memref<32xf32>
// CHECK: return %[[VAL_29]] : tensor<32xf32>
// CHECK: }
// Sparse vector plus scalar, x(i) = a(i) + b, using #trait_s. Every output
// index receives a value, so the expectations above walk the stored entries
// of `a` together with the dense index in an scf.while and then finish the
// remaining indices with an scf.for.
func @add_s(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
  %res = linalg.generic #trait_s
    ins(%arga: tensor<32xf32>) {
      ^bb(%elem: f32):
        %sum = addf %elem, %argb : f32
        linalg.yield %sum : f32
  } -> tensor<32xf32>
  return %res : tensor<32xf32>
}
// CHECK-LABEL: func @repeated_add_s(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: %[[VAL_1:.*]] = constant 999 : index
// CHECK: %[[VAL_2:.*]] = constant 0 : index
// CHECK: %[[VAL_3:.*]] = constant 1 : index
// CHECK: %[[VAL_4:.*]] = alloca(%[[VAL_1]]) : memref<?xindex>
// CHECK: %[[VAL_5:.*]] = alloca(%[[VAL_1]]) : memref<?xindex>
// CHECK: %[[VAL_6:.*]] = alloca(%[[VAL_1]]) : memref<?xf32>
// CHECK: %[[VAL_7:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_8:.*]] = load %[[VAL_4]]{{\[}}%[[VAL_2]]] : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = load %[[VAL_4]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_10:.*]] = %[[VAL_8]] to %[[VAL_9]] step %[[VAL_3]] {
// CHECK: %[[VAL_11:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_10]]] : memref<?xindex>
// CHECK: %[[VAL_12:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<?xf32>
// CHECK: %[[VAL_13:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<?xf32>
// CHECK: %[[VAL_14:.*]] = addf %[[VAL_12]], %[[VAL_13]] : f32
// CHECK: %[[VAL_15:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<?xf32>
// CHECK: %[[VAL_16:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_10]]] : memref<?xf32>
// CHECK: %[[VAL_17:.*]] = addf %[[VAL_15]], %[[VAL_16]] : f32
// CHECK: %[[VAL_18:.*]] = addf %[[VAL_14]], %[[VAL_17]] : f32
// CHECK: store %[[VAL_18]], %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref<32xf32>
// CHECK: }
// CHECK: %[[VAL_19:.*]] = tensor_load %[[VAL_7]] : memref<32xf32>
// CHECK: return %[[VAL_19]] : tensor<32xf32>
// CHECK: }
// x(i) = (a(i) + a(i)) + (a(i) + a(i)) using #trait_s. All four uses refer
// to the same sparse tensor, so the expectations above contain one scf.for
// over the stored entries of `a` and no per-use conditionals.
func @repeated_add_s(%arga: tensor<32xf32>) -> tensor<32xf32> {
  %res = linalg.generic #trait_s
    ins(%arga: tensor<32xf32>) {
      ^bb(%elem: f32):
        %left = addf %elem, %elem : f32
        %right = addf %elem, %elem : f32
        %total = addf %left, %right : f32
        linalg.yield %total : f32
  } -> tensor<32xf32>
  return %res : tensor<32xf32>
}
// CHECK-LABEL: func @mul_s(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: f32) -> tensor<32xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 0 : index
// CHECK: %[[VAL_4:.*]] = constant 1 : index
// CHECK: %[[VAL_5:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_8:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_9:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] {
// CHECK: %[[VAL_12:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_11]]] : memref<?xindex>
// CHECK: %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref<?xf32>
// CHECK: %[[VAL_14:.*]] = mulf %[[VAL_13]], %[[VAL_1]] : f32
// CHECK: store %[[VAL_14]], %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<32xf32>
// CHECK: }
// CHECK: %[[VAL_15:.*]] = tensor_load %[[VAL_8]] : memref<32xf32>
// CHECK: return %[[VAL_15]] : tensor<32xf32>
// CHECK: }
// Sparse vector times scalar, x(i) = a(i) * b, using #trait_s. The
// expectations above iterate only over the stored entries of `a` with a
// single scf.for.
func @mul_s(%arga: tensor<32xf32>, %argb: f32) -> tensor<32xf32> {
  %res = linalg.generic #trait_s
    ins(%arga: tensor<32xf32>) {
      ^bb(%elem: f32):
        %prod = mulf %elem, %argb : f32
        linalg.yield %prod : f32
  } -> tensor<32xf32>
  return %res : tensor<32xf32>
}
// Trait for a 1-D binary elementwise kernel: operands `a` and `b` and the
// output `x` are all annotated dense ("D").
#trait_dd = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // operand a
    affine_map<(i) -> (i)>,  // operand b
    affine_map<(i) -> (i)>   // result x
  ],
  sparse = [
    [ "D" ],  // operand a
    [ "D" ],  // operand b
    [ "D" ]   // result x
  ],
  iterator_types = ["parallel"],
  doc = "x(i) = a(i) OP b(i)"
}
// CHECK-LABEL: func @add_dd(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: %[[VAL_2:.*]] = constant 32 : index
// CHECK: %[[VAL_3:.*]] = constant 0 : index
// CHECK: %[[VAL_4:.*]] = constant 1 : index
// CHECK: %[[VAL_5:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_6:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_7:.*]] = alloca() : memref<32xf32>
// CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] {
// CHECK: %[[VAL_9:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_8]]] : memref<32xf32>
// CHECK: %[[VAL_10:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_8]]] : memref<32xf32>
// CHECK: %[[VAL_11:.*]] = addf %[[VAL_9]], %[[VAL_10]] : f32
// CHECK: store %[[VAL_11]], %[[VAL_7]]{{\[}}%[[VAL_8]]] : memref<32xf32>
// CHECK: }
// CHECK: %[[VAL_12:.*]] = tensor_load %[[VAL_7]] : memref<32xf32>
// CHECK: return %[[VAL_12]] : tensor<32xf32>
// CHECK: }
// Dense plus dense, x(i) = a(i) + b(i), using #trait_dd. The expectations
// above describe a single scf.for over all 32 elements.
func @add_dd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
  %res = linalg.generic #trait_dd
    ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
      ^bb(%lhs: f32, %rhs: f32):
        %sum = addf %lhs, %rhs : f32
        linalg.yield %sum : f32
  } -> tensor<32xf32>
  return %res : tensor<32xf32>
}
// CHECK-LABEL: func @mul_dd(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: %[[VAL_2:.*]] = constant 32 : index
// CHECK: %[[VAL_3:.*]] = constant 0 : index
// CHECK: %[[VAL_4:.*]] = constant 1 : index
// CHECK: %[[VAL_5:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_6:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_7:.*]] = alloca() : memref<32xf32>
// CHECK: scf.for %[[VAL_8:.*]] = %[[VAL_3]] to %[[VAL_2]] step %[[VAL_4]] {
// CHECK: %[[VAL_9:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_8]]] : memref<32xf32>
// CHECK: %[[VAL_10:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_8]]] : memref<32xf32>
// CHECK: %[[VAL_11:.*]] = mulf %[[VAL_9]], %[[VAL_10]] : f32
// CHECK: store %[[VAL_11]], %[[VAL_7]]{{\[}}%[[VAL_8]]] : memref<32xf32>
// CHECK: }
// CHECK: %[[VAL_12:.*]] = tensor_load %[[VAL_7]] : memref<32xf32>
// CHECK: return %[[VAL_12]] : tensor<32xf32>
// CHECK: }
// Dense times dense, x(i) = a(i) * b(i), using #trait_dd. The expectations
// above describe a single scf.for over all 32 elements.
func @mul_dd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
  %res = linalg.generic #trait_dd
    ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
      ^bb(%lhs: f32, %rhs: f32):
        %prod = mulf %lhs, %rhs : f32
        linalg.yield %prod : f32
  } -> tensor<32xf32>
  return %res : tensor<32xf32>
}
// Trait for a 1-D binary elementwise kernel: operand `a` is annotated dense
// ("D"), operand `b` sparse ("S"), and the output `x` dense ("D").
#trait_ds = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // operand a
    affine_map<(i) -> (i)>,  // operand b
    affine_map<(i) -> (i)>   // result x
  ],
  sparse = [
    [ "D" ],  // operand a
    [ "S" ],  // operand b
    [ "D" ]   // result x
  ],
  iterator_types = ["parallel"],
  doc = "x(i) = a(i) OP b(i)"
}
// CHECK-LABEL: func @add_ds(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 32 : index
// CHECK: %[[VAL_4:.*]] = constant 0 : index
// CHECK: %[[VAL_5:.*]] = constant true
// CHECK: %[[VAL_6:.*]] = constant 1 : index
// CHECK: %[[VAL_7:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_11:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_12:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_13:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_17:.*]] = cmpi "ult", %[[VAL_15]], %[[VAL_13]] : index
// CHECK: scf.condition(%[[VAL_17]]) %[[VAL_15]], %[[VAL_16]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_18:.*]]: index, %[[VAL_19:.*]]: index):
// CHECK: %[[VAL_20:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_18]]] : memref<?xindex>
// CHECK: %[[VAL_21:.*]] = cmpi "eq", %[[VAL_20]], %[[VAL_19]] : index
// CHECK: scf.if %[[VAL_21]] {
// CHECK: %[[VAL_22:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_19]]] : memref<32xf32>
// CHECK: %[[VAL_23:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_18]]] : memref<?xf32>
// CHECK: %[[VAL_24:.*]] = addf %[[VAL_22]], %[[VAL_23]] : f32
// CHECK: store %[[VAL_24]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<32xf32>
// CHECK: } else {
// CHECK: scf.if %[[VAL_5]] {
// CHECK: %[[VAL_25:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_19]]] : memref<32xf32>
// CHECK: store %[[VAL_25]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<32xf32>
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_26:.*]] = cmpi "eq", %[[VAL_20]], %[[VAL_19]] : index
// CHECK: %[[VAL_27:.*]] = addi %[[VAL_18]], %[[VAL_6]] : index
// CHECK: %[[VAL_28:.*]] = select %[[VAL_26]], %[[VAL_27]], %[[VAL_18]] : index
// CHECK: %[[VAL_29:.*]] = addi %[[VAL_19]], %[[VAL_6]] : index
// CHECK: scf.yield %[[VAL_28]], %[[VAL_29]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_30:.*]] = %[[VAL_31:.*]]#1 to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: %[[VAL_32:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_30]]] : memref<32xf32>
// CHECK: store %[[VAL_32]], %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref<32xf32>
// CHECK: }
// CHECK: %[[VAL_33:.*]] = tensor_load %[[VAL_11]] : memref<32xf32>
// CHECK: return %[[VAL_33]] : tensor<32xf32>
// CHECK: }
// Dense plus sparse, x(i) = a(i) + b(i), using #trait_ds. The expectations
// above walk the stored entries of `b` alongside the dense index in an
// scf.while, then copy the remaining elements of `a` with an scf.for.
func @add_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
  %res = linalg.generic #trait_ds
    ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
      ^bb(%lhs: f32, %rhs: f32):
        %sum = addf %lhs, %rhs : f32
        linalg.yield %sum : f32
  } -> tensor<32xf32>
  return %res : tensor<32xf32>
}
// CHECK-LABEL: func @mul_ds(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 0 : index
// CHECK: %[[VAL_4:.*]] = constant 1 : index
// CHECK: %[[VAL_5:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_9:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_10:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_11:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] {
// CHECK: %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_12]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_13]]] : memref<32xf32>
// CHECK: %[[VAL_15:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK: %[[VAL_16:.*]] = mulf %[[VAL_14]], %[[VAL_15]] : f32
// CHECK: store %[[VAL_16]], %[[VAL_9]]{{\[}}%[[VAL_13]]] : memref<32xf32>
// CHECK: }
// CHECK: %[[VAL_17:.*]] = tensor_load %[[VAL_9]] : memref<32xf32>
// CHECK: return %[[VAL_17]] : tensor<32xf32>
// CHECK: }
// Dense times sparse, x(i) = a(i) * b(i), using #trait_ds. The expectations
// above iterate only over the stored entries of `b`, loading `a` at each
// stored index.
func @mul_ds(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
  %res = linalg.generic #trait_ds
    ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
      ^bb(%lhs: f32, %rhs: f32):
        %prod = mulf %lhs, %rhs : f32
        linalg.yield %prod : f32
  } -> tensor<32xf32>
  return %res : tensor<32xf32>
}
// Trait for a 1-D binary elementwise kernel: operand `a` is annotated sparse
// ("S"), operand `b` dense ("D"), and the output `x` dense ("D").
#trait_sd = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // operand a
    affine_map<(i) -> (i)>,  // operand b
    affine_map<(i) -> (i)>   // result x
  ],
  sparse = [
    [ "S" ],  // operand a
    [ "D" ],  // operand b
    [ "D" ]   // result x
  ],
  iterator_types = ["parallel"],
  doc = "x(i) = a(i) OP b(i)"
}
// CHECK-LABEL: func @add_sd(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 32 : index
// CHECK: %[[VAL_4:.*]] = constant 0 : index
// CHECK: %[[VAL_5:.*]] = constant true
// CHECK: %[[VAL_6:.*]] = constant 1 : index
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_10:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_11:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_12:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_6]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]]:2 = scf.while (%[[VAL_15:.*]] = %[[VAL_12]], %[[VAL_16:.*]] = %[[VAL_4]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_17:.*]] = cmpi "ult", %[[VAL_15]], %[[VAL_13]] : index
// CHECK: scf.condition(%[[VAL_17]]) %[[VAL_15]], %[[VAL_16]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_18:.*]]: index, %[[VAL_19:.*]]: index):
// CHECK: %[[VAL_20:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_18]]] : memref<?xindex>
// CHECK: %[[VAL_21:.*]] = cmpi "eq", %[[VAL_20]], %[[VAL_19]] : index
// CHECK: scf.if %[[VAL_21]] {
// CHECK: %[[VAL_22:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_18]]] : memref<?xf32>
// CHECK: %[[VAL_23:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_19]]] : memref<32xf32>
// CHECK: %[[VAL_24:.*]] = addf %[[VAL_22]], %[[VAL_23]] : f32
// CHECK: store %[[VAL_24]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<32xf32>
// CHECK: } else {
// CHECK: scf.if %[[VAL_5]] {
// CHECK: %[[VAL_25:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_19]]] : memref<32xf32>
// CHECK: store %[[VAL_25]], %[[VAL_11]]{{\[}}%[[VAL_19]]] : memref<32xf32>
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_26:.*]] = cmpi "eq", %[[VAL_20]], %[[VAL_19]] : index
// CHECK: %[[VAL_27:.*]] = addi %[[VAL_18]], %[[VAL_6]] : index
// CHECK: %[[VAL_28:.*]] = select %[[VAL_26]], %[[VAL_27]], %[[VAL_18]] : index
// CHECK: %[[VAL_29:.*]] = addi %[[VAL_19]], %[[VAL_6]] : index
// CHECK: scf.yield %[[VAL_28]], %[[VAL_29]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_30:.*]] = %[[VAL_31:.*]]#1 to %[[VAL_3]] step %[[VAL_6]] {
// CHECK: %[[VAL_32:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_30]]] : memref<32xf32>
// CHECK: store %[[VAL_32]], %[[VAL_11]]{{\[}}%[[VAL_30]]] : memref<32xf32>
// CHECK: }
// CHECK: %[[VAL_33:.*]] = tensor_load %[[VAL_11]] : memref<32xf32>
// CHECK: return %[[VAL_33]] : tensor<32xf32>
// CHECK: }
// Sparse plus dense, x(i) = a(i) + b(i), using #trait_sd. The expectations
// above walk the stored entries of `a` alongside the dense index in an
// scf.while, then copy the remaining elements of `b` with an scf.for.
func @add_sd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
  %res = linalg.generic #trait_sd
    ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
      ^bb(%lhs: f32, %rhs: f32):
        %sum = addf %lhs, %rhs : f32
        linalg.yield %sum : f32
  } -> tensor<32xf32>
  return %res : tensor<32xf32>
}
// CHECK-LABEL: func @mul_sd(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 0 : index
// CHECK: %[[VAL_4:.*]] = constant 1 : index
// CHECK: %[[VAL_5:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_8:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_9:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_10:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_11:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_12:.*]] = %[[VAL_10]] to %[[VAL_11]] step %[[VAL_4]] {
// CHECK: %[[VAL_13:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_12]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_12]]] : memref<?xf32>
// CHECK: %[[VAL_15:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_13]]] : memref<32xf32>
// CHECK: %[[VAL_16:.*]] = mulf %[[VAL_14]], %[[VAL_15]] : f32
// CHECK: store %[[VAL_16]], %[[VAL_9]]{{\[}}%[[VAL_13]]] : memref<32xf32>
// CHECK: }
// CHECK: %[[VAL_17:.*]] = tensor_load %[[VAL_9]] : memref<32xf32>
// CHECK: return %[[VAL_17]] : tensor<32xf32>
// CHECK: }
// Sparse times dense, x(i) = a(i) * b(i), using #trait_sd. The expectations
// above iterate only over the stored entries of `a`, loading `b` at each
// stored index.
func @mul_sd(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
  %res = linalg.generic #trait_sd
    ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
      ^bb(%lhs: f32, %rhs: f32):
        %prod = mulf %lhs, %rhs : f32
        linalg.yield %prod : f32
  } -> tensor<32xf32>
  return %res : tensor<32xf32>
}
// Trait for a 1-D binary elementwise kernel: operands `a` and `b` are both
// annotated sparse ("S"); the output `x` is dense ("D").
#trait_ss = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // operand a
    affine_map<(i) -> (i)>,  // operand b
    affine_map<(i) -> (i)>   // result x
  ],
  sparse = [
    [ "S" ],  // operand a
    [ "S" ],  // operand b
    [ "D" ]   // result x
  ],
  iterator_types = ["parallel"],
  doc = "x(i) = a(i) OP b(i)"
}
// CHECK-LABEL: func @add_ss(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 0 : index
// CHECK: %[[VAL_4:.*]] = constant 1 : index
// CHECK: %[[VAL_5:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_11:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_12:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_13:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_16:.*]]:2 = scf.while (%[[VAL_17:.*]] = %[[VAL_12]], %[[VAL_18:.*]] = %[[VAL_14]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_19:.*]] = cmpi "ult", %[[VAL_17]], %[[VAL_13]] : index
// CHECK: %[[VAL_20:.*]] = cmpi "ult", %[[VAL_18]], %[[VAL_15]] : index
// CHECK: %[[VAL_21:.*]] = and %[[VAL_19]], %[[VAL_20]] : i1
// CHECK: scf.condition(%[[VAL_21]]) %[[VAL_17]], %[[VAL_18]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_22:.*]]: index, %[[VAL_23:.*]]: index):
// CHECK: %[[VAL_24:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_22]]] : memref<?xindex>
// CHECK: %[[VAL_25:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_23]]] : memref<?xindex>
// CHECK: %[[VAL_26:.*]] = cmpi "ult", %[[VAL_25]], %[[VAL_24]] : index
// CHECK: %[[VAL_27:.*]] = select %[[VAL_26]], %[[VAL_25]], %[[VAL_24]] : index
// CHECK: %[[VAL_28:.*]] = cmpi "eq", %[[VAL_24]], %[[VAL_27]] : index
// CHECK: %[[VAL_29:.*]] = cmpi "eq", %[[VAL_25]], %[[VAL_27]] : index
// CHECK: %[[VAL_30:.*]] = and %[[VAL_28]], %[[VAL_29]] : i1
// CHECK: scf.if %[[VAL_30]] {
// CHECK: %[[VAL_31:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_22]]] : memref<?xf32>
// CHECK: %[[VAL_32:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_23]]] : memref<?xf32>
// CHECK: %[[VAL_33:.*]] = addf %[[VAL_31]], %[[VAL_32]] : f32
// CHECK: store %[[VAL_33]], %[[VAL_11]]{{\[}}%[[VAL_27]]] : memref<32xf32>
// CHECK: } else {
// CHECK: %[[VAL_34:.*]] = cmpi "eq", %[[VAL_24]], %[[VAL_27]] : index
// CHECK: scf.if %[[VAL_34]] {
// CHECK: %[[VAL_35:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_22]]] : memref<?xf32>
// CHECK: store %[[VAL_35]], %[[VAL_11]]{{\[}}%[[VAL_27]]] : memref<32xf32>
// CHECK: } else {
// CHECK: %[[VAL_36:.*]] = cmpi "eq", %[[VAL_25]], %[[VAL_27]] : index
// CHECK: scf.if %[[VAL_36]] {
// CHECK: %[[VAL_37:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_23]]] : memref<?xf32>
// CHECK: store %[[VAL_37]], %[[VAL_11]]{{\[}}%[[VAL_27]]] : memref<32xf32>
// CHECK: } else {
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: %[[VAL_38:.*]] = cmpi "eq", %[[VAL_24]], %[[VAL_27]] : index
// CHECK: %[[VAL_39:.*]] = addi %[[VAL_22]], %[[VAL_4]] : index
// CHECK: %[[VAL_40:.*]] = select %[[VAL_38]], %[[VAL_39]], %[[VAL_22]] : index
// CHECK: %[[VAL_41:.*]] = cmpi "eq", %[[VAL_25]], %[[VAL_27]] : index
// CHECK: %[[VAL_42:.*]] = addi %[[VAL_23]], %[[VAL_4]] : index
// CHECK: %[[VAL_43:.*]] = select %[[VAL_41]], %[[VAL_42]], %[[VAL_23]] : index
// CHECK: scf.yield %[[VAL_40]], %[[VAL_43]] : index, index
// CHECK: }
// CHECK: scf.for %[[VAL_44:.*]] = %[[VAL_45:.*]]#0 to %[[VAL_13]] step %[[VAL_4]] {
// CHECK: %[[VAL_46:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_44]]] : memref<?xindex>
// CHECK: %[[VAL_47:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_44]]] : memref<?xf32>
// CHECK: store %[[VAL_47]], %[[VAL_11]]{{\[}}%[[VAL_46]]] : memref<32xf32>
// CHECK: }
// CHECK: scf.for %[[VAL_48:.*]] = %[[VAL_49:.*]]#1 to %[[VAL_15]] step %[[VAL_4]] {
// CHECK: %[[VAL_50:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_48]]] : memref<?xindex>
// CHECK: %[[VAL_51:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_48]]] : memref<?xf32>
// CHECK: store %[[VAL_51]], %[[VAL_11]]{{\[}}%[[VAL_50]]] : memref<32xf32>
// CHECK: }
// CHECK: %[[VAL_52:.*]] = tensor_load %[[VAL_11]] : memref<32xf32>
// CHECK: return %[[VAL_52]] : tensor<32xf32>
// CHECK: }
// Sparse plus sparse, x(i) = a(i) + b(i), using #trait_ss. The expectations
// above co-iterate both operands in an scf.while while both still have
// stored entries, then drain whichever operand remains with two scf.for
// loops.
func @add_ss(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
  %res = linalg.generic #trait_ss
    ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
      ^bb(%lhs: f32, %rhs: f32):
        %sum = addf %lhs, %rhs : f32
        linalg.yield %sum : f32
  } -> tensor<32xf32>
  return %res : tensor<32xf32>
}
// CHECK-LABEL: func @mul_ss(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<32xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<32xf32>) -> tensor<32xf32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 0 : index
// CHECK: %[[VAL_4:.*]] = constant 1 : index
// CHECK: %[[VAL_5:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_8:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_9:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_11:.*]] = alloca() : memref<32xf32>
// CHECK: %[[VAL_12:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_13:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_14:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_15:.*]] = load %[[VAL_8]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: %[[VAL_16:.*]]:2 = scf.while (%[[VAL_17:.*]] = %[[VAL_12]], %[[VAL_18:.*]] = %[[VAL_14]]) : (index, index) -> (index, index) {
// CHECK: %[[VAL_19:.*]] = cmpi "ult", %[[VAL_17]], %[[VAL_13]] : index
// CHECK: %[[VAL_20:.*]] = cmpi "ult", %[[VAL_18]], %[[VAL_15]] : index
// CHECK: %[[VAL_21:.*]] = and %[[VAL_19]], %[[VAL_20]] : i1
// CHECK: scf.condition(%[[VAL_21]]) %[[VAL_17]], %[[VAL_18]] : index, index
// CHECK: } do {
// CHECK: ^bb0(%[[VAL_22:.*]]: index, %[[VAL_23:.*]]: index):
// CHECK: %[[VAL_24:.*]] = load %[[VAL_6]]{{\[}}%[[VAL_22]]] : memref<?xindex>
// CHECK: %[[VAL_25:.*]] = load %[[VAL_9]]{{\[}}%[[VAL_23]]] : memref<?xindex>
// CHECK: %[[VAL_26:.*]] = cmpi "ult", %[[VAL_25]], %[[VAL_24]] : index
// CHECK: %[[VAL_27:.*]] = select %[[VAL_26]], %[[VAL_25]], %[[VAL_24]] : index
// CHECK: %[[VAL_28:.*]] = cmpi "eq", %[[VAL_24]], %[[VAL_27]] : index
// CHECK: %[[VAL_29:.*]] = cmpi "eq", %[[VAL_25]], %[[VAL_27]] : index
// CHECK: %[[VAL_30:.*]] = and %[[VAL_28]], %[[VAL_29]] : i1
// CHECK: scf.if %[[VAL_30]] {
// CHECK: %[[VAL_31:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_22]]] : memref<?xf32>
// CHECK: %[[VAL_32:.*]] = load %[[VAL_10]]{{\[}}%[[VAL_23]]] : memref<?xf32>
// CHECK: %[[VAL_33:.*]] = mulf %[[VAL_31]], %[[VAL_32]] : f32
// CHECK: store %[[VAL_33]], %[[VAL_11]]{{\[}}%[[VAL_27]]] : memref<32xf32>
// CHECK: } else {
// CHECK: }
// CHECK: %[[VAL_34:.*]] = cmpi "eq", %[[VAL_24]], %[[VAL_27]] : index
// CHECK: %[[VAL_35:.*]] = addi %[[VAL_22]], %[[VAL_4]] : index
// CHECK: %[[VAL_36:.*]] = select %[[VAL_34]], %[[VAL_35]], %[[VAL_22]] : index
// CHECK: %[[VAL_37:.*]] = cmpi "eq", %[[VAL_25]], %[[VAL_27]] : index
// CHECK: %[[VAL_38:.*]] = addi %[[VAL_23]], %[[VAL_4]] : index
// CHECK: %[[VAL_39:.*]] = select %[[VAL_37]], %[[VAL_38]], %[[VAL_23]] : index
// CHECK: scf.yield %[[VAL_36]], %[[VAL_39]] : index, index
// CHECK: }
// CHECK: %[[VAL_40:.*]] = tensor_load %[[VAL_11]] : memref<32xf32>
// CHECK: return %[[VAL_40]] : tensor<32xf32>
// CHECK: }
// Sparse times sparse, x(i) = a(i) * b(i), using #trait_ss. The expectations
// above co-iterate both operands in an scf.while and store a product only
// where both have an entry at the same index; no trailing loops follow.
func @mul_ss(%arga: tensor<32xf32>, %argb: tensor<32xf32>) -> tensor<32xf32> {
  %res = linalg.generic #trait_ss
    ins(%arga, %argb: tensor<32xf32>, tensor<32xf32>) {
      ^bb(%lhs: f32, %rhs: f32):
        %prod = mulf %lhs, %rhs : f32
        linalg.yield %prod : f32
  } -> tensor<32xf32>
  return %res : tensor<32xf32>
}
// Trait for a 1-D sum reduction: operand `a` is annotated sparse ("S") and
// the output `x` is a rank-0 tensor, so its map yields no indices and its
// annotation list is empty.
#trait_sum_reduction = {
  indexing_maps = [
    affine_map<(i) -> (i)>,  // operand a
    affine_map<(i) -> ()>    // scalar result x
  ],
  sparse = [
    [ "S" ],  // operand a
    [ ]       // scalar result x
  ],
  iterator_types = ["reduction"],
  doc = "x = SUM_i a(i)"
}
// CHECK-LABEL: func @sum_reduction(
// CHECK-SAME: %[[VAL_0:.*]]: tensor<?xf32>,
// CHECK-SAME: %[[VAL_1:.*]]: tensor<f32>) -> tensor<f32> {
// CHECK: %[[VAL_2:.*]] = constant 999 : index
// CHECK: %[[VAL_3:.*]] = constant 0 : index
// CHECK: %[[VAL_4:.*]] = constant 1 : index
// CHECK: %[[VAL_5:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_6:.*]] = alloca(%[[VAL_2]]) : memref<?xindex>
// CHECK: %[[VAL_7:.*]] = alloca(%[[VAL_2]]) : memref<?xf32>
// CHECK: %[[VAL_8:.*]] = alloca() : memref<f32>
// CHECK: %[[VAL_9:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_3]]] : memref<?xindex>
// CHECK: %[[VAL_10:.*]] = load %[[VAL_5]]{{\[}}%[[VAL_4]]] : memref<?xindex>
// CHECK: scf.for %[[VAL_11:.*]] = %[[VAL_9]] to %[[VAL_10]] step %[[VAL_4]] {
// CHECK: %[[VAL_12:.*]] = load %[[VAL_8]][] : memref<f32>
// CHECK: %[[VAL_13:.*]] = load %[[VAL_7]]{{\[}}%[[VAL_11]]] : memref<?xf32>
// CHECK: %[[VAL_14:.*]] = addf %[[VAL_12]], %[[VAL_13]] : f32
// CHECK: store %[[VAL_14]], %[[VAL_8]][] : memref<f32>
// CHECK: }
// CHECK: %[[VAL_15:.*]] = tensor_load %[[VAL_8]] : memref<f32>
// CHECK: return %[[VAL_15]] : tensor<f32>
// CHECK: }
// Sum reduction x = SUM_i a(i) over a dynamically sized sparse vector, using
// #trait_sum_reduction with %argx as the init operand. The expectations
// above accumulate into a memref<f32> inside a single scf.for over the
// stored entries of `a`.
func @sum_reduction(%arga: tensor<?xf32>, %argx: tensor<f32>) -> tensor<f32> {
  %res = linalg.generic #trait_sum_reduction
    ins(%arga : tensor<?xf32>)
    init(%argx : tensor<f32>) {
      ^bb(%val : f32, %acc : f32):
        %next = addf %acc, %val : f32
        linalg.yield %next : f32
  } -> tensor<f32>
  return %res : tensor<f32>
}