You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
328 lines
13 KiB
328 lines
13 KiB
# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-lower-control-flow -o - %s | FileCheck %s
# Getting an undef that is specifically a VGPR is tricky from IR, so the
# undef VGPR operands are spelled out directly in the machine functions below.

# extract_undef_offset_vgpr -- SI_INDIRECT_SRC_V4 with an undef VGPR index and
# a constant offset of 0. The directives expect a block bb.1 that lists itself
# as a successor and ends in S_CBRANCH_EXECNZ back to bb.1, with the undef flag
# still present on the %vgpr10 operand of V_READFIRSTLANE_B32. The trailing
# {{$}} anchors make the live-in lists match exactly (no extra registers).

# CHECK-LABEL: name: extract_undef_offset_vgpr{{$}}
# CHECK: bb.1:
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}

# CHECK: V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
# CHECK: %vgpr0 = V_MOVRELS_B32_e32 %vgpr0, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec

# CHECK: bb.2:
# CHECK: liveins: %sgpr6_sgpr7, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr4, %sgpr5, %sgpr6, %sgpr7, %sgpr4_sgpr5, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}

# Embedded LLVM IR module for the machine functions in this file. Some of the
# machine-level memory operands refer to the %in / %out arguments declared here
# (written as %ir.in / %ir.out). The offset and neg_offset variants have
# identical IR bodies; the constant offset appears only on the machine-level
# SI_INDIRECT_* instructions.
--- |
  target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"

  define void @extract_undef_offset_vgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  entry:
    %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
    %value = extractelement <4 x i32> %ld, i32 undef
    store i32 %value, i32 addrspace(1)* %out
    ret void
  }

  define void @extract_undef_neg_offset_vgpr(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  entry:
    %ld = load volatile <4 x i32>, <4 x i32> addrspace(1)* %in
    %value = extractelement <4 x i32> %ld, i32 undef
    store i32 %value, i32 addrspace(1)* %out
    ret void
  }

  define void @insert_undef_offset_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  entry:
    %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
    %value = insertelement <4 x i32> %ld, i32 5, i32 undef
    store <4 x i32> %value, <4 x i32> addrspace(1)* %out
    ret void
  }

  define void @insert_undef_neg_offset_vgpr(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) {
  entry:
    %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
    %value = insertelement <4 x i32> %ld, i32 5, i32 undef
    store <4 x i32> %value, <4 x i32> addrspace(1)* %out
    ret void
  }

  define void @insert_undef_value_offset_vgpr(<4 x i32> addrspace(1)*%out, <4 x i32> addrspace(1)* %in, i32 %idx) {
  entry:
    %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
    %value = insertelement <4 x i32> %ld, i32 undef, i32 %idx
    store <4 x i32> %value, <4 x i32> addrspace(1)* %out
    ret void
  }

...
# Machine function for the extract_undef_offset_vgpr test. It loads four dwords
# into %vgpr0_vgpr1_vgpr2_vgpr3, reads one element through SI_INDIRECT_SRC_V4
# with `undef %vgpr10` as the index operand and 0 as the constant offset, and
# stores the selected dword from %vgpr0.
---
name:            extract_undef_offset_vgpr
alignment:       0
exposesReturnsTwice: false
hasInlineAsm:    false
allVRegsAllocated: true
isSSA:           false
tracksRegLiveness: true
tracksSubRegLiveness: true
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1

    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    S_WAITCNT 127
    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
    S_WAITCNT 3952
    %vgpr0, dead %sgpr0_sgpr1 = SI_INDIRECT_SRC_V4 killed %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, 0, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
    S_WAITCNT 127
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
    S_ENDPGM

...

# extract_undef_neg_offset_vgpr -- same as extract_undef_offset_vgpr, but the
# SI_INDIRECT_SRC_V4 constant offset is -7. The directives therefore also
# expect `S_ADD_I32 %m0, -7` after %m0 is copied from %vcc_lo, inside the
# self-branching block bb.1. The final live-in list is anchored with {{$}},
# like the identical list used for extract_undef_offset_vgpr, so extra
# trailing registers are rejected.

# CHECK-LABEL: name: extract_undef_neg_offset_vgpr{{$}}
# CHECK: bb.1:
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr0_vgpr1_vgpr2_vgpr3{{$}}

# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
# CHECK: %m0 = S_MOV_B32 %vcc_lo
# CHECK: %m0 = S_ADD_I32 %m0, -7, implicit-def %scc
# CHECK: %vgpr0 = V_MOVRELS_B32_e32 %vgpr0, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec

# CHECK: bb.2:
# CHECK: liveins: %sgpr6_sgpr7, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr4, %sgpr5, %sgpr6, %sgpr7, %sgpr4_sgpr5, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}

---
name:            extract_undef_neg_offset_vgpr
alignment:       0
exposesReturnsTwice: false
hasInlineAsm:    false
allVRegsAllocated: true
isSSA:           false
tracksRegLiveness: true
tracksSubRegLiveness: true
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1

    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    S_WAITCNT 127
    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9
    S_WAITCNT 3952
    %vgpr0, dead %sgpr0_sgpr1 = SI_INDIRECT_SRC_V4 killed %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, -7, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
    S_WAITCNT 127
    BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec
    S_ENDPGM

...

# insert_undef_offset_vgpr -- SI_INDIRECT_DST_V4 writing %vgpr4 (set to 5 by
# V_MOV_B32_e32) into %vgpr0_vgpr1_vgpr2_vgpr3 at an undef VGPR index with a
# constant offset of 0. The directives expect the write to become
# V_MOVRELD_B32_e32 inside the self-branching block bb.1, with the undef flag
# kept on the %vgpr10 operand of V_READFIRSTLANE_B32. The final live-in list is
# anchored with {{$}}, like the identical lists used for the other insert_*
# tests, so extra trailing registers are rejected.

# CHECK-LABEL: name: insert_undef_offset_vgpr{{$}}
# CHECK: bb.1:
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}

# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
# CHECK: %m0 = S_MOV_B32 %vcc_lo
# CHECK: %vgpr0 = V_MOVRELD_B32_e32 %vgpr4, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec

# CHECK: bb.2:
# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}

---
name:            insert_undef_offset_vgpr
alignment:       0
exposesReturnsTwice: false
hasInlineAsm:    false
allVRegsAllocated: true
isSSA:           false
tracksRegLiveness: true
tracksSubRegLiveness: true
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1

    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr4 = V_MOV_B32_e32 5, implicit %exec
    S_WAITCNT 127
    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    S_WAITCNT 3952
    %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, 0, killed %vgpr4, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
    S_WAITCNT 127
    BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
    S_ENDPGM

...

# insert_undef_neg_offset_vgpr -- same as insert_undef_offset_vgpr, but the
# SI_INDIRECT_DST_V4 constant offset is -7. The directives therefore also
# expect `S_ADD_I32 %m0, -7` after %m0 is copied from %vcc_lo, ahead of the
# V_MOVRELD_B32_e32 write inside the self-branching block bb.1.

# CHECK-LABEL: name: insert_undef_neg_offset_vgpr{{$}}
# CHECK: bb.1:
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}

# CHECK: %vcc_lo = V_READFIRSTLANE_B32 undef %vgpr10, implicit %exec
# CHECK: %m0 = S_MOV_B32 %vcc_lo
# CHECK: %m0 = S_ADD_I32 %m0, -7, implicit-def %scc
# CHECK: %vgpr0 = V_MOVRELD_B32_e32 %vgpr4, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec

# CHECK: bb.2:
# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}

---
name:            insert_undef_neg_offset_vgpr
alignment:       0
exposesReturnsTwice: false
hasInlineAsm:    false
allVRegsAllocated: true
isSSA:           false
tracksRegLiveness: true
tracksSubRegLiveness: true
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1

    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr4 = V_MOV_B32_e32 5, implicit %exec
    S_WAITCNT 127
    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    S_WAITCNT 3952
    %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, undef %vgpr10, -7, killed %vgpr4, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
    S_WAITCNT 127
    BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
    S_ENDPGM

...

# insert_undef_value_offset_vgpr -- here the index operand of
# SI_INDIRECT_DST_V4 is a defined register (%vgpr4, set to 2 by V_MOV_B32_e32)
# and the value being inserted is `undef %vgpr10`. The directives expect
# V_READFIRSTLANE_B32 to read %vgpr4 without an undef flag, and the undef flag
# to be kept on the source operand of V_MOVRELD_B32_e32. The label directive
# matches on `name: ` like every other label in this file.

# CHECK-LABEL: name: insert_undef_value_offset_vgpr{{$}}
# CHECK: bb.1:
# CHECK: successors: %bb.2(0x40000000 / 0x80000000 = 50.00%), %bb.1(0x40000000 / 0x80000000 = 50.00%)
# CHECK: liveins: %vgpr4, %vgpr0_vgpr1_vgpr2_vgpr3{{$}}

# CHECK: %vcc_lo = V_READFIRSTLANE_B32 %vgpr4, implicit %exec
# CHECK: %m0 = S_MOV_B32 %vcc_lo
# CHECK: %vgpr0 = V_MOVRELD_B32_e32 undef %vgpr10, implicit %m0, implicit %exec, implicit %vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_CBRANCH_EXECNZ %bb.1, implicit %exec

# CHECK: bb.2:
# CHECK: liveins: %sgpr6_sgpr7, %sgpr7, %sgpr4_sgpr5, %sgpr5, %sgpr4_sgpr5_sgpr6_sgpr7, %sgpr6, %sgpr4, %vgpr0_vgpr1_vgpr2_vgpr3, %vgpr0, %vgpr1, %vgpr2, %vgpr3, %vgpr0_vgpr1, %vgpr2_vgpr3, %vgpr0_vgpr1_vgpr2, %vgpr1_vgpr2, %vgpr1_vgpr2_vgpr3, %vgpr4, %sgpr0_sgpr1, %sgpr0, %sgpr1{{$}}

---
name:            insert_undef_value_offset_vgpr
alignment:       0
exposesReturnsTwice: false
hasInlineAsm:    false
allVRegsAllocated: true
isSSA:           false
tracksRegLiveness: true
tracksSubRegLiveness: true
liveins:
  - { reg: '%sgpr0_sgpr1' }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap:     false
  hasPatchPoint:   false
  stackSize:       0
  offsetAdjustment: 0
  maxAlignment:    0
  adjustsStack:    false
  hasCalls:        false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart:      false
  hasMustTailInVarArgFunc: false
body:             |
  bb.0.entry:
    liveins: %sgpr0_sgpr1

    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM %sgpr0_sgpr1, 11 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    %sgpr7 = S_MOV_B32 61440
    %sgpr6 = S_MOV_B32 -1
    %vgpr4 = V_MOV_B32_e32 2, implicit %exec
    S_WAITCNT 127
    %vgpr0_vgpr1_vgpr2_vgpr3 = BUFFER_LOAD_DWORDX4_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (load 16 from %ir.in)
    %sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 9 :: (non-temporal invariant load 8 from `i64 addrspace(2)* undef`)
    S_WAITCNT 3952
    %vgpr0_vgpr1_vgpr2_vgpr3, dead %sgpr0_sgpr1 = SI_INDIRECT_DST_V4 %vgpr0_vgpr1_vgpr2_vgpr3, killed %vgpr4, 0, undef %vgpr10, implicit-def dead %exec, implicit-def dead %vcc, implicit-def dead %m0, implicit %exec
    S_WAITCNT 127
    BUFFER_STORE_DWORDX4_OFFSET killed %vgpr0_vgpr1_vgpr2_vgpr3, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 16 into %ir.out)
    S_ENDPGM

...