# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs -run-pass si-fold-operands,dead-mi-elimination %s -o - | FileCheck -check-prefix=GCN %s

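# Kernels (entry functions) may have no frame pointer (FP): the kernel tests
# below set stackPtrOffsetReg but no frameOffsetReg.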
|
|
---
# Kernel (isEntryFunction: true). The buffer load's rsrc operand is a COPY of
# live-in SGPRs rather than scratchRSrcReg, and its soffset is a materialized
# 0 rather than stackPtrOffsetReg. The checks expect %stack.0 to remain in the
# V_MOV_B32 (frame index not folded into the load), while the S_MOV_B32 0 is
# folded into the soffset operand as an immediate.
name: kernel_no_fold_fi_non_stack_rsrc_and_soffset
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:
    liveins: $sgpr12_sgpr13_sgpr14_sgpr15

    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_rsrc_and_soffset
    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    %1:sreg_32_xm0 = S_MOV_B32 0
    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    SI_RETURN_TO_EPILOG $vgpr0

...
|
|
|
|
---
# Kernel (isEntryFunction: true). The buffer load's soffset is $sgpr32 (the
# stackPtrOffsetReg), but its rsrc operand is a COPY of live-in SGPRs rather
# than scratchRSrcReg. The checks expect the instruction sequence to be left
# unchanged: %stack.0 stays in the V_MOV_B32 and is not folded into the load.
name: kernel_no_fold_fi_non_stack_rsrc
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:
    liveins: $sgpr12_sgpr13_sgpr14_sgpr15

    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_rsrc
    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    SI_RETURN_TO_EPILOG $vgpr0

...
|
|
|
|
---
# Kernel (isEntryFunction: true). The store and load use scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3) as rsrc, but their soffset is a materialized 0
# rather than stackPtrOffsetReg. The checks expect %stack.0 to remain in the
# V_MOV_B32 (frame index not folded); only the S_MOV_B32 0 is folded into the
# soffset operand as an immediate.
name: kernel_no_fold_fi_non_stack_soffset
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: kernel_no_fold_fi_non_stack_soffset
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_1]], [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN: S_ENDPGM 0, implicit $vgpr0
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    %2:sreg_32_xm0 = S_MOV_B32 0

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, 0, 0, 0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, %2, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    S_ENDPGM 0, implicit $vgpr0

...
|
|
|
|
---
# Kernel (isEntryFunction: true). The store and load use scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3) as rsrc and stackPtrOffsetReg ($sgpr32) as
# soffset. The checks expect %stack.0 to be folded directly into the vaddr
# operand of both MUBUF instructions, with the V_MOV_B32 of %stack.0 removed.
name: kernel_fold_fi_mubuf
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: true
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: kernel_fold_fi_mubuf
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN: S_ENDPGM 0, implicit $vgpr0
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %2
    S_ENDPGM 0, implicit $vgpr0

...
|
|
|
|
|
|
# Functions have an unswizzled SP/FP relative to the wave offset
|
|
---
# Non-entry function (isEntryFunction: false, frameOffsetReg and
# stackPtrOffsetReg both $sgpr32). The buffer load's rsrc operand is a COPY of
# live-in SGPRs rather than scratchRSrcReg, and its soffset is a materialized
# 0. The checks expect %stack.0 to remain in the V_MOV_B32 (frame index not
# folded), while the S_MOV_B32 0 is folded into soffset as an immediate.
name: function_no_fold_fi_non_stack_rsrc_and_soffset
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:
    liveins: $sgpr12_sgpr13_sgpr14_sgpr15

    ; GCN-LABEL: name: function_no_fold_fi_non_stack_rsrc_and_soffset
    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    %1:sreg_32_xm0 = S_MOV_B32 0
    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    SI_RETURN_TO_EPILOG $vgpr0

...
|
|
|
|
---
# Non-entry function (isEntryFunction: false, frameOffsetReg and
# stackPtrOffsetReg both $sgpr32). The buffer load's soffset is $sgpr32, but
# its rsrc operand is a COPY of live-in SGPRs rather than scratchRSrcReg. The
# checks expect the instruction sequence to be left unchanged: %stack.0 stays
# in the V_MOV_B32 and is not folded into the load.
name: function_no_fold_fi_non_stack_rsrc
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr96_sgpr97_sgpr98_sgpr99'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:
    liveins: $sgpr12_sgpr13_sgpr14_sgpr15

    ; GCN-LABEL: name: function_no_fold_fi_non_stack_rsrc
    ; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
    ; GCN: SI_RETURN_TO_EPILOG $vgpr0
    %0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
    %2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %3
    SI_RETURN_TO_EPILOG $vgpr0

...
|
|
|
|
---
# Non-entry function (isEntryFunction: false, frameOffsetReg and
# stackPtrOffsetReg both $sgpr32). The store and load use scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3) as rsrc with an immediate soffset of 0.
# NOTE(review): despite the `no_fold` in the name, the autogenerated checks
# below expect %stack.0 to be folded into vaddr and the soffset operand to be
# rewritten from 0 to $sgpr32. The input instructions are also identical to
# function_fold_fi_mubuf_wave_relative. Confirm whether the name is stale.
name: function_no_fold_fi_non_stack_soffset
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: function_no_fold_fi_non_stack_soffset
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN: S_ENDPGM 0, implicit $vgpr0
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %2
    S_ENDPGM 0, implicit $vgpr0

...
|
|
|
|
---
# Non-entry function (isEntryFunction: false, frameOffsetReg and
# stackPtrOffsetReg both $sgpr32). The store and load use scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3) as rsrc with an immediate soffset of 0. The
# checks expect %stack.0 to be folded into the vaddr operand, the V_MOV_B32 of
# %stack.0 to be removed, and the soffset operand to be rewritten from 0 to
# $sgpr32.
name: function_fold_fi_mubuf_wave_relative
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: function_fold_fi_mubuf_wave_relative
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN: S_ENDPGM 0, implicit $vgpr0
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %2
    S_ENDPGM 0, implicit $vgpr0

...
|
|
|
|
---
# Non-entry function (isEntryFunction: false, frameOffsetReg and
# stackPtrOffsetReg both $sgpr32). The store and load use scratchRSrcReg
# ($sgpr0_sgpr1_sgpr2_sgpr3) as rsrc and already have $sgpr32 as soffset. The
# checks expect %stack.0 to be folded into the vaddr operand of both MUBUF
# instructions, with the V_MOV_B32 of %stack.0 removed and soffset unchanged.
name: function_fold_fi_mubuf_stack_relative
tracksRegLiveness: true
frameInfo:
  maxAlignment: 4
  localFrameSize: 4
stack:
  - { id: 0, size: 4, alignment: 4, local-offset: 0 }
machineFunctionInfo:
  isEntryFunction: false
  scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
  frameOffsetReg: '$sgpr32'
  stackPtrOffsetReg: '$sgpr32'
body: |
  bb.0:

    ; GCN-LABEL: name: function_fold_fi_mubuf_stack_relative
    ; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
    ; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    ; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
    ; GCN: S_ENDPGM 0, implicit $vgpr0
    %0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
    %1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec

    BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    %2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
    $vgpr0 = COPY %2
    S_ENDPGM 0, implicit $vgpr0

...
|