You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

163 lines
11 KiB

; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
; Pseudo-prototypes for the four intrinsic overloads exercised in this file,
; followed by their IR declarations. The overload suffix encodes the node
; pointer width (i32 or i64) and the vector type used for ray_dir and
; ray_inv_dir (v4f32 or v4f16); ray_origin is <4 x float> in every overload.
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(uint node_ptr, float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir, uint4 texture_descr)
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(uint node_ptr, float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir, uint4 texture_descr)
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(ulong node_ptr, float ray_extent, float4 ray_origin, float4 ray_dir, float4 ray_inv_dir, uint4 texture_descr)
; uint4 llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(ulong node_ptr, float ray_extent, float4 ray_origin, half4 ray_dir, half4 ray_inv_dir, uint4 texture_descr)
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32, float, <4 x float>, <4 x float>, <4 x float>, <4 x i32>)
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32, float, <4 x float>, <4 x half>, <4 x half>, <4 x i32>)
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64, float, <4 x float>, <4 x float>, <4 x float>, <4 x i32>)
declare <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64, float, <4 x float>, <4 x half>, <4 x half>, <4 x i32>)
; GCN-LABEL: {{^}}image_bvh_intersect_ray:
; GCN: image_bvh_intersect_ray v[0:3], v[0:15], s[0:3]{{$}}
; Arguments are flattened to represent the actual VGPR_A layout, so we have no
; extra moves in the generated kernel.
; 32-bit node pointer, f32 ray vectors. The ray origin, direction and inverse
; direction arrive as separate scalar arguments and are packed into elements
; 0..2 of <4 x float> vectors (element 3 stays undef) before the intrinsic
; call. The <4 x i32> result is returned reinterpreted as <4 x float>.
define amdgpu_ps <4 x float> @image_bvh_intersect_ray(i32 %node_ptr, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {
entry:
  ; Pack origin x/y/z.
  %origin_x = insertelement <4 x float> undef, float %ray_origin_x, i32 0
  %origin_xy = insertelement <4 x float> %origin_x, float %ray_origin_y, i32 1
  %origin_xyz = insertelement <4 x float> %origin_xy, float %ray_origin_z, i32 2
  ; Pack direction x/y/z.
  %dir_x = insertelement <4 x float> undef, float %ray_dir_x, i32 0
  %dir_xy = insertelement <4 x float> %dir_x, float %ray_dir_y, i32 1
  %dir_xyz = insertelement <4 x float> %dir_xy, float %ray_dir_z, i32 2
  ; Pack inverse direction x/y/z.
  %inv_dir_x = insertelement <4 x float> undef, float %ray_inv_dir_x, i32 0
  %inv_dir_xy = insertelement <4 x float> %inv_dir_x, float %ray_inv_dir_y, i32 1
  %inv_dir_xyz = insertelement <4 x float> %inv_dir_xy, float %ray_inv_dir_z, i32 2
  %result_i32 = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node_ptr, float %ray_extent, <4 x float> %origin_xyz, <4 x float> %dir_xyz, <4 x float> %inv_dir_xyz, <4 x i32> %tdescr)
  ; Reinterpret the integer result to match the declared return type.
  %result_f32 = bitcast <4 x i32> %result_i32 to <4 x float>
  ret <4 x float> %result_f32
}
; GCN-LABEL: {{^}}image_bvh_intersect_ray_a16:
; GCN: image_bvh_intersect_ray v[0:3], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}}
; 32-bit node pointer, half-precision ray_dir / ray_inv_dir. Every argument is
; marked inreg and is forwarded to the intrinsic unchanged; the <4 x i32>
; result is returned reinterpreted as <4 x float>.
define amdgpu_ps <4 x float> @image_bvh_intersect_ray_a16(i32 inreg %node_ptr, float inreg %ray_extent, <4 x float> inreg %ray_origin, <4 x half> inreg %ray_dir, <4 x half> inreg %ray_inv_dir, <4 x i32> inreg %tdescr) {
entry:
  %result_i32 = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr)
  ; Reinterpret the integer result to match the declared return type.
  %result_f32 = bitcast <4 x i32> %result_i32 to <4 x float>
  ret <4 x float> %result_f32
}
; GCN-LABEL: {{^}}image_bvh64_intersect_ray:
; GCN: image_bvh64_intersect_ray v[0:3], v[0:15], s[0:3]{{$}}
; Arguments are flattened to represent the actual VGPR_A layout, so we have no
; extra moves in the generated kernel.
; 64-bit node pointer, f32 ray vectors. The node pointer is passed as
; <2 x i32> and bitcast to i64; the ray origin, direction and inverse
; direction arrive as separate scalars and are packed into elements 0..2 of
; <4 x float> vectors (element 3 stays undef). The <4 x i32> result is
; returned reinterpreted as <4 x float>.
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray(<2 x i32> %node_ptr_vec, float %ray_extent, float %ray_origin_x, float %ray_origin_y, float %ray_origin_z, float %ray_dir_x, float %ray_dir_y, float %ray_dir_z, float %ray_inv_dir_x, float %ray_inv_dir_y, float %ray_inv_dir_z, <4 x i32> inreg %tdescr) {
entry:
  ; Reassemble the two 32-bit halves into the i64 the intrinsic expects.
  %node_ptr_i64 = bitcast <2 x i32> %node_ptr_vec to i64
  ; Pack origin x/y/z.
  %origin_x = insertelement <4 x float> undef, float %ray_origin_x, i32 0
  %origin_xy = insertelement <4 x float> %origin_x, float %ray_origin_y, i32 1
  %origin_xyz = insertelement <4 x float> %origin_xy, float %ray_origin_z, i32 2
  ; Pack direction x/y/z.
  %dir_x = insertelement <4 x float> undef, float %ray_dir_x, i32 0
  %dir_xy = insertelement <4 x float> %dir_x, float %ray_dir_y, i32 1
  %dir_xyz = insertelement <4 x float> %dir_xy, float %ray_dir_z, i32 2
  ; Pack inverse direction x/y/z.
  %inv_dir_x = insertelement <4 x float> undef, float %ray_inv_dir_x, i32 0
  %inv_dir_xy = insertelement <4 x float> %inv_dir_x, float %ray_inv_dir_y, i32 1
  %inv_dir_xyz = insertelement <4 x float> %inv_dir_xy, float %ray_inv_dir_z, i32 2
  %result_i32 = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 %node_ptr_i64, float %ray_extent, <4 x float> %origin_xyz, <4 x float> %dir_xyz, <4 x float> %inv_dir_xyz, <4 x i32> %tdescr)
  ; Reinterpret the integer result to match the declared return type.
  %result_f32 = bitcast <4 x i32> %result_i32 to <4 x float>
  ret <4 x float> %result_f32
}
; GCN-LABEL: {{^}}image_bvh64_intersect_ray_a16:
; GCN: image_bvh64_intersect_ray v[0:3], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}}
; 64-bit node pointer, half-precision ray_dir / ray_inv_dir. Every argument is
; marked inreg and is forwarded to the intrinsic unchanged; the <4 x i32>
; result is returned reinterpreted as <4 x float>.
define amdgpu_ps <4 x float> @image_bvh64_intersect_ray_a16(i64 inreg %node_ptr, float inreg %ray_extent, <4 x float> inreg %ray_origin, <4 x half> inreg %ray_dir, <4 x half> inreg %ray_inv_dir, <4 x i32> inreg %tdescr) {
entry:
  %result_i32 = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 %node_ptr, float %ray_extent, <4 x float> %ray_origin, <4 x half> %ray_dir, <4 x half> %ray_inv_dir, <4 x i32> %tdescr)
  ; Reinterpret the integer result to match the declared return type.
  %result_f32 = bitcast <4 x i32> %result_i32 to <4 x float>
  ret <4 x float> %result_f32
}
; TODO: NSA reassign is very limited and cannot work with VGPR tuples and subregs.
; GCN-LABEL: {{^}}image_bvh_intersect_ray_nsa_reassign:
; GCN: image_bvh_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
; Kernel form, 32-bit node pointer, f32 ray vectors. The node pointer and ray
; extent are loaded from memory at an index given by workitem.id.x, while the
; origin / direction / inverse-direction vectors are built from the constants
; 0.0 .. 8.0 in elements 0..2 (element 3 stays undef).
define amdgpu_kernel void @image_bvh_intersect_ray_nsa_reassign(i32* %p_node_ptr, float* %p_ray, <4 x i32> inreg %tdescr) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  ; Load this work-item's node pointer and ray extent.
  %node_addr = getelementptr inbounds i32, i32* %p_node_ptr, i32 %tid
  %node = load i32, i32* %node_addr, align 4
  %extent_addr = getelementptr inbounds float, float* %p_ray, i32 %tid
  %extent = load float, float* %extent_addr, align 4
  ; origin = (0.0, 1.0, 2.0, undef)
  %origin_x = insertelement <4 x float> undef, float 0.0, i32 0
  %origin_xy = insertelement <4 x float> %origin_x, float 1.0, i32 1
  %origin_xyz = insertelement <4 x float> %origin_xy, float 2.0, i32 2
  ; dir = (3.0, 4.0, 5.0, undef)
  %dir_x = insertelement <4 x float> undef, float 3.0, i32 0
  %dir_xy = insertelement <4 x float> %dir_x, float 4.0, i32 1
  %dir_xyz = insertelement <4 x float> %dir_xy, float 5.0, i32 2
  ; inv_dir = (6.0, 7.0, 8.0, undef)
  %inv_dir_x = insertelement <4 x float> undef, float 6.0, i32 0
  %inv_dir_xy = insertelement <4 x float> %inv_dir_x, float 7.0, i32 1
  %inv_dir_xyz = insertelement <4 x float> %inv_dir_xy, float 8.0, i32 2
  %result = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f32(i32 %node, float %extent, <4 x float> %origin_xyz, <4 x float> %dir_xyz, <4 x float> %inv_dir_xyz, <4 x i32> %tdescr)
  ; The destination is an undef pointer; presumably the store exists only to
  ; give the intrinsic result a use.
  store <4 x i32> %result, <4 x i32>* undef
  ret void
}
; GCN-LABEL: {{^}}image_bvh_intersect_ray_a16_nsa_reassign:
; GCN: image_bvh_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}}
; Kernel form, 32-bit node pointer, half-precision ray_dir / ray_inv_dir. The
; node pointer and ray extent are loaded from memory at an index given by
; workitem.id.x; the origin is a <4 x float> and the direction vectors are
; <4 x half>, all built from the constants 0.0 .. 8.0 in elements 0..2
; (element 3 stays undef).
define amdgpu_kernel void @image_bvh_intersect_ray_a16_nsa_reassign(i32* %p_node_ptr, float* %p_ray, <4 x i32> inreg %tdescr) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  ; Load this work-item's node pointer and ray extent.
  %node_addr = getelementptr inbounds i32, i32* %p_node_ptr, i32 %tid
  %node = load i32, i32* %node_addr, align 4
  %extent_addr = getelementptr inbounds float, float* %p_ray, i32 %tid
  %extent = load float, float* %extent_addr, align 4
  ; origin = (0.0, 1.0, 2.0, undef) as float
  %origin_x = insertelement <4 x float> undef, float 0.0, i32 0
  %origin_xy = insertelement <4 x float> %origin_x, float 1.0, i32 1
  %origin_xyz = insertelement <4 x float> %origin_xy, float 2.0, i32 2
  ; dir = (3.0, 4.0, 5.0, undef) as half
  %dir_x = insertelement <4 x half> undef, half 3.0, i32 0
  %dir_xy = insertelement <4 x half> %dir_x, half 4.0, i32 1
  %dir_xyz = insertelement <4 x half> %dir_xy, half 5.0, i32 2
  ; inv_dir = (6.0, 7.0, 8.0, undef) as half
  %inv_dir_x = insertelement <4 x half> undef, half 6.0, i32 0
  %inv_dir_xy = insertelement <4 x half> %inv_dir_x, half 7.0, i32 1
  %inv_dir_xyz = insertelement <4 x half> %inv_dir_xy, half 8.0, i32 2
  %result = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i32.v4f16(i32 %node, float %extent, <4 x float> %origin_xyz, <4 x half> %dir_xyz, <4 x half> %inv_dir_xyz, <4 x i32> %tdescr)
  ; The destination is an undef pointer; presumably the store exists only to
  ; give the intrinsic result a use.
  store <4 x i32> %result, <4 x i32>* undef
  ret void
}
; GCN-LABEL: {{^}}image_bvh64_intersect_ray_nsa_reassign:
; GCN: image_bvh64_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}]{{$}}
; Kernel form, 64-bit node pointer, f32 ray vectors. Only the ray extent is
; loaded from memory (indexed by workitem.id.x); the node pointer is the
; immediate i64 1111111111111 and the origin / direction / inverse-direction
; vectors are built from the constants 0.0 .. 8.0 in elements 0..2
; (element 3 stays undef).
define amdgpu_kernel void @image_bvh64_intersect_ray_nsa_reassign(float* %p_ray, <4 x i32> inreg %tdescr) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  ; Load this work-item's ray extent.
  %extent_addr = getelementptr inbounds float, float* %p_ray, i32 %tid
  %extent = load float, float* %extent_addr, align 4
  ; origin = (0.0, 1.0, 2.0, undef)
  %origin_x = insertelement <4 x float> undef, float 0.0, i32 0
  %origin_xy = insertelement <4 x float> %origin_x, float 1.0, i32 1
  %origin_xyz = insertelement <4 x float> %origin_xy, float 2.0, i32 2
  ; dir = (3.0, 4.0, 5.0, undef)
  %dir_x = insertelement <4 x float> undef, float 3.0, i32 0
  %dir_xy = insertelement <4 x float> %dir_x, float 4.0, i32 1
  %dir_xyz = insertelement <4 x float> %dir_xy, float 5.0, i32 2
  ; inv_dir = (6.0, 7.0, 8.0, undef)
  %inv_dir_x = insertelement <4 x float> undef, float 6.0, i32 0
  %inv_dir_xy = insertelement <4 x float> %inv_dir_x, float 7.0, i32 1
  %inv_dir_xyz = insertelement <4 x float> %inv_dir_xy, float 8.0, i32 2
  %result = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f32(i64 1111111111111, float %extent, <4 x float> %origin_xyz, <4 x float> %dir_xyz, <4 x float> %inv_dir_xyz, <4 x i32> %tdescr)
  ; The destination is an undef pointer; presumably the store exists only to
  ; give the intrinsic result a use.
  store <4 x i32> %result, <4 x i32>* undef
  ret void
}
; GCN-LABEL: {{^}}image_bvh64_intersect_ray_a16_nsa_reassign:
; GCN: image_bvh64_intersect_ray v[{{[0-9:]+}}], v[{{[0-9:]+}}], s[{{[0-9:]+}}] a16{{$}}
; Kernel form, 64-bit node pointer, half-precision ray_dir / ray_inv_dir. Only
; the ray extent is loaded from memory (indexed by workitem.id.x); the node
; pointer is the immediate i64 1111111111110. The origin is a <4 x float> and
; the direction vectors are <4 x half>, all built from the constants
; 0.0 .. 8.0 in elements 0..2 (element 3 stays undef).
define amdgpu_kernel void @image_bvh64_intersect_ray_a16_nsa_reassign(float* %p_ray, <4 x i32> inreg %tdescr) {
entry:
  %tid = tail call i32 @llvm.amdgcn.workitem.id.x()
  ; Load this work-item's ray extent.
  %extent_addr = getelementptr inbounds float, float* %p_ray, i32 %tid
  %extent = load float, float* %extent_addr, align 4
  ; origin = (0.0, 1.0, 2.0, undef) as float
  %origin_x = insertelement <4 x float> undef, float 0.0, i32 0
  %origin_xy = insertelement <4 x float> %origin_x, float 1.0, i32 1
  %origin_xyz = insertelement <4 x float> %origin_xy, float 2.0, i32 2
  ; dir = (3.0, 4.0, 5.0, undef) as half
  %dir_x = insertelement <4 x half> undef, half 3.0, i32 0
  %dir_xy = insertelement <4 x half> %dir_x, half 4.0, i32 1
  %dir_xyz = insertelement <4 x half> %dir_xy, half 5.0, i32 2
  ; inv_dir = (6.0, 7.0, 8.0, undef) as half
  %inv_dir_x = insertelement <4 x half> undef, half 6.0, i32 0
  %inv_dir_xy = insertelement <4 x half> %inv_dir_x, half 7.0, i32 1
  %inv_dir_xyz = insertelement <4 x half> %inv_dir_xy, half 8.0, i32 2
  %result = call <4 x i32> @llvm.amdgcn.image.bvh.intersect.ray.i64.v4f16(i64 1111111111110, float %extent, <4 x float> %origin_xyz, <4 x half> %dir_xyz, <4 x half> %inv_dir_xyz, <4 x i32> %tdescr)
  ; The destination is an undef pointer; presumably the store exists only to
  ; give the intrinsic result a use.
  store <4 x i32> %result, <4 x i32>* undef
  ret void
}
; Declaration of the workitem.id.x intrinsic; the *_nsa_reassign kernels use
; its i32 result as the element index for their input loads.
declare i32 @llvm.amdgcn.workitem.id.x()