; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON
; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true -mattr=-neon < %s | FileCheck %s -check-prefix=NONEON

; Every function in this file is compiled twice (see the RUN lines above):
; once with the default feature set, where the wide load/store plus its
; shuffles is expected to become a single ldN/stN, and once with -mattr=-neon,
; where that instruction must not be emitted.

; Factor-2 load: the even and the odd lanes of a <16 x i8> load are extracted
; by two strided shuffles and then added.
; NEON-LABEL: load_factor2:
; NEON: ld2 { v0.8b, v1.8b }, [x0]
; NONEON-LABEL: load_factor2:
; NONEON-NOT: ld2
define <8 x i8> @load_factor2(<16 x i8>* %ptr) {
  %wide.vec = load <16 x i8>, <16 x i8>* %ptr, align 4
  %strided.v0 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
  %strided.v1 = shufflevector <16 x i8> %wide.vec, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
  %add = add nsw <8 x i8> %strided.v0, %strided.v1
  ret <8 x i8> %add
}

; Factor-3 load: only members 2 and 1 of each group of three i32 elements are
; used; the whole <12 x i32> vector is still loaded.
; NEON-LABEL: load_factor3:
; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
; NONEON-LABEL: load_factor3:
; NONEON-NOT: ld3
define <4 x i32> @load_factor3(i32* %ptr) {
  %base = bitcast i32* %ptr to <12 x i32>*
  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 5, i32 8, i32 11>
  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  %add = add nsw <4 x i32> %strided.v2, %strided.v1
  ret <4 x i32> %add
}

; Factor-4 load: members 0 and 2 of each group of four i32 elements are used.
; NEON-LABEL: load_factor4:
; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
; NONEON-LABEL: load_factor4:
; NONEON-NOT: ld4
define <4 x i32> @load_factor4(i32* %ptr) {
  %base = bitcast i32* %ptr to <16 x i32>*
  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
  %add = add nsw <4 x i32> %strided.v0, %strided.v2
  ret <4 x i32> %add
}

; Factor-2 store: lanes of %v0 and %v1 are interleaved pairwise and stored as
; one <16 x i8> vector.
; NEON-LABEL: store_factor2:
; NEON: st2 { v0.8b, v1.8b }, [x0]
; NONEON-LABEL: store_factor2:
; NONEON-NOT: st2
define void @store_factor2(<16 x i8>* %ptr, <8 x i8> %v0, <8 x i8> %v1) {
  %interleaved.vec = shufflevector <8 x i8> %v0, <8 x i8> %v1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  store <16 x i8> %interleaved.vec, <16 x i8>* %ptr, align 4
  ret void
}

; Factor-3 store: %v0/%v1 are concatenated, %v2 is widened with undef lanes,
; and the final shuffle interleaves the three sources element by element.
; NEON-LABEL: store_factor3:
; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0]
; NONEON-LABEL: store_factor3:
; NONEON-NOT: st3
define void @store_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
  %base = bitcast i32* %ptr to <12 x i32>*
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 8, i32 1, i32 5, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
  ret void
}

; Factor-4 store: two concatenated pairs (%v0/%v1 and %v2/%v3) are interleaved
; element by element into one <16 x i32> vector.
; NEON-LABEL: store_factor4:
; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
; NONEON-LABEL: store_factor4:
; NONEON-NOT: st4
define void @store_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
  %base = bitcast i32* %ptr to <16 x i32>*
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
  ret void
}

; The following cases test that interleaved access of pointer vectors can be
; matched to ldN/stN instruction.
; Factor-2 load of pointer elements: only the even lanes are used.
; NEON-LABEL: load_ptrvec_factor2:
; NEON: ld2 { v0.2d, v1.2d }, [x0]
; NONEON-LABEL: load_ptrvec_factor2:
; NONEON-NOT: ld2
define <2 x i32*> @load_ptrvec_factor2(i32** %ptr) {
  %base = bitcast i32** %ptr to <4 x i32*>*
  %wide.vec = load <4 x i32*>, <4 x i32*>* %base, align 4
  %strided.v0 = shufflevector <4 x i32*> %wide.vec, <4 x i32*> undef, <2 x i32> <i32 0, i32 2>
  ret <2 x i32*> %strided.v0
}

; Factor-3 load of pointer elements: members 2 and 1 of each group are
; extracted and written to %ptr1 and %ptr2 respectively.
; NEON-LABEL: load_ptrvec_factor3:
; NEON: ld3 { v0.2d, v1.2d, v2.2d }, [x0]
; NONEON-LABEL: load_ptrvec_factor3:
; NONEON-NOT: ld3
define void @load_ptrvec_factor3(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
  %base = bitcast i32** %ptr to <6 x i32*>*
  %wide.vec = load <6 x i32*>, <6 x i32*>* %base, align 4
  %strided.v2 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 2, i32 5>
  store <2 x i32*> %strided.v2, <2 x i32*>* %ptr1
  %strided.v1 = shufflevector <6 x i32*> %wide.vec, <6 x i32*> undef, <2 x i32> <i32 1, i32 4>
  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr2
  ret void
}

; Factor-4 load of pointer elements: members 1 and 3 of each group are
; extracted and written to %ptr1 and %ptr2 respectively.
; NEON-LABEL: load_ptrvec_factor4:
; NEON: ld4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
; NONEON-LABEL: load_ptrvec_factor4:
; NONEON-NOT: ld4
define void @load_ptrvec_factor4(i32** %ptr, <2 x i32*>* %ptr1, <2 x i32*>* %ptr2) {
  %base = bitcast i32** %ptr to <8 x i32*>*
  %wide.vec = load <8 x i32*>, <8 x i32*>* %base, align 4
  %strided.v1 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 1, i32 5>
  %strided.v3 = shufflevector <8 x i32*> %wide.vec, <8 x i32*> undef, <2 x i32> <i32 3, i32 7>
  store <2 x i32*> %strided.v1, <2 x i32*>* %ptr1
  store <2 x i32*> %strided.v3, <2 x i32*>* %ptr2
  ret void
}

; Factor-2 store of pointer elements: %v0 and %v1 are interleaved pairwise.
; NEON-LABEL: store_ptrvec_factor2:
; NEON: st2 { v0.2d, v1.2d }, [x0]
; NONEON-LABEL: store_ptrvec_factor2:
; NONEON-NOT: st2
define void @store_ptrvec_factor2(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1) {
  %base = bitcast i32** %ptr to <4 x i32*>*
  %interleaved.vec = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  store <4 x i32*> %interleaved.vec, <4 x i32*>* %base, align 4
  ret void
}

; Factor-3 store of pointer elements: %v0/%v1 are concatenated, %v2 is widened
; with undef lanes, and the final shuffle interleaves the three sources.
; NEON-LABEL: store_ptrvec_factor3:
; NEON: st3 { v0.2d, v1.2d, v2.2d }, [x0]
; NONEON-LABEL: store_ptrvec_factor3:
; NONEON-NOT: st3
define void @store_ptrvec_factor3(i32** %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2) {
  %base = bitcast i32** %ptr to <6 x i32*>*
  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %v2_u = shufflevector <2 x i32*> %v2, <2 x i32*> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_u, <6 x i32> <i32 0, i32 2, i32 4, i32 1, i32 3, i32 5>
  store <6 x i32*> %interleaved.vec, <6 x i32*>* %base, align 4
  ret void
}

; Factor-4 store of pointer elements: two concatenated pairs are interleaved
; element by element into one <8 x i32*> vector.
; NOTE(review): %ptr is declared i32* here while the other pointer-vector
; tests take i32**; the bitcast makes either form valid, so the signature is
; left untouched -- confirm whether the difference is intentional.
; NEON-LABEL: store_ptrvec_factor4:
; NEON: st4 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0]
; NONEON-LABEL: store_ptrvec_factor4:
; NONEON-NOT: st4
define void @store_ptrvec_factor4(i32* %ptr, <2 x i32*> %v0, <2 x i32*> %v1, <2 x i32*> %v2, <2 x i32*> %v3) {
  %base = bitcast i32* %ptr to <8 x i32*>*
  %v0_v1 = shufflevector <2 x i32*> %v0, <2 x i32*> %v1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %v2_v3 = shufflevector <2 x i32*> %v2, <2 x i32*> %v3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %interleaved.vec = shufflevector <4 x i32*> %v0_v1, <4 x i32*> %v2_v3, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  store <8 x i32*> %interleaved.vec, <8 x i32*>* %base, align 4
  ret void
}

; Following cases check that shuffle masks with undef indices can be matched
; into ldN/stN instruction.
; NOTE(review): the mask constants in this group had been lost from the file
; and were reconstructed. The defined lanes follow the interleave pattern the
; expected ldN/stN requires; which lanes are undef should be confirmed against
; the upstream copy of this test.

; Factor-2 load where lanes 0 and 2 of both strided masks are undef.
; NEON-LABEL: load_undef_mask_factor2:
; NEON: ld2 { v0.4s, v1.4s }, [x0]
; NONEON-LABEL: load_undef_mask_factor2:
; NONEON-NOT: ld2
define <4 x i32> @load_undef_mask_factor2(i32* %ptr) {
  %base = bitcast i32* %ptr to <8 x i32>*
  %wide.vec = load <8 x i32>, <8 x i32>* %base, align 4
  %strided.v0 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 undef, i32 6>
  %strided.v1 = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 undef, i32 3, i32 undef, i32 7>
  %add = add nsw <4 x i32> %strided.v0, %strided.v1
  ret <4 x i32> %add
}

; Factor-3 load where one strided mask defines only its first lane.
; NEON-LABEL: load_undef_mask_factor3:
; NEON: ld3 { v0.4s, v1.4s, v2.4s }, [x0]
; NONEON-LABEL: load_undef_mask_factor3:
; NONEON-NOT: ld3
define <4 x i32> @load_undef_mask_factor3(i32* %ptr) {
  %base = bitcast i32* %ptr to <12 x i32>*
  %wide.vec = load <12 x i32>, <12 x i32>* %base, align 4
  %strided.v2 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 2, i32 undef, i32 undef, i32 undef>
  %strided.v1 = shufflevector <12 x i32> %wide.vec, <12 x i32> undef, <4 x i32> <i32 1, i32 4, i32 7, i32 10>
  %add = add nsw <4 x i32> %strided.v2, %strided.v1
  ret <4 x i32> %add
}

; Factor-4 load where both strided masks leave their upper two lanes undef.
; NEON-LABEL: load_undef_mask_factor4:
; NEON: ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
; NONEON-LABEL: load_undef_mask_factor4:
; NONEON-NOT: ld4
define <4 x i32> @load_undef_mask_factor4(i32* %ptr) {
  %base = bitcast i32* %ptr to <16 x i32>*
  %wide.vec = load <16 x i32>, <16 x i32>* %base, align 4
  %strided.v0 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 0, i32 4, i32 undef, i32 undef>
  %strided.v2 = shufflevector <16 x i32> %wide.vec, <16 x i32> undef, <4 x i32> <i32 2, i32 6, i32 undef, i32 undef>
  %add = add nsw <4 x i32> %strided.v0, %strided.v2
  ret <4 x i32> %add
}

; Factor-2 store whose interleave mask leaves the first four lanes undef.
; NEON-LABEL: store_undef_mask_factor2:
; NEON: st2 { v0.4s, v1.4s }, [x0]
; NONEON-LABEL: store_undef_mask_factor2:
; NONEON-NOT: st2
define void @store_undef_mask_factor2(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1) {
  %base = bitcast i32* %ptr to <8 x i32>*
  %interleaved.vec = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
  store <8 x i32> %interleaved.vec, <8 x i32>* %base, align 4
  ret void
}

; Factor-3 store whose interleave mask has two undef lanes.
; NEON-LABEL: store_undef_mask_factor3:
; NEON: st3 { v0.4s, v1.4s, v2.4s }, [x0]
; NONEON-LABEL: store_undef_mask_factor3:
; NONEON-NOT: st3
define void @store_undef_mask_factor3(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2) {
  %base = bitcast i32* %ptr to <12 x i32>*
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %v2_u = shufflevector <4 x i32> %v2, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_u, <12 x i32> <i32 0, i32 4, i32 undef, i32 1, i32 undef, i32 9, i32 2, i32 6, i32 10, i32 3, i32 7, i32 11>
  store <12 x i32> %interleaved.vec, <12 x i32>* %base, align 4
  ret void
}

; Factor-4 store whose interleave mask has two undef lanes.
; NEON-LABEL: store_undef_mask_factor4:
; NEON: st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [x0]
; NONEON-LABEL: store_undef_mask_factor4:
; NONEON-NOT: st4
define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
  %base = bitcast i32* %ptr to <16 x i32>*
  %v0_v1 = shufflevector <4 x i32> %v0, <4 x i32> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %v2_v3 = shufflevector <4 x i32> %v2, <4 x i32> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %interleaved.vec = shufflevector <8 x i32> %v0_v1, <8 x i32> %v2_v3, <16 x i32> <i32 0, i32 4, i32 8, i32 undef, i32 undef, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
  store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4
  ret void
}

; Check that we do something sane with illegal types.
; <3 x float> is not a legal vector type, so no ld2 is expected: the checks
; pin a plain load followed by uzp1 when NEON is available, and two scalar
; loads (elements 0 and 2, the latter at byte offset 8) when it is not.
; NEON-LABEL: load_illegal_factor2:
; NEON: BB#0:
; NEON-NEXT: ldr q[[V:[0-9]+]], [x0]
; NEON-NEXT: uzp1 v0.4s, v[[V]].4s, v{{.*}}.4s
; NEON-NEXT: ret
; NONEON-LABEL: load_illegal_factor2:
; NONEON: BB#0:
; NONEON-NEXT: ldr s0, [x0]
; NONEON-NEXT: ldr s1, [x0, #8]
; NONEON-NEXT: ret
define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind {
  %tmp1 = load <3 x float>, <3 x float>* %p, align 16
  %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
  ret <3 x float> %tmp2
}

; Store counterpart of the test above: the same <0, 2, undef> shuffle feeds a
; <3 x float> store, and the checks pin the exact non-st2 sequence for both
; configurations.
; NEON-LABEL: store_illegal_factor2:
; NEON: BB#0:
; NEON-NEXT: uzp1 v0.4s, v0.4s, v{{.*}}.4s
; NEON-NEXT: st1 { v0.d }[0], [x0]
; NEON-NEXT: ret
; NONEON-LABEL: store_illegal_factor2:
; NONEON: BB#0:
; NONEON-NEXT: fmov w[[ELT2:[0-9]+]], s2
; NONEON-NEXT: fmov w[[RES:[0-9]+]], s0
; NONEON-NEXT: bfi x[[RES]], x[[ELT2]], #32, #32
; NONEON-NEXT: str x[[RES]], [x0]
; NONEON-NEXT: ret
define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind {
  %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> <i32 0, i32 2, i32 undef>
  store <3 x float> %tmp1, <3 x float>* %p, align 16
  ret void
}

; The wide load has a strided-shuffle user (%2, otherwise unused) and an
; extractelement user. Element 2 of the wide vector is lane 1 of the even
; half, which is what the expected "mov w0, v0.s[1]" reads after the ld2.
; NEON-LABEL: load_factor2_with_extract_user:
; NEON: ld2 { v0.4s, v1.4s }, [x0]
; NEON: mov w0, v0.s[1]
; NONEON-LABEL: load_factor2_with_extract_user:
; NONEON-NOT: ld2
define i32 @load_factor2_with_extract_user(<8 x i32>* %a) {
  %1 = load <8 x i32>, <8 x i32>* %a, align 8
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %3 = extractelement <8 x i32> %1, i32 2
  ret i32 %3
}