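; NOTE: the lines below are page residue from a web repository viewer, not
; part of the test; they are commented out so the file parses as LLVM IR.
; You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

; 212 lines
; 5.8 KiB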

; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
; A 256-bit <4 x i64> add is expected to use vpaddq on a ymm register.
; The label directive ties the instruction match to this function's output.
; CHECK-LABEL: test_vpaddq:
; CHECK: vpaddq %ymm
define <4 x i64> @test_vpaddq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}
; A 256-bit <8 x i32> add is expected to use vpaddd on a ymm register.
; The label directive ties the instruction match to this function's output.
; CHECK-LABEL: test_vpaddd:
; CHECK: vpaddd %ymm
define <8 x i32> @test_vpaddd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}
; A 256-bit <16 x i16> add is expected to use vpaddw on a ymm register.
; The label directive ties the instruction match to this function's output.
; CHECK-LABEL: test_vpaddw:
; CHECK: vpaddw %ymm
define <16 x i16> @test_vpaddw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}
; A 256-bit <32 x i8> add is expected to use vpaddb on a ymm register.
; The label directive ties the instruction match to this function's output.
; CHECK-LABEL: test_vpaddb:
; CHECK: vpaddb %ymm
define <32 x i8> @test_vpaddb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}
; A 256-bit <4 x i64> subtract is expected to use vpsubq on a ymm register.
; The label directive ties the instruction match to this function's output.
; CHECK-LABEL: test_vpsubq:
; CHECK: vpsubq %ymm
define <4 x i64> @test_vpsubq(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}
; A 256-bit <8 x i32> subtract is expected to use vpsubd on a ymm register.
; The label directive ties the instruction match to this function's output.
; CHECK-LABEL: test_vpsubd:
; CHECK: vpsubd %ymm
define <8 x i32> @test_vpsubd(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}
; A 256-bit <16 x i16> subtract is expected to use vpsubw on a ymm register.
; The label directive ties the instruction match to this function's output.
; CHECK-LABEL: test_vpsubw:
; CHECK: vpsubw %ymm
define <16 x i16> @test_vpsubw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}
; A 256-bit <32 x i8> subtract is expected to use vpsubb on a ymm register.
; The label directive ties the instruction match to this function's output.
; CHECK-LABEL: test_vpsubb:
; CHECK: vpsubb %ymm
define <32 x i8> @test_vpsubb(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}
; A 256-bit <8 x i32> multiply is expected to use vpmulld on a ymm register.
; The label directive ties the instruction match to this function's output.
; CHECK-LABEL: test_vpmulld:
; CHECK: vpmulld %ymm
define <8 x i32> @test_vpmulld(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}
; A 256-bit <16 x i16> multiply is expected to use vpmullw on a ymm register.
; The label directive ties the instruction match to this function's output.
; CHECK-LABEL: test_vpmullw:
; CHECK: vpmullw %ymm
define <16 x i16> @test_vpmullw(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}
; <16 x i8> multiply. The expected sequence sign-extends both operands to
; 16-bit lanes (vpmovsxbw), multiplies them with vpmullw, then uses vpshufb
; with an even-byte mask and vpunpcklqdq to pack the result into one xmm.
; The label pattern accepts an optional closing quote before the colon,
; since a symbol name containing '-' is presumably emitted quoted.
; CHECK-LABEL: mul-v16i8{{"?}}:
; CHECK: # BB#0:
; CHECK-NEXT: vpmovsxbw %xmm1, %ymm1
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpshufb %xmm2, %xmm1, %xmm1
; CHECK-NEXT: vpshufb %xmm2, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
define <16 x i8> @mul-v16i8(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = mul <16 x i8> %i, %j
  ret <16 x i8> %x
}
; <32 x i8> multiply. The expected sequence handles the upper 128-bit halves
; first (vextracti128 $1), then the lower halves; each half is sign-extended
; to 16-bit lanes, multiplied with vpmullw and packed back with vpshufb and
; vpunpcklqdq. vinserti128 finally recombines the two packed halves.
; The label pattern accepts an optional closing quote before the colon,
; since a symbol name containing '-' is presumably emitted quoted.
; CHECK-LABEL: mul-v32i8{{"?}}:
; CHECK: # BB#0:
; CHECK-NEXT: vextracti128 $1, %ymm1, %xmm2
; CHECK-NEXT: vpmovsxbw %xmm2, %ymm2
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm3
; CHECK-NEXT: vpmovsxbw %xmm3, %ymm3
; CHECK-NEXT: vpmullw %ymm2, %ymm3, %ymm2
; CHECK-NEXT: vextracti128 $1, %ymm2, %xmm3
; CHECK-NEXT: vmovdqa {{.*#+}} xmm4 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u>
; CHECK-NEXT: vpshufb %xmm4, %xmm3, %xmm3
; CHECK-NEXT: vpshufb %xmm4, %xmm2, %xmm2
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
; CHECK-NEXT: vpmovsxbw %xmm1, %ymm1
; CHECK-NEXT: vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT: vpmullw %ymm1, %ymm0, %ymm0
; CHECK-NEXT: vextracti128 $1, %ymm0, %xmm1
; CHECK-NEXT: vpshufb %xmm4, %xmm1, %xmm1
; CHECK-NEXT: vpshufb %xmm4, %xmm0, %xmm0
; CHECK-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
; CHECK-NEXT: retq
define <32 x i8> @mul-v32i8(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = mul <32 x i8> %i, %j
  ret <32 x i8> %x
}
; <4 x i64> multiply. The expected expansion is built from three vpmuludq
; instructions combined with 32-bit vpsrlq/vpsllq shifts and vpaddq.
; The label pattern accepts an optional closing quote before the colon,
; since a symbol name containing '-' is presumably emitted quoted.
; CHECK-LABEL: mul-v4i64{{"?}}:
; CHECK: vpmuludq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
; CHECK-NEXT: vpmuludq %ymm
; CHECK-NEXT: vpsllq $32, %ymm
; CHECK-NEXT: vpaddq %ymm
; CHECK-NEXT: vpsrlq $32, %ymm
; CHECK-NEXT: vpmuludq %ymm
; CHECK-NEXT: vpsllq $32, %ymm
; CHECK-NEXT: vpaddq %ymm
define <4 x i64> @mul-v4i64(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = mul <4 x i64> %i, %j
  ret <4 x i64> %x
}
; Multiplying <8 x i32> by a splat of 2 is expected to become vpaddd.
; The trailing ':' in the label keeps it from also matching the longer
; names mul_const10 and mul_const11.
; CHECK-LABEL: mul_const1:
; CHECK: vpaddd
; CHECK: ret
define <8 x i32> @mul_const1(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %y
}
; Multiplying <4 x i64> by a splat of 4 is expected to become a left shift
; by 2 (vpsllq). The label directive ties the match to this function.
; CHECK-LABEL: mul_const2:
; CHECK: vpsllq $2
; CHECK: ret
define <4 x i64> @mul_const2(<4 x i64> %x) {
  %y = mul <4 x i64> %x, <i64 4, i64 4, i64 4, i64 4>
  ret <4 x i64> %y
}
; Multiplying <16 x i16> by a splat of 8 is expected to become a left shift
; by 3 (vpsllw). The label directive ties the match to this function.
; CHECK-LABEL: mul_const3:
; CHECK: vpsllw $3
; CHECK: ret
define <16 x i16> @mul_const3(<16 x i16> %x) {
  %y = mul <16 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <16 x i16> %y
}
; Multiplying <4 x i64> by a splat of -1 is expected to produce vpxor
; followed by vpsubq. The label directive ties the match to this function.
; CHECK-LABEL: mul_const4:
; CHECK: vpxor
; CHECK: vpsubq
; CHECK: ret
define <4 x i64> @mul_const4(<4 x i64> %x) {
  %y = mul <4 x i64> %x, <i64 -1, i64 -1, i64 -1, i64 -1>
  ret <4 x i64> %y
}
; Multiplying <8 x i32> by an all-zero vector is expected to produce just a
; vxorps immediately followed by the return.
; The label directive ties the match to this function.
; CHECK-LABEL: mul_const5:
; CHECK: vxorps
; CHECK-NEXT: ret
define <8 x i32> @mul_const5(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}
; Multiplying <8 x i32> by a non-splat constant (lanes of 0 and 2) is
; expected to keep a real vpmulld.
; The label directive ties the match to this function.
; CHECK-LABEL: mul_const6:
; CHECK: vpmulld
; CHECK: ret
define <8 x i32> @mul_const6(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 0, i32 0, i32 0, i32 2, i32 0, i32 2, i32 0, i32 0>
  ret <8 x i32> %y
}
; Multiplying the 512-bit <8 x i64> vector by a splat of 2 is expected to
; produce two vpaddq instructions.
; The label directive ties the match to this function.
; CHECK-LABEL: mul_const7:
; CHECK: vpaddq
; CHECK: vpaddq
; CHECK: ret
define <8 x i64> @mul_const7(<8 x i64> %x) {
  %y = mul <8 x i64> %x, <i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2, i64 2>
  ret <8 x i64> %y
}
; Multiplying the 128-bit <8 x i16> vector by a splat of 8 is expected to
; become a left shift by 3 (vpsllw).
; The label directive ties the match to this function.
; CHECK-LABEL: mul_const8:
; CHECK: vpsllw $3
; CHECK: ret
define <8 x i16> @mul_const8(<8 x i16> %x) {
  %y = mul <8 x i16> %x, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %y
}
; Multiplying <8 x i32> by a constant whose only non-zero lane is lane 0
; (value 2) is expected to keep a real vpmulld.
; The label directive ties the match to this function.
; CHECK-LABEL: mul_const9:
; CHECK: vpmulld
; CHECK: ret
define <8 x i32> @mul_const9(<8 x i32> %x) {
  %y = mul <8 x i32> %x, <i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i32> %y
}
; Multiplying <4 x i32> by a splat of 0x01010101 is expected to keep a real
; vpmulld. The label directive ties the match to this function.
; CHECK-LABEL: mul_const10:
; CHECK: vpmulld
; CHECK: ret
define <4 x i32> @mul_const10(<4 x i32> %x) {
  ; %x * 0x01010101 (16843009 decimal)
  %m = mul <4 x i32> %x, <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
  ret <4 x i32> %m
}
; Multiplying <4 x i32> by a splat of 0x80808080 is expected to keep a real
; vpmulld. The label directive ties the match to this function.
; CHECK-LABEL: mul_const11:
; CHECK: vpmulld
; CHECK: ret
define <4 x i32> @mul_const11(<4 x i32> %x) {
  ; %x * 0x80808080 (2155905152 decimal)
  %m = mul <4 x i32> %x, <i32 2155905152, i32 2155905152, i32 2155905152, i32 2155905152>
  ret <4 x i32> %m
}