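/*
NOTE(review): the lines below look like repository web-viewer text captured
together with this file rather than part of the assembly source; they are kept
inside a comment so they cannot be read as assembler statements.

You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
149 lines
4.0 KiB
149 lines
4.0 KiB
*/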
/*
Copyright (C) 2019 The Android Open Source Project
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
 * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
 * Redistributions in binary form must reproduce the above copyright
   notice, this list of conditions and the following disclaimer in
   the documentation and/or other materials provided with the
   distribution.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
SUCH DAMAGE.
*/

#include <private/bionic_asm.h>

/*
 * Symbol name under which the routine in this file is assembled.
 * If WMEMSET is already defined when this point is reached, that
 * definition is kept; otherwise the routine is emitted as wmemset_avx2.
 */
#ifndef WMEMSET
#define WMEMSET wmemset_avx2
#endif

/*
 * WMEMSET: store a 32-bit value into consecutive 32-bit elements of a
 * buffer, using 32-byte AVX2 stores for the bulk of the work.
 *
 * Syntax: AT&T (GAS), 32-bit x86. All arguments are read from the stack.
 * Presumably this implements wmemset(wchar_t *s, wchar_t c, size_t n)
 * with a 4-byte wchar_t; confirm against the C prototype.
 *
 * In (offsets are relative to %esp at function entry):
 *    4(%esp)  destination pointer
 *    8(%esp)  32-bit fill value
 *   12(%esp)  number of 32-bit elements to store (n)
 * Out:
 *   %eax = destination pointer, on every path
 * Saved and restored: %ebp, %ebx, %edi, %esi
 * Clobbered: %ecx, %edx, %ymm0, EFLAGS. vzeroupper runs before every
 *   return, so the upper halves of the YMM registers are zero on exit.
 *
 * Stack frame after the prologue (five 4-byte pushes):
 *    0(%esp)  scratch slot, holds the element count covered by vector code
 *    4(%esp)  saved %esi      8(%esp)  saved %edi
 *   12(%esp)  saved %ebx     16(%esp)  saved %ebp
 *   20(%esp)  return address
 *   24(%esp)  destination    28(%esp)  value    32(%esp)  n
 *
 * Strategy:
 *   n == 0   return immediately.
 *   n <  32  scalar loop only, one element per iteration.
 *   n >= 32  let V = n & -32. The first V elements are written in groups
 *            of 32 elements (128 bytes, four ymm stores):
 *              - an unrolled loop writes 8 groups (256 elements, 1024
 *                bytes) per iteration while at least 8 groups remain;
 *              - a second loop writes the remaining (V/32) mod 8 groups;
 *            the last n - V elements, if any, go through the scalar loop.
 *
 * All vector stores are vmovdqu, so the code itself imposes no 32-byte
 * alignment requirement on the destination.
 */
ENTRY(WMEMSET)
# BB#0:
	pushl	%ebp
	pushl	%ebx
	pushl	%edi
	pushl	%esi
	pushl	%eax			/* reserves the scratch slot at (%esp); the pushed value is never read */
	movl	32(%esp), %ecx		/* ecx = n */
	movl	24(%esp), %eax		/* eax = destination (return value) */
	testl	%ecx, %ecx
	je	.LBB0_12		/* n == 0: nothing to store */
# BB#1:
	movl	28(%esp), %edx		/* edx = fill value */
	xorl	%edi, %edi		/* edi = elements already written = 0 */
	movl	%eax, %esi		/* esi = scalar-loop write pointer */
	cmpl	$32, %ecx
	jb	.LBB0_10		/* n < 32 (unsigned): scalar loop only */
# BB#2:
	movl	%ecx, %eax
	andl	$-32, %eax		/* eax = V = n rounded down to a multiple of 32 */
	vmovd	%edx, %xmm0
	vpbroadcastd	%xmm0, %ymm0	/* ymm0 = fill value in all 8 dword lanes */
	movl	%eax, (%esp)            # 4-byte Spill
	leal	-32(%eax), %esi		/* esi = V - 32 */
	movl	%esi, %eax
	shrl	$5, %eax		/* eax = (number of 32-element groups) - 1 */
	leal	1(%eax), %edi
	andl	$7, %edi		/* edi = groups mod 8, left for the second loop */
	xorl	%ebx, %ebx		/* ebx = element index reached by the unrolled loop */
	cmpl	$224, %esi
	jb	.LBB0_5			/* fewer than 8 groups: skip the unrolled loop */
# BB#3:
	movl	24(%esp), %esi
	leal	992(%esi), %ebp		/* ebp = destination + 992, so displacements -992..0 span 1024 bytes */
	leal	-1(%edi), %esi
	subl	%eax, %esi		/* esi = -(groups - groups mod 8): negative multiple of 8, counts up to 0 */
	xorl	%ebx, %ebx
	.p2align	4, 0x90
.LBB0_4:                                # =>This Inner Loop Header: Depth=1
	/* 32 stores x 32 bytes = 1024 bytes = 256 elements per iteration. */
	vmovdqu	%ymm0, -992(%ebp,%ebx,4)
	vmovdqu	%ymm0, -960(%ebp,%ebx,4)
	vmovdqu	%ymm0, -928(%ebp,%ebx,4)
	vmovdqu	%ymm0, -896(%ebp,%ebx,4)
	vmovdqu	%ymm0, -864(%ebp,%ebx,4)
	vmovdqu	%ymm0, -832(%ebp,%ebx,4)
	vmovdqu	%ymm0, -800(%ebp,%ebx,4)
	vmovdqu	%ymm0, -768(%ebp,%ebx,4)
	vmovdqu	%ymm0, -736(%ebp,%ebx,4)
	vmovdqu	%ymm0, -704(%ebp,%ebx,4)
	vmovdqu	%ymm0, -672(%ebp,%ebx,4)
	vmovdqu	%ymm0, -640(%ebp,%ebx,4)
	vmovdqu	%ymm0, -608(%ebp,%ebx,4)
	vmovdqu	%ymm0, -576(%ebp,%ebx,4)
	vmovdqu	%ymm0, -544(%ebp,%ebx,4)
	vmovdqu	%ymm0, -512(%ebp,%ebx,4)
	vmovdqu	%ymm0, -480(%ebp,%ebx,4)
	vmovdqu	%ymm0, -448(%ebp,%ebx,4)
	vmovdqu	%ymm0, -416(%ebp,%ebx,4)
	vmovdqu	%ymm0, -384(%ebp,%ebx,4)
	vmovdqu	%ymm0, -352(%ebp,%ebx,4)
	vmovdqu	%ymm0, -320(%ebp,%ebx,4)
	vmovdqu	%ymm0, -288(%ebp,%ebx,4)
	vmovdqu	%ymm0, -256(%ebp,%ebx,4)
	vmovdqu	%ymm0, -224(%ebp,%ebx,4)
	vmovdqu	%ymm0, -192(%ebp,%ebx,4)
	vmovdqu	%ymm0, -160(%ebp,%ebx,4)
	vmovdqu	%ymm0, -128(%ebp,%ebx,4)
	vmovdqu	%ymm0, -96(%ebp,%ebx,4)
	vmovdqu	%ymm0, -64(%ebp,%ebx,4)
	vmovdqu	%ymm0, -32(%ebp,%ebx,4)
	vmovdqu	%ymm0, (%ebp,%ebx,4)
	addl	$256, %ebx              # imm = 0x100
	addl	$8, %esi		/* 8 groups done; loop ends when the negative counter reaches 0 */
	jne	.LBB0_4
.LBB0_5:
	testl	%edi, %edi
	movl	24(%esp), %eax		/* eax = destination again; movl leaves the flags from testl intact */
	je	.LBB0_8			/* no leftover groups */
# BB#6:
	leal	(%eax,%ebx,4), %esi
	addl	$96, %esi		/* esi = destination + 4*ebx + 96, so displacements -96..0 span one group */
	negl	%edi			/* edi = -(leftover groups), counts up to 0 */
	.p2align	4, 0x90
.LBB0_7:                                # =>This Inner Loop Header: Depth=1
	/* One group per iteration: 4 stores x 32 bytes = 32 elements. */
	vmovdqu	%ymm0, -96(%esi)
	vmovdqu	%ymm0, -64(%esi)
	vmovdqu	%ymm0, -32(%esi)
	vmovdqu	%ymm0, (%esi)
	subl	$-128, %esi		/* esi += 128 bytes (-128 fits a sign-extended 8-bit immediate, +128 does not) */
	addl	$1, %edi
	jne	.LBB0_7
.LBB0_8:
	movl	(%esp), %edi            # 4-byte Reload
	cmpl	%ecx, %edi
	je	.LBB0_12		/* V == n: no scalar tail */
# BB#9:
	leal	(%eax,%edi,4), %esi	/* esi = address of the first element not yet written */
.LBB0_10:
	subl	%edi, %ecx		/* ecx = n - (elements already written); nonzero on both entry paths */
	.p2align	4, 0x90
.LBB0_11:                               # =>This Inner Loop Header: Depth=1
	movl	%edx, (%esi)
	addl	$4, %esi
	addl	$-1, %ecx
	jne	.LBB0_11
.LBB0_12:
	addl	$4, %esp		/* drop the scratch slot */
	popl	%esi
	popl	%edi
	popl	%ebx
	popl	%ebp
	vzeroupper
	retl
END(WMEMSET)