You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1039 lines
24 KiB
1039 lines
24 KiB
/*
|
|
* Copyright (C) 2014 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#include "asm_support_x86.S"
|
|
|
|
/* Name under which the routine below is defined (see DEFINE_FUNCTION MEMCMP). */
#define MEMCMP      __memcmp16

/*
 * C-level view of the routine:
 *   int32_t memcmp16_compare(const uint16_t* s0, const uint16_t* s1, size_t count);
 * The symbol actually defined in this file is MEMCMP (i.e. __memcmp16).
 * `count` is a number of 16-bit units: the code doubles it to obtain a byte
 * length and compares the buffers one zero-extended 16-bit word at a time.
 */

/* Assembler-local label: L(foo) becomes .Lfoo. */
#ifndef L
# define L(label)   .L##label
#endif

/*
 * CFI bookkeeping that accompanies a 4-byte push/pop of REG.  The CFI_* helpers
 * are not defined in this file (presumably they come from asm_support_x86.S).
 */
#define CFI_PUSH(REG) \
    CFI_ADJUST_CFA_OFFSET(4); \
    CFI_REL_OFFSET(REG, 0)

#define CFI_POP(REG) \
    CFI_ADJUST_CFA_OFFSET(-4); \
    CFI_RESTORE(REG)

/* Push/pop a 32-bit register together with the matching CFI annotation. */
#define PUSH(REG)   pushl REG; CFI_PUSH (REG)
#define POP(REG)    popl REG; CFI_POP (REG)

/*
 * Stack offsets of the three arguments relative to %esp at function entry,
 * i.e. before any PUSH: BLK1 = 4, BLK2 = 8, LEN = 12.
 */
#define PARMS       4
#define BLK1        PARMS
#define BLK2        BLK1+4
#define LEN         BLK2+4

/* Epilogue for paths that pushed %ebx, %esi and %edi (in that order). */
#define RETURN_END  POP (%edi); POP (%esi); POP (%ebx); ret
/* Same epilogue, then re-establish the CFI state for the code that follows. */
#define RETURN      RETURN_END; CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16); CFI_REMEMBER_STATE
/*
 * MEMCMP(s0, s1, count) -- compare two buffers of 16-bit units.
 *
 * In:   BLK1(%esp) = s0, BLK2(%esp) = s1, LEN(%esp) = count (16-bit units).
 * Out:  %eax = 0 when no differing word is found, otherwise
 *       (word from s0) - (word from s1) for the first differing word, both
 *       zero-extended to 32 bits.
 * Uses: %ecx, %edx, %xmm0-%xmm3 as scratch; %ebx/%esi/%edi are pushed and
 *       popped around their use.
 *
 * This entry fragment converts the count to bytes and picks a strategy:
 *   - 0 bytes        -> L(zero)
 *   - >= 48 bytes    -> L(48bytesormore) (SSE path)
 *   - otherwise      -> L(less48bytes) (word-by-word tail chain)
 */
DEFINE_FUNCTION MEMCMP
    movl       LEN(%esp), %ecx          /* ecx = count in 16-bit units */

    shl        $1, %ecx                 /* ecx = length in bytes; the top bit of count is shifted out */
    jz         L(zero)                  /* nothing to compare */

    movl       BLK1(%esp), %eax         /* eax = s0 */
    cmp        $48, %ecx
    movl       BLK2(%esp), %edx         /* edx = s1 (mov does not disturb the flags from cmp) */
    jae        L(48bytesormore)

    /*
     * Short input: the tail chain addresses words with negative offsets from
     * the END of each buffer, so move both pointers to the end first.
     * %ebx is used as scratch there and is popped before its ret.
     */
    PUSH       (%ebx)
    add        %ecx, %edx               /* edx = s1 + length */
    add        %ecx, %eax               /* eax = s0 + length */
    jmp        L(less48bytes)

    /* CFI only: the code below runs with nothing pushed. */
    CFI_POP    (%ebx)

    .p2align 4
L(zero):
    xor        %eax, %eax               /* zero-length compare returns 0 */
    ret
.p2align 4
|
|
L(48bytesormore):
|
|
PUSH (%ebx)
|
|
PUSH (%esi)
|
|
PUSH (%edi)
|
|
CFI_REMEMBER_STATE
|
|
movdqu (%eax), %xmm3
|
|
movdqu (%edx), %xmm0
|
|
movl %eax, %edi
|
|
movl %edx, %esi
|
|
pcmpeqb %xmm0, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 16(%edi), %edi
|
|
|
|
sub $0xffff, %edx
|
|
lea 16(%esi), %esi
|
|
jnz L(less16bytes)
|
|
mov %edi, %edx
|
|
and $0xf, %edx
|
|
xor %edx, %edi
|
|
sub %edx, %esi
|
|
add %edx, %ecx
|
|
mov %esi, %edx
|
|
and $0xf, %edx
|
|
jz L(shr_0)
|
|
xor %edx, %esi
|
|
|
|
cmp $0, %edx
|
|
je L(shr_0)
|
|
cmp $2, %edx
|
|
je L(shr_2)
|
|
cmp $4, %edx
|
|
je L(shr_4)
|
|
cmp $6, %edx
|
|
je L(shr_6)
|
|
cmp $8, %edx
|
|
je L(shr_8)
|
|
cmp $10, %edx
|
|
je L(shr_10)
|
|
cmp $12, %edx
|
|
je L(shr_12)
|
|
jmp L(shr_14)
|
|
|
|
.p2align 4
|
|
L(shr_0):
|
|
cmp $80, %ecx
|
|
jae L(shr_0_gobble)
|
|
lea -48(%ecx), %ecx
|
|
xor %eax, %eax
|
|
movaps (%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
movaps 16(%esi), %xmm2
|
|
pcmpeqb 16(%edi), %xmm2
|
|
pand %xmm1, %xmm2
|
|
pmovmskb %xmm2, %edx
|
|
add $32, %edi
|
|
add $32, %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea (%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_0_gobble):
|
|
lea -48(%ecx), %ecx
|
|
movdqa (%esi), %xmm0
|
|
xor %eax, %eax
|
|
pcmpeqb (%edi), %xmm0
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm2
|
|
pcmpeqb 16(%edi), %xmm2
|
|
L(shr_0_gobble_loop):
|
|
pand %xmm0, %xmm2
|
|
sub $32, %ecx
|
|
pmovmskb %xmm2, %edx
|
|
movdqa %xmm0, %xmm1
|
|
movdqa 32(%esi), %xmm0
|
|
movdqa 48(%esi), %xmm2
|
|
sbb $0xffff, %edx
|
|
pcmpeqb 32(%edi), %xmm0
|
|
pcmpeqb 48(%edi), %xmm2
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
jz L(shr_0_gobble_loop)
|
|
|
|
pand %xmm0, %xmm2
|
|
cmp $0, %ecx
|
|
jge L(shr_0_gobble_loop_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_0_gobble_loop_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm2, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea (%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_2):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_2_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $2,(%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $2,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 2(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_2_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $2,(%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $2,16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_2_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $2,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $2,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_2_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_2_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_2_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 2(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_4):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_4_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $4,(%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $4,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 4(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_4_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $4,(%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $4,16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_4_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $4,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $4,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_4_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_4_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_4_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 4(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_6):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_6_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $6,(%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $6,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 6(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_6_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $6,(%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $6,16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_6_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $6,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $6,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_6_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_6_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_6_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 6(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_8):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_8_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $8,(%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $8,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 8(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_8_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $8,(%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $8,16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_8_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $8,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $8,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_8_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_8_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_8_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 8(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_10):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_10_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $10, (%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $10,%xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 10(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_10_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $10, (%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $10, 16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_10_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $10,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $10,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_10_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_10_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_10_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 10(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_12):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_12_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $12, (%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $12, %xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 12(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_12_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $12, (%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $12, 16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_12_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $12,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $12,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_12_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_12_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_12_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 12(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_14):
|
|
cmp $80, %ecx
|
|
lea -48(%ecx), %ecx
|
|
mov %edx, %eax
|
|
jae L(shr_14_gobble)
|
|
|
|
movdqa 16(%esi), %xmm1
|
|
movdqa %xmm1, %xmm2
|
|
palignr $14, (%esi), %xmm1
|
|
pcmpeqb (%edi), %xmm1
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $14, %xmm2, %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
pand %xmm1, %xmm3
|
|
pmovmskb %xmm3, %edx
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 14(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(shr_14_gobble):
|
|
sub $32, %ecx
|
|
movdqa 16(%esi), %xmm0
|
|
palignr $14, (%esi), %xmm0
|
|
pcmpeqb (%edi), %xmm0
|
|
|
|
movdqa 32(%esi), %xmm3
|
|
palignr $14, 16(%esi), %xmm3
|
|
pcmpeqb 16(%edi), %xmm3
|
|
|
|
L(shr_14_gobble_loop):
|
|
pand %xmm0, %xmm3
|
|
sub $32, %ecx
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
|
|
movdqa 64(%esi), %xmm3
|
|
palignr $14,48(%esi), %xmm3
|
|
sbb $0xffff, %edx
|
|
movdqa 48(%esi), %xmm0
|
|
palignr $14,32(%esi), %xmm0
|
|
pcmpeqb 32(%edi), %xmm0
|
|
lea 32(%esi), %esi
|
|
pcmpeqb 48(%edi), %xmm3
|
|
|
|
lea 32(%edi), %edi
|
|
jz L(shr_14_gobble_loop)
|
|
pand %xmm0, %xmm3
|
|
|
|
cmp $0, %ecx
|
|
jge L(shr_14_gobble_next)
|
|
inc %edx
|
|
add $32, %ecx
|
|
L(shr_14_gobble_next):
|
|
test %edx, %edx
|
|
jnz L(exit)
|
|
|
|
pmovmskb %xmm3, %edx
|
|
movdqa %xmm0, %xmm1
|
|
lea 32(%edi), %edi
|
|
lea 32(%esi), %esi
|
|
sub $0xffff, %edx
|
|
jnz L(exit)
|
|
|
|
lea (%ecx, %edi,1), %eax
|
|
lea 14(%ecx, %esi,1), %edx
|
|
POP (%edi)
|
|
POP (%esi)
|
|
jmp L(less48bytes)
|
|
|
|
CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
|
|
CFI_REMEMBER_STATE
|
|
.p2align 4
|
|
L(exit):
|
|
pmovmskb %xmm1, %ebx
|
|
sub $0xffff, %ebx
|
|
jz L(first16bytes)
|
|
lea -16(%esi), %esi
|
|
lea -16(%edi), %edi
|
|
mov %ebx, %edx
|
|
|
|
L(first16bytes):
|
|
add %eax, %esi
|
|
L(less16bytes):
|
|
test %dl, %dl
|
|
jz L(next_four_words)
|
|
test $15, %dl
|
|
jz L(second_two_words)
|
|
test $3, %dl
|
|
jz L(second_word)
|
|
movzwl -16(%edi), %eax
|
|
movzwl -16(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(second_word):
|
|
movzwl -14(%edi), %eax
|
|
movzwl -14(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(second_two_words):
|
|
test $63, %dl
|
|
jz L(fourth_word)
|
|
movzwl -12(%edi), %eax
|
|
movzwl -12(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(fourth_word):
|
|
movzwl -10(%edi), %eax
|
|
movzwl -10(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(next_four_words):
|
|
test $15, %dh
|
|
jz L(fourth_two_words)
|
|
test $3, %dh
|
|
jz L(sixth_word)
|
|
movzwl -8(%edi), %eax
|
|
movzwl -8(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(sixth_word):
|
|
movzwl -6(%edi), %eax
|
|
movzwl -6(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(fourth_two_words):
|
|
test $63, %dh
|
|
jz L(eighth_word)
|
|
movzwl -4(%edi), %eax
|
|
movzwl -4(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
.p2align 4
|
|
L(eighth_word):
|
|
movzwl -2(%edi), %eax
|
|
movzwl -2(%esi), %ebx
|
|
subl %ebx, %eax
|
|
RETURN
|
|
|
|
|
|
CFI_PUSH (%ebx)
|
|
|
|
.p2align 4
|
|
L(more8bytes):
|
|
cmp $16, %ecx
|
|
jae L(more16bytes)
|
|
cmp $8, %ecx
|
|
je L(8bytes)
|
|
cmp $10, %ecx
|
|
je L(10bytes)
|
|
cmp $12, %ecx
|
|
je L(12bytes)
|
|
jmp L(14bytes)
|
|
|
|
.p2align 4
|
|
L(more16bytes):
|
|
cmp $24, %ecx
|
|
jae L(more24bytes)
|
|
cmp $16, %ecx
|
|
je L(16bytes)
|
|
cmp $18, %ecx
|
|
je L(18bytes)
|
|
cmp $20, %ecx
|
|
je L(20bytes)
|
|
jmp L(22bytes)
|
|
|
|
.p2align 4
|
|
L(more24bytes):
|
|
cmp $32, %ecx
|
|
jae L(more32bytes)
|
|
cmp $24, %ecx
|
|
je L(24bytes)
|
|
cmp $26, %ecx
|
|
je L(26bytes)
|
|
cmp $28, %ecx
|
|
je L(28bytes)
|
|
jmp L(30bytes)
|
|
|
|
.p2align 4
|
|
L(more32bytes):
|
|
cmp $40, %ecx
|
|
jae L(more40bytes)
|
|
cmp $32, %ecx
|
|
je L(32bytes)
|
|
cmp $34, %ecx
|
|
je L(34bytes)
|
|
cmp $36, %ecx
|
|
je L(36bytes)
|
|
jmp L(38bytes)
|
|
|
|
.p2align 4
|
|
L(less48bytes):
|
|
cmp $8, %ecx
|
|
jae L(more8bytes)
|
|
cmp $2, %ecx
|
|
je L(2bytes)
|
|
cmp $4, %ecx
|
|
je L(4bytes)
|
|
jmp L(6bytes)
|
|
|
|
.p2align 4
|
|
L(more40bytes):
|
|
cmp $40, %ecx
|
|
je L(40bytes)
|
|
cmp $42, %ecx
|
|
je L(42bytes)
|
|
cmp $44, %ecx
|
|
je L(44bytes)
|
|
jmp L(46bytes)
|
|
|
|
.p2align 4
|
|
L(46bytes):
|
|
movzwl -46(%eax), %ecx
|
|
movzwl -46(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(44bytes):
|
|
movzwl -44(%eax), %ecx
|
|
movzwl -44(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(42bytes):
|
|
movzwl -42(%eax), %ecx
|
|
movzwl -42(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(40bytes):
|
|
movzwl -40(%eax), %ecx
|
|
movzwl -40(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(38bytes):
|
|
movzwl -38(%eax), %ecx
|
|
movzwl -38(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(36bytes):
|
|
movzwl -36(%eax), %ecx
|
|
movzwl -36(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(34bytes):
|
|
movzwl -34(%eax), %ecx
|
|
movzwl -34(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(32bytes):
|
|
movzwl -32(%eax), %ecx
|
|
movzwl -32(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(30bytes):
|
|
movzwl -30(%eax), %ecx
|
|
movzwl -30(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(28bytes):
|
|
movzwl -28(%eax), %ecx
|
|
movzwl -28(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(26bytes):
|
|
movzwl -26(%eax), %ecx
|
|
movzwl -26(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(24bytes):
|
|
movzwl -24(%eax), %ecx
|
|
movzwl -24(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(22bytes):
|
|
movzwl -22(%eax), %ecx
|
|
movzwl -22(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(20bytes):
|
|
movzwl -20(%eax), %ecx
|
|
movzwl -20(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(18bytes):
|
|
movzwl -18(%eax), %ecx
|
|
movzwl -18(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(16bytes):
|
|
movzwl -16(%eax), %ecx
|
|
movzwl -16(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(14bytes):
|
|
movzwl -14(%eax), %ecx
|
|
movzwl -14(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(12bytes):
|
|
movzwl -12(%eax), %ecx
|
|
movzwl -12(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(10bytes):
|
|
movzwl -10(%eax), %ecx
|
|
movzwl -10(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(8bytes):
|
|
movzwl -8(%eax), %ecx
|
|
movzwl -8(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(6bytes):
|
|
movzwl -6(%eax), %ecx
|
|
movzwl -6(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(4bytes):
|
|
movzwl -4(%eax), %ecx
|
|
movzwl -4(%edx), %ebx
|
|
subl %ebx, %ecx
|
|
jne L(memcmp16_exit)
|
|
L(2bytes):
|
|
movzwl -2(%eax), %eax
|
|
movzwl -2(%edx), %ebx
|
|
subl %ebx, %eax
|
|
POP (%ebx)
|
|
ret
|
|
CFI_PUSH (%ebx)
|
|
|
|
.p2align 4
|
|
L(memcmp16_exit):
|
|
POP (%ebx)
|
|
mov %ecx, %eax
|
|
ret
|
|
END_FUNCTION MEMCMP
|