/* Copyright (c) 2014, Intel Corporation All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, * this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, * this list of conditions and the following disclaimer in the documentation * and/or other materials provided with the distribution. * Neither the name of Intel Corporation nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

/* Local (non-exported) label helper: L(foo) expands to .Lfoo.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Thin wrappers around the assembler's call-frame-information (CFI)
   directives.  Each one may be pre-defined by the including file.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY (name): declare NAME as a global function symbol, align it to
   16 bytes, emit its label and open a CFI region.  */
#ifndef ENTRY
# define ENTRY(name)		\
	.type name, @function;	\
	.globl name;		\
	.p2align 4;		\
name:				\
	cfi_startproc
#endif

/* END (name): close the CFI region opened by ENTRY and record the
   symbol size.  */
#ifndef END
# define END(name)		\
	cfi_endproc;		\
	.size name, .-name
#endif

/* CFI bookkeeping for one 4-byte push / pop of REG.  These emit unwind
   information only; they do not touch the stack.  */
#define CFI_PUSH(REG)			\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)			\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* A real pushl / popl of REG together with the matching CFI update.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Name of the generated entry point; overridable by the including file.  */
#ifndef STRCPY
# define STRCPY	strcpy_generic
#endif

/* stpncpy is built as the combination of the strncpy and stpcpy variants.  */
#ifdef USE_AS_STPNCPY
# define USE_AS_STRNCPY
# define USE_AS_STPCPY
#endif

/* PARMS    - offset from %esp to the first stack argument once ENTRANCE
	      has run: 4 bytes of return address plus 4 bytes for every
	      register ENTRANCE pushes.
   ENTRANCE - save the registers the routine uses.
   RETURN   - restore them in reverse order and return.

   RETURN is expanded at many exit points in the middle of the function.
   The POPs mark the registers as restored for the unwinder, but the code
   that follows each `ret' in address order is reached by jumps with the
   registers still saved on the stack.  The CFI_PUSH sequence after `ret'
   therefore has to re-create exactly the state ENTRANCE established: same
   registers, same order.  */
#ifdef USE_AS_STRNCPY
# define PARMS		16
# define ENTRANCE	PUSH(%ebx); PUSH(%esi); PUSH(%edi)
# define RETURN		POP(%edi); POP(%esi); POP(%ebx); ret; CFI_PUSH(%ebx); CFI_PUSH(%esi); CFI_PUSH(%edi);
#else
# define PARMS		12
# define ENTRANCE	PUSH(%esi); PUSH(%edi)
# define RETURN		POP(%edi); POP(%esi); ret; CFI_PUSH(%esi); CFI_PUSH(%edi);
#endif

/* Stack offsets of the arguments, in declaration order.  */
#define STR1	PARMS
#define STR2	STR1+4
#define LEN	STR2+4

#if (defined SHARED || defined __PIC__)
/* Position-independent build: jump-table entries are stored as offsets
   relative to the table base B.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into ECX and branch to it.  TABLE is a
   jump table with relative offsets.  INDEX is a register that contains
   the index into the jump table.  SCALE is the scale of INDEX.  */

# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
	/* We first load PC into ECX.  */			\
	call	__x86.get_pc_thunk.cx;				\
	/* Get the address of the jump table.  */		\
	addl	$(TABLE - .), %ecx;				\
	/* Get the entry and convert the relative offset to the	\
	   absolute address.
*/ \ addl (%ecx,INDEX,SCALE), %ecx; \ /* We loaded the jump table and adjuested ECX. Go. */ \ jmp *%ecx #else # define JMPTBL(I, B) I /* Branch to an entry in a jump table. TABLE is a jump table with absolute offsets. INDEX is a register contains the index into the jump table. SCALE is the scale of INDEX. */ # define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \ jmp *TABLE(,INDEX,SCALE) #endif .text ENTRY (STRCPY) ENTRANCE mov STR1(%esp), %edi mov STR2(%esp), %esi #ifdef USE_AS_STRNCPY movl LEN(%esp), %ebx test %ebx, %ebx jz L(ExitZero) #endif mov %esi, %ecx #ifndef USE_AS_STPCPY mov %edi, %eax /* save result */ #endif and $15, %ecx jz L(SourceStringAlignmentZero) and $-16, %esi pxor %xmm0, %xmm0 pxor %xmm1, %xmm1 pcmpeqb (%esi), %xmm1 #ifdef USE_AS_STRNCPY add %ecx, %ebx #endif pmovmskb %xmm1, %edx shr %cl, %edx #ifdef USE_AS_STRNCPY #ifdef USE_AS_STPCPY cmp $16, %ebx jbe L(CopyFrom1To16BytesTailCase2OrCase3) #else cmp $17, %ebx jbe L(CopyFrom1To16BytesTailCase2OrCase3) #endif #endif test %edx, %edx jnz L(CopyFrom1To16BytesTail) pcmpeqb 16(%esi), %xmm0 pmovmskb %xmm0, %edx #ifdef USE_AS_STRNCPY #ifdef USE_AS_STPCPY cmp $32, %ebx jbe L(CopyFrom1To32BytesCase2OrCase3) #else cmp $33, %ebx jbe L(CopyFrom1To32BytesCase2OrCase3) #endif #endif test %edx, %edx jnz L(CopyFrom1To32Bytes) movdqu (%esi, %ecx), %xmm1 /* copy 16 bytes */ movdqu %xmm1, (%edi) sub %ecx, %edi mov %edi, %edx mov $16, %ecx and $15, %edx jz L(Align16Both) /* If source adress alignment != destination adress alignment */ .p2align 4 L(Unalign16Both): movdqa (%esi, %ecx), %xmm1 movaps 16(%esi, %ecx), %xmm2 movdqu %xmm1, (%edi, %ecx) pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %edx add $16, %ecx #ifdef USE_AS_STRNCPY sub $48, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) test %edx, %edx jnz L(CopyFrom1To16BytesUnalignedXmm2) #else test %edx, %edx jnz L(CopyFrom1To16Bytes) #endif movaps 16(%esi, %ecx), %xmm3 movdqu %xmm2, (%edi, %ecx) pcmpeqb %xmm3, %xmm0 pmovmskb %xmm0, %edx add $16, %ecx #ifdef USE_AS_STRNCPY sub 
$16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) test %edx, %edx jnz L(CopyFrom1To16BytesUnalignedXmm3) #else test %edx, %edx jnz L(CopyFrom1To16Bytes) #endif movaps 16(%esi, %ecx), %xmm4 movdqu %xmm3, (%edi, %ecx) pcmpeqb %xmm4, %xmm0 pmovmskb %xmm0, %edx add $16, %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) test %edx, %edx jnz L(CopyFrom1To16BytesUnalignedXmm4) #else test %edx, %edx jnz L(CopyFrom1To16Bytes) #endif movaps 16(%esi, %ecx), %xmm1 movdqu %xmm4, (%edi, %ecx) pcmpeqb %xmm1, %xmm0 pmovmskb %xmm0, %edx add $16, %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) test %edx, %edx jnz L(CopyFrom1To16BytesUnalignedXmm1) #else test %edx, %edx jnz L(CopyFrom1To16Bytes) #endif movaps 16(%esi, %ecx), %xmm2 movdqu %xmm1, (%edi, %ecx) pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %edx add $16, %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) test %edx, %edx jnz L(CopyFrom1To16BytesUnalignedXmm2) #else test %edx, %edx jnz L(CopyFrom1To16Bytes) #endif movaps 16(%esi, %ecx), %xmm3 movdqu %xmm2, (%edi, %ecx) pcmpeqb %xmm3, %xmm0 pmovmskb %xmm0, %edx add $16, %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) test %edx, %edx jnz L(CopyFrom1To16BytesUnalignedXmm3) #else test %edx, %edx jnz L(CopyFrom1To16Bytes) #endif movdqu %xmm3, (%edi, %ecx) mov %esi, %edx lea 16(%esi, %ecx), %esi and $-0x40, %esi sub %esi, %edx sub %edx, %edi #ifdef USE_AS_STRNCPY lea 64+64(%ebx, %edx), %ebx #endif L(Unaligned64Loop): movaps (%esi), %xmm2 movaps %xmm2, %xmm4 movaps 16(%esi), %xmm5 movaps 32(%esi), %xmm3 movaps %xmm3, %xmm6 movaps 48(%esi), %xmm7 pminub %xmm5, %xmm2 pminub %xmm7, %xmm3 pminub %xmm2, %xmm3 pcmpeqb %xmm0, %xmm3 pmovmskb %xmm3, %edx #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(UnalignedLeaveCase2OrCase3) #endif test %edx, %edx jnz L(Unaligned64Leave) L(Unaligned64Loop_start): add $64, %edi add $64, %esi movdqu %xmm4, -64(%edi) movaps (%esi), %xmm2 movdqa %xmm2, 
%xmm4 movdqu %xmm5, -48(%edi) movaps 16(%esi), %xmm5 pminub %xmm5, %xmm2 movaps 32(%esi), %xmm3 movdqu %xmm6, -32(%edi) movaps %xmm3, %xmm6 movdqu %xmm7, -16(%edi) movaps 48(%esi), %xmm7 pminub %xmm7, %xmm3 pminub %xmm2, %xmm3 pcmpeqb %xmm3, %xmm0 pmovmskb %xmm0, %edx #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(UnalignedLeaveCase2OrCase3) #endif test %edx, %edx jz L(Unaligned64Loop_start) L(Unaligned64Leave): pxor %xmm0, %xmm0 pxor %xmm1, %xmm1 pcmpeqb %xmm4, %xmm0 pcmpeqb %xmm5, %xmm1 pmovmskb %xmm0, %edx pmovmskb %xmm1, %ecx test %edx, %edx jnz L(CopyFrom1To16BytesUnaligned_0) test %ecx, %ecx jnz L(CopyFrom1To16BytesUnaligned_16) pcmpeqb %xmm6, %xmm0 pcmpeqb %xmm7, %xmm1 pmovmskb %xmm0, %edx pmovmskb %xmm1, %ecx test %edx, %edx jnz L(CopyFrom1To16BytesUnaligned_32) bsf %ecx, %edx movdqu %xmm4, (%edi) movdqu %xmm5, 16(%edi) movdqu %xmm6, 32(%edi) #ifdef USE_AS_STRNCPY #ifdef USE_AS_STPCPY lea 48(%edi, %edx), %eax #endif movdqu %xmm7, 48(%edi) add $15, %ebx sub %edx, %ebx lea 49(%edi, %edx), %edi jmp L(StrncpyFillTailWithZero) #else add $48, %esi add $48, %edi BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) #endif /* If source adress alignment == destination adress alignment */ L(SourceStringAlignmentZero): pxor %xmm0, %xmm0 movdqa (%esi), %xmm1 pcmpeqb %xmm1, %xmm0 pmovmskb %xmm0, %edx #ifdef USE_AS_STRNCPY #ifdef USE_AS_STPCPY cmp $16, %ebx jbe L(CopyFrom1To16BytesTail1Case2OrCase3) #else cmp $17, %ebx jbe L(CopyFrom1To16BytesTail1Case2OrCase3) #endif #endif test %edx, %edx jnz L(CopyFrom1To16BytesTail1) pcmpeqb 16(%esi), %xmm0 movdqu %xmm1, (%edi) pmovmskb %xmm0, %edx #ifdef USE_AS_STRNCPY #ifdef USE_AS_STPCPY cmp $32, %ebx jbe L(CopyFrom1To32Bytes1Case2OrCase3) #else cmp $33, %ebx jbe L(CopyFrom1To32Bytes1Case2OrCase3) #endif #endif test %edx, %edx jnz L(CopyFrom1To32Bytes1) mov %edi, %edx mov $16, %ecx and $15, %edx jnz L(Unalign16Both) L(Align16Both): movdqa (%esi, %ecx), %xmm1 movdqa 16(%esi, %ecx), %xmm2 movdqa %xmm1, (%edi, %ecx) pcmpeqb %xmm2, %xmm0 pmovmskb 
%xmm0, %edx add $16, %ecx #ifdef USE_AS_STRNCPY sub $48, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) test %edx, %edx jnz L(CopyFrom1To16BytesXmm2) #else test %edx, %edx jnz L(CopyFrom1To16Bytes) #endif movdqa 16(%esi, %ecx), %xmm3 movdqa %xmm2, (%edi, %ecx) pcmpeqb %xmm3, %xmm0 pmovmskb %xmm0, %edx lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) test %edx, %edx jnz L(CopyFrom1To16BytesXmm3) #else test %edx, %edx jnz L(CopyFrom1To16Bytes) #endif movdqa 16(%esi, %ecx), %xmm4 movdqa %xmm3, (%edi, %ecx) pcmpeqb %xmm4, %xmm0 pmovmskb %xmm0, %edx lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) test %edx, %edx jnz L(CopyFrom1To16BytesXmm4) #else test %edx, %edx jnz L(CopyFrom1To16Bytes) #endif movdqa 16(%esi, %ecx), %xmm1 movdqa %xmm4, (%edi, %ecx) pcmpeqb %xmm1, %xmm0 pmovmskb %xmm0, %edx lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) test %edx, %edx jnz L(CopyFrom1To16BytesXmm1) #else test %edx, %edx jnz L(CopyFrom1To16Bytes) #endif movdqa 16(%esi, %ecx), %xmm2 movdqa %xmm1, (%edi, %ecx) pcmpeqb %xmm2, %xmm0 pmovmskb %xmm0, %edx lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) test %edx, %edx jnz L(CopyFrom1To16BytesXmm2) #else test %edx, %edx jnz L(CopyFrom1To16Bytes) #endif movdqa 16(%esi, %ecx), %xmm3 movdqa %xmm2, (%edi, %ecx) pcmpeqb %xmm3, %xmm0 pmovmskb %xmm0, %edx lea 16(%ecx), %ecx #ifdef USE_AS_STRNCPY sub $16, %ebx jbe L(CopyFrom1To16BytesCase2OrCase3) test %edx, %edx jnz L(CopyFrom1To16BytesXmm3) #else test %edx, %edx jnz L(CopyFrom1To16Bytes) #endif movdqa %xmm3, (%edi, %ecx) mov %esi, %edx lea 16(%esi, %ecx), %esi and $-0x40, %esi sub %esi, %edx sub %edx, %edi #ifdef USE_AS_STRNCPY lea 64+64(%ebx, %edx), %ebx #endif L(Aligned64Loop): movdqa (%esi), %xmm2 movdqa %xmm2, %xmm4 movaps 16(%esi), %xmm5 movdqa 32(%esi), %xmm3 movdqa %xmm3, %xmm6 movaps 48(%esi), %xmm7 pminub %xmm5, %xmm2 
pminub %xmm7, %xmm3 pminub %xmm2, %xmm3 pcmpeqb %xmm0, %xmm3 pmovmskb %xmm3, %edx #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(AlignedLeaveCase2OrCase3) #endif test %edx, %edx jnz L(Aligned64Leave) L(Aligned64Loop_start): add $64, %esi add $64, %edi movaps %xmm4, -64(%edi) movdqa (%esi), %xmm2 movdqa %xmm2, %xmm4 movaps %xmm5, -48(%edi) movaps 16(%esi), %xmm5 pminub %xmm5, %xmm2 movaps 32(%esi), %xmm3 movaps %xmm6, -32(%edi) movdqa %xmm3, %xmm6 movaps %xmm7, -16(%edi) movaps 48(%esi), %xmm7 pminub %xmm7, %xmm3 pminub %xmm2, %xmm3 pcmpeqb %xmm3, %xmm0 pmovmskb %xmm0, %edx #ifdef USE_AS_STRNCPY sub $64, %ebx jbe L(AlignedLeaveCase2OrCase3) #endif test %edx, %edx jz L(Aligned64Loop_start) L(Aligned64Leave): pxor %xmm0, %xmm0 pxor %xmm1, %xmm1 pcmpeqb %xmm4, %xmm0 pcmpeqb %xmm5, %xmm1 pmovmskb %xmm0, %edx pmovmskb %xmm1, %ecx test %edx, %edx jnz L(CopyFrom1To16Bytes_0) test %ecx, %ecx jnz L(CopyFrom1To16Bytes_16) pcmpeqb %xmm6, %xmm0 pcmpeqb %xmm7, %xmm1 pmovmskb %xmm0, %edx pmovmskb %xmm1, %ecx test %edx, %edx jnz L(CopyFrom1To16Bytes_32) bsf %ecx, %edx movdqa %xmm4, (%edi) movdqa %xmm5, 16(%edi) movdqa %xmm6, 32(%edi) #ifdef USE_AS_STRNCPY #ifdef USE_AS_STPCPY lea 48(%edi, %edx), %eax #endif movdqa %xmm7, 48(%edi) add $15, %ebx sub %edx, %ebx lea 49(%edi, %edx), %edi jmp L(StrncpyFillTailWithZero) #else add $48, %esi add $48, %edi BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) #endif /*----------------------------------------------------*/ /* Case1 */ #ifndef USE_AS_STRNCPY .p2align 4 L(CopyFrom1To16Bytes): add %ecx, %edi add %ecx, %esi bsf %edx, %edx BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) #endif .p2align 4 L(CopyFrom1To16BytesTail): #ifdef USE_AS_STRNCPY sub %ecx, %ebx #endif add %ecx, %esi bsf %edx, %edx BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) .p2align 4 L(CopyFrom1To32Bytes1): add $16, %esi add $16, %edi #ifdef USE_AS_STRNCPY sub $16, %ebx #endif L(CopyFrom1To16BytesTail1): bsf %edx, %edx BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) .p2align 4 
L(CopyFrom1To32Bytes): #ifdef USE_AS_STRNCPY sub %ecx, %ebx #endif bsf %edx, %edx add %ecx, %esi add $16, %edx sub %ecx, %edx BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) .p2align 4 L(CopyFrom1To16Bytes_0): bsf %edx, %edx #ifdef USE_AS_STRNCPY #ifdef USE_AS_STPCPY lea (%edi, %edx), %eax #endif movdqa %xmm4, (%edi) add $63, %ebx sub %edx, %ebx lea 1(%edi, %edx), %edi jmp L(StrncpyFillTailWithZero) #else BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) #endif .p2align 4 L(CopyFrom1To16Bytes_16): bsf %ecx, %edx movdqa %xmm4, (%edi) #ifdef USE_AS_STRNCPY #ifdef USE_AS_STPCPY lea 16(%edi, %edx), %eax #endif movdqa %xmm5, 16(%edi) add $47, %ebx sub %edx, %ebx lea 17(%edi, %edx), %edi jmp L(StrncpyFillTailWithZero) #else add $16, %esi add $16, %edi BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) #endif .p2align 4 L(CopyFrom1To16Bytes_32): bsf %edx, %edx movdqa %xmm4, (%edi) movdqa %xmm5, 16(%edi) #ifdef USE_AS_STRNCPY #ifdef USE_AS_STPCPY lea 32(%edi, %edx), %eax #endif movdqa %xmm6, 32(%edi) add $31, %ebx sub %edx, %ebx lea 33(%edi, %edx), %edi jmp L(StrncpyFillTailWithZero) #else add $32, %esi add $32, %edi BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) #endif .p2align 4 L(CopyFrom1To16BytesUnaligned_0): bsf %edx, %edx #ifdef USE_AS_STRNCPY #ifdef USE_AS_STPCPY lea (%edi, %edx), %eax #endif movdqu %xmm4, (%edi) add $63, %ebx sub %edx, %ebx lea 1(%edi, %edx), %edi jmp L(StrncpyFillTailWithZero) #else BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) #endif .p2align 4 L(CopyFrom1To16BytesUnaligned_16): bsf %ecx, %edx movdqu %xmm4, (%edi) #ifdef USE_AS_STRNCPY #ifdef USE_AS_STPCPY lea 16(%edi, %edx), %eax #endif movdqu %xmm5, 16(%edi) add $47, %ebx sub %edx, %ebx lea 17(%edi, %edx), %edi jmp L(StrncpyFillTailWithZero) #else add $16, %esi add $16, %edi BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) #endif .p2align 4 L(CopyFrom1To16BytesUnaligned_32): bsf %edx, %edx movdqu %xmm4, (%edi) movdqu %xmm5, 16(%edi) #ifdef USE_AS_STRNCPY #ifdef USE_AS_STPCPY lea 32(%edi, %edx), %eax 
#endif movdqu %xmm6, 32(%edi) add $31, %ebx sub %edx, %ebx lea 33(%edi, %edx), %edi jmp L(StrncpyFillTailWithZero) #else add $32, %esi add $32, %edi BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) #endif #ifdef USE_AS_STRNCPY .p2align 4 L(CopyFrom1To16BytesXmm6): movdqa %xmm6, (%edi, %ecx) jmp L(CopyFrom1To16BytesXmmExit) .p2align 4 L(CopyFrom1To16BytesXmm5): movdqa %xmm5, (%edi, %ecx) jmp L(CopyFrom1To16BytesXmmExit) .p2align 4 L(CopyFrom1To16BytesXmm4): movdqa %xmm4, (%edi, %ecx) jmp L(CopyFrom1To16BytesXmmExit) .p2align 4 L(CopyFrom1To16BytesXmm3): movdqa %xmm3, (%edi, %ecx) jmp L(CopyFrom1To16BytesXmmExit) .p2align 4 L(CopyFrom1To16BytesXmm2): movdqa %xmm2, (%edi, %ecx) jmp L(CopyFrom1To16BytesXmmExit) .p2align 4 L(CopyFrom1To16BytesXmm1): movdqa %xmm1, (%edi, %ecx) jmp L(CopyFrom1To16BytesXmmExit) .p2align 4 L(CopyFrom1To16BytesUnalignedXmm6): movdqu %xmm6, (%edi, %ecx) jmp L(CopyFrom1To16BytesXmmExit) .p2align 4 L(CopyFrom1To16BytesUnalignedXmm5): movdqu %xmm5, (%edi, %ecx) jmp L(CopyFrom1To16BytesXmmExit) .p2align 4 L(CopyFrom1To16BytesUnalignedXmm4): movdqu %xmm4, (%edi, %ecx) jmp L(CopyFrom1To16BytesXmmExit) .p2align 4 L(CopyFrom1To16BytesUnalignedXmm3): movdqu %xmm3, (%edi, %ecx) jmp L(CopyFrom1To16BytesXmmExit) .p2align 4 L(CopyFrom1To16BytesUnalignedXmm1): movdqu %xmm1, (%edi, %ecx) jmp L(CopyFrom1To16BytesXmmExit) .p2align 4 L(CopyFrom1To16BytesExit): BRANCH_TO_JMPTBL_ENTRY (L(ExitTable), %edx, 4) /* Case2 */ .p2align 4 L(CopyFrom1To16BytesCase2): add $16, %ebx add %ecx, %edi add %ecx, %esi bsf %edx, %edx cmp %ebx, %edx jb L(CopyFrom1To16BytesExit) BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) .p2align 4 L(CopyFrom1To32BytesCase2): sub %ecx, %ebx add %ecx, %esi bsf %edx, %edx add $16, %edx sub %ecx, %edx cmp %ebx, %edx jb L(CopyFrom1To16BytesExit) BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) L(CopyFrom1To16BytesTailCase2): sub %ecx, %ebx add %ecx, %esi bsf %edx, %edx cmp %ebx, %edx jb L(CopyFrom1To16BytesExit) BRANCH_TO_JMPTBL_ENTRY 
(L(ExitStrncpyTable), %ebx, 4) L(CopyFrom1To16BytesTail1Case2): bsf %edx, %edx cmp %ebx, %edx jb L(CopyFrom1To16BytesExit) BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) /* Case2 or Case3, Case3 */ .p2align 4 L(CopyFrom1To16BytesCase2OrCase3): test %edx, %edx jnz L(CopyFrom1To16BytesCase2) L(CopyFrom1To16BytesCase3): add $16, %ebx add %ecx, %edi add %ecx, %esi BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) .p2align 4 L(CopyFrom1To32BytesCase2OrCase3): test %edx, %edx jnz L(CopyFrom1To32BytesCase2) sub %ecx, %ebx add %ecx, %esi BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) .p2align 4 L(CopyFrom1To16BytesTailCase2OrCase3): test %edx, %edx jnz L(CopyFrom1To16BytesTailCase2) sub %ecx, %ebx add %ecx, %esi BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) .p2align 4 L(CopyFrom1To32Bytes1Case2OrCase3): add $16, %edi add $16, %esi sub $16, %ebx L(CopyFrom1To16BytesTail1Case2OrCase3): test %edx, %edx jnz L(CopyFrom1To16BytesTail1Case2) BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4) #endif /*-----------------------------------------------------------------*/ .p2align 4 L(Exit0): #ifdef USE_AS_STPCPY mov %edi, %eax #endif RETURN .p2align 4 L(Exit1): movb %dh, (%edi) #ifdef USE_AS_STPCPY lea (%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $1, %ebx lea 1(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit2): movw (%esi), %dx movw %dx, (%edi) #ifdef USE_AS_STPCPY lea 1(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $2, %ebx lea 2(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit3): movw (%esi), %cx movw %cx, (%edi) movb %dh, 2(%edi) #ifdef USE_AS_STPCPY lea 2(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $3, %ebx lea 3(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit4): movl (%esi), %edx movl %edx, (%edi) #ifdef USE_AS_STPCPY lea 3(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $4, %ebx lea 4(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit5): movl 
(%esi), %ecx movb %dh, 4(%edi) movl %ecx, (%edi) #ifdef USE_AS_STPCPY lea 4(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $5, %ebx lea 5(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit6): movl (%esi), %ecx movw 4(%esi), %dx movl %ecx, (%edi) movw %dx, 4(%edi) #ifdef USE_AS_STPCPY lea 5(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $6, %ebx lea 6(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit7): movl (%esi), %ecx movl 3(%esi), %edx movl %ecx, (%edi) movl %edx, 3(%edi) #ifdef USE_AS_STPCPY lea 6(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $7, %ebx lea 7(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit8): movlpd (%esi), %xmm0 movlpd %xmm0, (%edi) #ifdef USE_AS_STPCPY lea 7(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $8, %ebx lea 8(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit9): movlpd (%esi), %xmm0 movb %dh, 8(%edi) movlpd %xmm0, (%edi) #ifdef USE_AS_STPCPY lea 8(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $9, %ebx lea 9(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit10): movlpd (%esi), %xmm0 movw 8(%esi), %dx movlpd %xmm0, (%edi) movw %dx, 8(%edi) #ifdef USE_AS_STPCPY lea 9(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $10, %ebx lea 10(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit11): movlpd (%esi), %xmm0 movl 7(%esi), %edx movlpd %xmm0, (%edi) movl %edx, 7(%edi) #ifdef USE_AS_STPCPY lea 10(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $11, %ebx lea 11(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit12): movlpd (%esi), %xmm0 movl 8(%esi), %edx movlpd %xmm0, (%edi) movl %edx, 8(%edi) #ifdef USE_AS_STPCPY lea 11(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $12, %ebx lea 12(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit13): movlpd (%esi), %xmm0 movlpd 5(%esi), %xmm1 movlpd %xmm0, (%edi) movlpd %xmm1, 5(%edi) #ifdef USE_AS_STPCPY lea 12(%edi), %eax #endif 
#ifdef USE_AS_STRNCPY sub $13, %ebx lea 13(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit14): movlpd (%esi), %xmm0 movlpd 6(%esi), %xmm1 movlpd %xmm0, (%edi) movlpd %xmm1, 6(%edi) #ifdef USE_AS_STPCPY lea 13(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $14, %ebx lea 14(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit15): movlpd (%esi), %xmm0 movlpd 7(%esi), %xmm1 movlpd %xmm0, (%edi) movlpd %xmm1, 7(%edi) #ifdef USE_AS_STPCPY lea 14(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $15, %ebx lea 15(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit16): movdqu (%esi), %xmm0 movdqu %xmm0, (%edi) #ifdef USE_AS_STPCPY lea 15(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $16, %ebx lea 16(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit17): movdqu (%esi), %xmm0 xor %cl, %cl movdqu %xmm0, (%edi) movb %cl, 16(%edi) #ifdef USE_AS_STPCPY lea 16(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $17, %ebx lea 17(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit18): movdqu (%esi), %xmm0 movw 16(%esi), %cx movdqu %xmm0, (%edi) movw %cx, 16(%edi) #ifdef USE_AS_STPCPY lea 17(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $18, %ebx lea 18(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit19): movdqu (%esi), %xmm0 movl 15(%esi), %ecx movdqu %xmm0, (%edi) movl %ecx, 15(%edi) #ifdef USE_AS_STPCPY lea 18(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $19, %ebx lea 19(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit20): movdqu (%esi), %xmm0 movl 16(%esi), %ecx movdqu %xmm0, (%edi) movl %ecx, 16(%edi) #ifdef USE_AS_STPCPY lea 19(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $20, %ebx lea 20(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit21): movdqu (%esi), %xmm0 movl 16(%esi), %ecx xor %dl, %dl movdqu %xmm0, (%edi) movl %ecx, 16(%edi) movb %dl, 20(%edi) #ifdef USE_AS_STPCPY lea 20(%edi), %eax #endif 
#ifdef USE_AS_STRNCPY sub $21, %ebx lea 21(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit22): movdqu (%esi), %xmm0 movlpd 14(%esi), %xmm3 movdqu %xmm0, (%edi) movlpd %xmm3, 14(%edi) #ifdef USE_AS_STPCPY lea 21(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $22, %ebx lea 22(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit23): movdqu (%esi), %xmm0 movlpd 15(%esi), %xmm3 movdqu %xmm0, (%edi) movlpd %xmm3, 15(%edi) #ifdef USE_AS_STPCPY lea 22(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $23, %ebx lea 23(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit24): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) #ifdef USE_AS_STPCPY lea 23(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $24, %ebx lea 24(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit25): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 xor %cl, %cl movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) movb %cl, 24(%edi) #ifdef USE_AS_STPCPY lea 24(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $25, %ebx lea 25(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit26): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 movw 24(%esi), %cx movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) movw %cx, 24(%edi) #ifdef USE_AS_STPCPY lea 25(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $26, %ebx lea 26(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit27): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 movl 23(%esi), %ecx movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) movl %ecx, 23(%edi) #ifdef USE_AS_STPCPY lea 26(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $27, %ebx lea 27(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit28): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 movl 24(%esi), %ecx movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) movl %ecx, 24(%edi) #ifdef USE_AS_STPCPY lea 27(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $28, %ebx lea 28(%edi), 
%edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit29): movdqu (%esi), %xmm0 movdqu 13(%esi), %xmm2 movdqu %xmm0, (%edi) movdqu %xmm2, 13(%edi) #ifdef USE_AS_STPCPY lea 28(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $29, %ebx lea 29(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit30): movdqu (%esi), %xmm0 movdqu 14(%esi), %xmm2 movdqu %xmm0, (%edi) movdqu %xmm2, 14(%edi) #ifdef USE_AS_STPCPY lea 29(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $30, %ebx lea 30(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit31): movdqu (%esi), %xmm0 movdqu 15(%esi), %xmm2 movdqu %xmm0, (%edi) movdqu %xmm2, 15(%edi) #ifdef USE_AS_STPCPY lea 30(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $31, %ebx lea 31(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN .p2align 4 L(Exit32): movdqu (%esi), %xmm0 movdqu 16(%esi), %xmm2 movdqu %xmm0, (%edi) movdqu %xmm2, 16(%edi) #ifdef USE_AS_STPCPY lea 31(%edi), %eax #endif #ifdef USE_AS_STRNCPY sub $32, %ebx lea 32(%edi), %edi jnz L(StrncpyFillTailWithZero) #endif RETURN #ifdef USE_AS_STRNCPY .p2align 4 L(StrncpyExit1): movb (%esi), %dl movb %dl, (%edi) #ifdef USE_AS_STPCPY lea 1(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit2): movw (%esi), %dx movw %dx, (%edi) #ifdef USE_AS_STPCPY lea 2(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit3): movw (%esi), %cx movb 2(%esi), %dl movw %cx, (%edi) movb %dl, 2(%edi) #ifdef USE_AS_STPCPY lea 3(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit4): movl (%esi), %edx movl %edx, (%edi) #ifdef USE_AS_STPCPY lea 4(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit5): movl (%esi), %ecx movb 4(%esi), %dl movl %ecx, (%edi) movb %dl, 4(%edi) #ifdef USE_AS_STPCPY lea 5(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit6): movl (%esi), %ecx movw 4(%esi), %dx movl %ecx, (%edi) movw %dx, 4(%edi) #ifdef USE_AS_STPCPY lea 6(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit7): movl (%esi), %ecx movl 3(%esi), %edx movl %ecx, (%edi) movl %edx, 
3(%edi) #ifdef USE_AS_STPCPY lea 7(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit8): movlpd (%esi), %xmm0 movlpd %xmm0, (%edi) #ifdef USE_AS_STPCPY lea 8(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit9): movlpd (%esi), %xmm0 movb 8(%esi), %dl movlpd %xmm0, (%edi) movb %dl, 8(%edi) #ifdef USE_AS_STPCPY lea 9(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit10): movlpd (%esi), %xmm0 movw 8(%esi), %dx movlpd %xmm0, (%edi) movw %dx, 8(%edi) #ifdef USE_AS_STPCPY lea 10(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit11): movlpd (%esi), %xmm0 movl 7(%esi), %edx movlpd %xmm0, (%edi) movl %edx, 7(%edi) #ifdef USE_AS_STPCPY lea 11(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit12): movlpd (%esi), %xmm0 movl 8(%esi), %edx movlpd %xmm0, (%edi) movl %edx, 8(%edi) #ifdef USE_AS_STPCPY lea 12(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit13): movlpd (%esi), %xmm0 movlpd 5(%esi), %xmm1 movlpd %xmm0, (%edi) movlpd %xmm1, 5(%edi) #ifdef USE_AS_STPCPY lea 13(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit14): movlpd (%esi), %xmm0 movlpd 6(%esi), %xmm1 movlpd %xmm0, (%edi) movlpd %xmm1, 6(%edi) #ifdef USE_AS_STPCPY lea 14(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit15): movlpd (%esi), %xmm0 movlpd 7(%esi), %xmm1 movlpd %xmm0, (%edi) movlpd %xmm1, 7(%edi) #ifdef USE_AS_STPCPY lea 15(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit16): movdqu (%esi), %xmm0 movdqu %xmm0, (%edi) #ifdef USE_AS_STPCPY lea 16(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit17): movdqu (%esi), %xmm0 movb 16(%esi), %cl movdqu %xmm0, (%edi) movb %cl, 16(%edi) #ifdef USE_AS_STPCPY lea 17(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit18): movdqu (%esi), %xmm0 movw 16(%esi), %cx movdqu %xmm0, (%edi) movw %cx, 16(%edi) #ifdef USE_AS_STPCPY lea 18(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit19): movdqu (%esi), %xmm0 movl 15(%esi), %ecx movdqu %xmm0, (%edi) movl %ecx, 15(%edi) #ifdef USE_AS_STPCPY lea 19(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit20): movdqu (%esi), 
%xmm0 movl 16(%esi), %ecx movdqu %xmm0, (%edi) movl %ecx, 16(%edi) #ifdef USE_AS_STPCPY lea 20(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit21): movdqu (%esi), %xmm0 movl 16(%esi), %ecx movb 20(%esi), %dl movdqu %xmm0, (%edi) movl %ecx, 16(%edi) movb %dl, 20(%edi) #ifdef USE_AS_STPCPY lea 21(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit22): movdqu (%esi), %xmm0 movlpd 14(%esi), %xmm3 movdqu %xmm0, (%edi) movlpd %xmm3, 14(%edi) #ifdef USE_AS_STPCPY lea 22(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit23): movdqu (%esi), %xmm0 movlpd 15(%esi), %xmm3 movdqu %xmm0, (%edi) movlpd %xmm3, 15(%edi) #ifdef USE_AS_STPCPY lea 23(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit24): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) #ifdef USE_AS_STPCPY lea 24(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit25): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 movb 24(%esi), %cl movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) movb %cl, 24(%edi) #ifdef USE_AS_STPCPY lea 25(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit26): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 movw 24(%esi), %cx movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) movw %cx, 24(%edi) #ifdef USE_AS_STPCPY lea 26(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit27): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 movl 23(%esi), %ecx movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) movl %ecx, 23(%edi) #ifdef USE_AS_STPCPY lea 27(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit28): movdqu (%esi), %xmm0 movlpd 16(%esi), %xmm2 movl 24(%esi), %ecx movdqu %xmm0, (%edi) movlpd %xmm2, 16(%edi) movl %ecx, 24(%edi) #ifdef USE_AS_STPCPY lea 28(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit29): movdqu (%esi), %xmm0 movdqu 13(%esi), %xmm2 movdqu %xmm0, (%edi) movdqu %xmm2, 13(%edi) #ifdef USE_AS_STPCPY lea 29(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit30): movdqu (%esi), %xmm0 movdqu 14(%esi), %xmm2 movdqu %xmm0, (%edi) movdqu %xmm2, 14(%edi) #ifdef USE_AS_STPCPY lea 30(%edi), 
%eax #endif RETURN .p2align 4 L(StrncpyExit31): movdqu (%esi), %xmm0 movdqu 15(%esi), %xmm2 movdqu %xmm0, (%edi) movdqu %xmm2, 15(%edi) #ifdef USE_AS_STPCPY lea 31(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit32): movdqu (%esi), %xmm0 movdqu 16(%esi), %xmm2 movdqu %xmm0, (%edi) movdqu %xmm2, 16(%edi) #ifdef USE_AS_STPCPY lea 32(%edi), %eax #endif RETURN .p2align 4 L(StrncpyExit33): movdqu (%esi), %xmm0 movdqu 16(%esi), %xmm2 movb 32(%esi), %cl movdqu %xmm0, (%edi) movdqu %xmm2, 16(%edi) movb %cl, 32(%edi) RETURN .p2align 4 L(Fill0): RETURN .p2align 4 L(Fill1): movb %dl, (%edi) RETURN .p2align 4 L(Fill2): movw %dx, (%edi) RETURN .p2align 4 L(Fill3): movl %edx, -1(%edi) RETURN .p2align 4 L(Fill4): movl %edx, (%edi) RETURN .p2align 4 L(Fill5): movl %edx, (%edi) movb %dl, 4(%edi) RETURN .p2align 4 L(Fill6): movl %edx, (%edi) movw %dx, 4(%edi) RETURN .p2align 4 L(Fill7): movlpd %xmm0, -1(%edi) RETURN .p2align 4 L(Fill8): movlpd %xmm0, (%edi) RETURN .p2align 4 L(Fill9): movlpd %xmm0, (%edi) movb %dl, 8(%edi) RETURN .p2align 4 L(Fill10): movlpd %xmm0, (%edi) movw %dx, 8(%edi) RETURN .p2align 4 L(Fill11): movlpd %xmm0, (%edi) movl %edx, 7(%edi) RETURN .p2align 4 L(Fill12): movlpd %xmm0, (%edi) movl %edx, 8(%edi) RETURN .p2align 4 L(Fill13): movlpd %xmm0, (%edi) movlpd %xmm0, 5(%edi) RETURN .p2align 4 L(Fill14): movlpd %xmm0, (%edi) movlpd %xmm0, 6(%edi) RETURN .p2align 4 L(Fill15): movdqu %xmm0, -1(%edi) RETURN .p2align 4 L(Fill16): movdqu %xmm0, (%edi) RETURN .p2align 4 L(CopyFrom1To16BytesUnalignedXmm2): movdqu %xmm2, (%edi, %ecx) .p2align 4 L(CopyFrom1To16BytesXmmExit): bsf %edx, %edx add $15, %ebx add %ecx, %edi #ifdef USE_AS_STPCPY lea (%edi, %edx), %eax #endif sub %edx, %ebx lea 1(%edi, %edx), %edi .p2align 4 L(StrncpyFillTailWithZero): pxor %xmm0, %xmm0 xor %edx, %edx sub $16, %ebx jbe L(StrncpyFillExit) movdqu %xmm0, (%edi) add $16, %edi mov %edi, %esi and $0xf, %esi sub %esi, %edi add %esi, %ebx sub $64, %ebx jb L(StrncpyFillLess64) L(StrncpyFillLoopMovdqa): 
/* Body of the L(StrncpyFillLoopMovdqa) loop; the label itself sits directly
   before this span.  Each iteration writes 64 bytes from %xmm0 (zeroed at
   L(StrncpyFillTailWithZero)) with aligned stores and repeats while the
   subtraction from %ebx does not borrow.  */
	movdqa	%xmm0, (%edi)
	movdqa	%xmm0, 16(%edi)
	movdqa	%xmm0, 32(%edi)
	movdqa	%xmm0, 48(%edi)
	addl	$64, %edi
	subl	$64, %ebx
	jae	L(StrncpyFillLoopMovdqa)

/* Fewer than 64 bytes left in the loop's accounting: write the remaining
   whole 16-byte blocks, re-biasing %ebx step by step, then dispatch the final
   partial block through L(FillTable) using %ebx as the index (scale 4).  */
L(StrncpyFillLess64):
	addl	$32, %ebx
	jl	L(StrncpyFillLess32)
	movdqa	%xmm0, (%edi)
	movdqa	%xmm0, 16(%edi)
	addl	$32, %edi
	subl	$16, %ebx
	jl	L(StrncpyFillExit)
	movdqa	%xmm0, (%edi)
	addl	$16, %edi
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)

L(StrncpyFillLess32):
	addl	$16, %ebx
	jl	L(StrncpyFillExit)
	movdqa	%xmm0, (%edi)
	addl	$16, %edi
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)

/* Undo the last 16-byte bias on %ebx and dispatch on the result.  */
L(StrncpyFillExit):
	addl	$16, %ebx
	BRANCH_TO_JMPTBL_ENTRY (L(FillTable), %ebx, 4)

/* L(AlignedLeaveCase2OrCase3): %edx != 0 selects L(Aligned64LeaveCase2),
   %edx == 0 falls into L(Aligned64LeaveCase3).  NOTE(review): %edx is
   presumably a NUL-byte mask for the block held in %xmm4..%xmm7; confirm
   against the code that branches here.  */
	.p2align 4
L(AlignedLeaveCase2OrCase3):
	testl	%edx, %edx
	jnz	L(Aligned64LeaveCase2)

/* Case 3: store %xmm4..%xmm7 to (%edi) 16 bytes at a time with aligned
   stores, leaving for L(CopyFrom1To16BytesCase3) as soon as the count in
   %ebx is exhausted.  %ecx is set to (%ebx + 64) rounded down to a multiple
   of 16 before %ebx is modified.  */
L(Aligned64LeaveCase3):
	leal	64(%ebx), %ecx
	andl	$-16, %ecx
	addl	$48, %ebx
	jl	L(CopyFrom1To16BytesCase3)
	movdqa	%xmm4, (%edi)
	subl	$16, %ebx
	jb	L(CopyFrom1To16BytesCase3)
	movdqa	%xmm5, 16(%edi)
	subl	$16, %ebx
	jb	L(CopyFrom1To16BytesCase3)
	movdqa	%xmm6, 32(%edi)
	subl	$16, %ebx
	jb	L(CopyFrom1To16BytesCase3)
	movdqa	%xmm7, 48(%edi)
#ifdef USE_AS_STPCPY
	leal	64(%edi), %eax
#endif
	RETURN

/* Case 2: test %xmm4, %xmm5, %xmm6 and %xmm7 in turn for a zero byte.
   %ecx counts the bytes already stored.  Each stage first leaves for
   L(CopyFrom1To16BytesCase2OrCase3) if the count in %ebx has run out, then
   leaves for the matching L(CopyFrom1To16BytesXmmN) if the mask in %edx is
   non-zero.  Falling through a stage means %edx was 0, so the pcmpeqb result
   in %xmm0 is all-zero again and can be reused as the next comparand.  */
	.p2align 4
L(Aligned64LeaveCase2):
	pxor	%xmm0, %xmm0
	xorl	%ecx, %ecx
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %edx
	addl	$48, %ebx
	jle	L(CopyFrom1To16BytesCase2OrCase3)
	testl	%edx, %edx
	jnz	L(CopyFrom1To16BytesXmm4)

	pcmpeqb	%xmm5, %xmm0
	pmovmskb %xmm0, %edx
	movdqa	%xmm4, (%edi)
	addl	$16, %ecx
	subl	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	testl	%edx, %edx
	jnz	L(CopyFrom1To16BytesXmm5)

	pcmpeqb	%xmm6, %xmm0
	pmovmskb %xmm0, %edx
	movdqa	%xmm5, 16(%edi)
	addl	$16, %ecx
	subl	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	testl	%edx, %edx
	jnz	L(CopyFrom1To16BytesXmm6)

	/* Last 16 bytes: %ecx is 32 here, so both pointers advance by 48.  */
	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm0, %edx
	movdqa	%xmm6, 32(%edi)
	leal	16(%edi, %ecx), %edi
	leal	16(%esi, %ecx), %esi
	bsfl	%edx, %edx		/* index of the lowest set mask bit */
	cmpl	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)	/* index below %ebx (unsigned) */
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

/* L(UnalignedLeaveCase2OrCase3): same %edx test as the aligned entry,
   selecting between the two unaligned-store variants.  */
	.p2align 4
L(UnalignedLeaveCase2OrCase3):
	testl	%edx, %edx
	jnz	L(Unaligned64LeaveCase2)

/* Start of the unaligned Case 3 path; its remaining instructions follow
   this span.  */
L(Unaligned64LeaveCase3):
	leal	64(%ebx), %ecx
	andl	$-16, %ecx
/* Remainder of L(Unaligned64LeaveCase3); its label and first two instructions
   sit directly before this span.  %xmm4..%xmm7 are stored to (%edi) 16 bytes
   at a time with unaligned stores, leaving for L(CopyFrom1To16BytesCase3) as
   soon as the count in %ebx is exhausted.  */
	addl	$48, %ebx
	jl	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm4, (%edi)
	subl	$16, %ebx
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm5, 16(%edi)
	subl	$16, %ebx
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm6, 32(%edi)
	subl	$16, %ebx
	jb	L(CopyFrom1To16BytesCase3)
	movdqu	%xmm7, 48(%edi)
#ifdef USE_AS_STPCPY
	leal	64(%edi), %eax
#endif
	RETURN

/* L(Unaligned64LeaveCase2): test %xmm4, %xmm5, %xmm6 and %xmm7 in turn for a
   zero byte, storing each block that has been cleared with an unaligned
   store.  %ecx counts the bytes already stored.  Each stage first leaves for
   L(CopyFrom1To16BytesCase2OrCase3) if the count in %ebx has run out, then
   leaves for the matching L(CopyFrom1To16BytesUnalignedXmmN) if the mask in
   %edx is non-zero.  Falling through a stage means %edx was 0, so the
   pcmpeqb result in %xmm0 is all-zero again and can be reused as the next
   comparand.  */
	.p2align 4
L(Unaligned64LeaveCase2):
	pxor	%xmm0, %xmm0
	xorl	%ecx, %ecx
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %edx
	addl	$48, %ebx
	jle	L(CopyFrom1To16BytesCase2OrCase3)
	testl	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm4)

	pcmpeqb	%xmm5, %xmm0
	pmovmskb %xmm0, %edx
	movdqu	%xmm4, (%edi)
	addl	$16, %ecx
	subl	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	testl	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm5)

	pcmpeqb	%xmm6, %xmm0
	pmovmskb %xmm0, %edx
	movdqu	%xmm5, 16(%edi)
	addl	$16, %ecx
	subl	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	testl	%edx, %edx
	jnz	L(CopyFrom1To16BytesUnalignedXmm6)

	/* Last 16 bytes: %ecx is 32 here, so both pointers advance by 48.  */
	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm0, %edx
	movdqu	%xmm6, 32(%edi)
	leal	16(%edi, %ecx), %edi
	leal	16(%esi, %ecx), %esi
	bsfl	%edx, %edx		/* index of the lowest set mask bit */
	cmpl	%ebx, %edx
	jb	L(CopyFrom1To16BytesExit)	/* index below %ebx (unsigned) */
	BRANCH_TO_JMPTBL_ENTRY (L(ExitStrncpyTable), %ebx, 4)

/* L(ExitZero): return %edi in %eax without storing anything.  */
	.p2align 4
L(ExitZero):
	movl	%edi, %eax
	RETURN

#endif	/* closes a conditional opened before this span */

END (STRCPY)

/* Jump tables used by BRANCH_TO_JMPTBL_ENTRY.  In SHARED / __PIC__ builds
   JMPTBL (I, B) expands to I - B, so every entry is the offset of its target
   from the start of its own table.

   Switch to .rodata before aligning: .p2align acts on the current section,
   so issuing it while still in .text would only pad the end of the code and
   leave L(ExitTable) with no alignment guarantee.  */
	.section .rodata
	.p2align 4

/* L(ExitTable): 32 entries; entry 0 is L(Exit1), so entry i targets
   L(Exit<i+1>).  */
L(ExitTable):
	.int	JMPTBL(L(Exit1), L(ExitTable))
	.int	JMPTBL(L(Exit2), L(ExitTable))
	.int	JMPTBL(L(Exit3), L(ExitTable))
	.int	JMPTBL(L(Exit4), L(ExitTable))
	.int	JMPTBL(L(Exit5), L(ExitTable))
	.int	JMPTBL(L(Exit6), L(ExitTable))
	.int	JMPTBL(L(Exit7), L(ExitTable))
	.int	JMPTBL(L(Exit8), L(ExitTable))
	.int	JMPTBL(L(Exit9), L(ExitTable))
	.int	JMPTBL(L(Exit10), L(ExitTable))
	.int	JMPTBL(L(Exit11), L(ExitTable))
	.int	JMPTBL(L(Exit12), L(ExitTable))
	.int	JMPTBL(L(Exit13), L(ExitTable))
	.int	JMPTBL(L(Exit14), L(ExitTable))
	.int	JMPTBL(L(Exit15), L(ExitTable))
	.int	JMPTBL(L(Exit16), L(ExitTable))
	.int	JMPTBL(L(Exit17), L(ExitTable))
	.int	JMPTBL(L(Exit18), L(ExitTable))
	.int	JMPTBL(L(Exit19), L(ExitTable))
	.int	JMPTBL(L(Exit20), L(ExitTable))
	.int	JMPTBL(L(Exit21), L(ExitTable))
	.int	JMPTBL(L(Exit22), L(ExitTable))
	.int	JMPTBL(L(Exit23), L(ExitTable))
	.int	JMPTBL(L(Exit24), L(ExitTable))
	.int	JMPTBL(L(Exit25), L(ExitTable))
	.int	JMPTBL(L(Exit26), L(ExitTable))
	.int	JMPTBL(L(Exit27), L(ExitTable))
	.int	JMPTBL(L(Exit28), L(ExitTable))
	.int	JMPTBL(L(Exit29), L(ExitTable))
	.int	JMPTBL(L(Exit30), L(ExitTable))
	.int	JMPTBL(L(Exit31), L(ExitTable))
	.int	JMPTBL(L(Exit32), L(ExitTable))
#ifdef USE_AS_STRNCPY
/* L(ExitStrncpyTable): 34 entries; entry 0 is L(Exit0) and entry i (1..33)
   targets L(StrncpyExit<i>).  It directly follows the 32 four-byte entries
   of L(ExitTable).  */
L(ExitStrncpyTable):
	.int	JMPTBL(L(Exit0), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit1), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit2), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit3), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit4), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit5), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit6), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit7), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit8), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit9), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit10), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit11), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit12), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit13), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit14), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit15), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit16), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit17), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit18), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit19), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit20), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit21), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit22), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit23), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit24), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit25), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit26), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit27), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit28), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit29), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit30), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit31), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit32), L(ExitStrncpyTable))
	.int	JMPTBL(L(StrncpyExit33), L(ExitStrncpyTable))

/* L(FillTable): 17 entries; entry i targets L(Fill<i>).  The 34 entries
   above are not a multiple of 16 bytes, hence the explicit re-alignment.  */
	.p2align 4
L(FillTable):
	.int	JMPTBL(L(Fill0), L(FillTable))
	.int	JMPTBL(L(Fill1), L(FillTable))
	.int	JMPTBL(L(Fill2), L(FillTable))
	.int	JMPTBL(L(Fill3), L(FillTable))
	.int	JMPTBL(L(Fill4), L(FillTable))
	.int	JMPTBL(L(Fill5), L(FillTable))
	.int	JMPTBL(L(Fill6), L(FillTable))
	.int	JMPTBL(L(Fill7), L(FillTable))
	.int	JMPTBL(L(Fill8), L(FillTable))
	.int	JMPTBL(L(Fill9), L(FillTable))
	.int	JMPTBL(L(Fill10), L(FillTable))
	.int	JMPTBL(L(Fill11), L(FillTable))
	.int	JMPTBL(L(Fill12), L(FillTable))
	.int	JMPTBL(L(Fill13), L(FillTable))
	.int	JMPTBL(L(Fill14), L(FillTable))
	.int	JMPTBL(L(Fill15), L(FillTable))
	.int	JMPTBL(L(Fill16), L(FillTable))
#endif