You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
238 lines
3.9 KiB
238 lines
3.9 KiB
; AesOpt.asm -- Intel's AES.
|
|
; 2009-12-12 : Igor Pavlov : Public domain
|
|
|
|
include 7zAsm.asm
|
|
|
|
MY_ASM_START

; Per-target setup.
;   x64   : the block count is taken straight from r8.
;   other : SSE mnemonics are enabled with .xmm and the block count is read
;           from the stack. MY_PROLOG pushes three registers before it reads
;           num, so together with the return address the argument sits
;           REG_SIZE * 4 above r4 (assuming r4 aliases the stack pointer and
;           REG_SIZE is the register width, both from 7zAsm.asm -- confirm).
ifdef x64
  num equ r8
else
  .xmm
  num equ [r4 + REG_SIZE * 4]
endif

; Register roles shared by the routines in this file.
rD equ r2                       ; pointer to the current 16-byte data block
rN equ r0                       ; number of blocks still to process
|
|
|
|
; MY_PROLOG reg
;   Common entry sequence of the AES routines.
;     - x64 only: xmm6 and xmm7 are parked at [r4 + 8] and [r4 + 24]
;       (presumably the caller-provided 32-byte home area of the Win64 ABI);
;       MY_EPILOG reloads them from the same slots.
;     - r3, r5 and r6 are saved on the stack.
;     - rN  = num (block count).
;     - reg = the 16 bytes at [r1] (IV / counter).
;     - x6  = dword at [r1 + 16] multiplied by 32, i.e. a byte size in units of
;             two 16-byte round keys (the stride used by DECODE / ENCODE).
;     - r1  is advanced by 32, past the 16-byte value and the dword field.
MY_PROLOG macro reg:req
  ifdef x64
    movdqa  [r4 + 8], xmm6
    movdqa  [r4 + 8 + 16], xmm7
  endif

    push    r3
    push    r5
    push    r6

    mov     rN, num             ;; after the pushes: the non-x64 num is r4-relative
    movdqa  reg, [r1]
    mov     x6, [r1 + 16]
    shl     x6, 5
    add     r1, 32
endm
|
|
|
|
; MY_EPILOG
;   Counterpart of MY_PROLOG: pops r6, r5 and r3 (reverse push order), reloads
;   xmm6 / xmm7 from [r4 + 8] and [r4 + 24] on x64, then invokes MY_ENDP
;   (defined outside this file; presumably emits the return and closes the
;   procedure opened by MY_PROC).
MY_EPILOG macro
    pop     r6
    pop     r5
    pop     r3

  ifdef x64
    movdqa  xmm7, [r4 + 8 + 16]
    movdqa  xmm6, [r4 + 8]
  endif

    MY_ENDP
endm
|
|
|
|
; Number of blocks handled per iteration of the interleaved loops (they use
; xmm0 .. xmm(ways-1)), and the same amount expressed in bytes.
ways    equ 4
ways16  equ (ways * 16)
|
|
|
|
; OP_W op, op2
;   Emits "op xmmK, op2" once for each K = 0 .. ways-1.
;   The assembly-time counter i holds K while each line is emitted, so op2 may
;   mention i (callers pass offsets such as i * 16). i equals ways afterwards.
;   op may be an instruction or a two-argument macro.
OP_W macro op, op2
    i = 0
    rept ways
      op @CatStr(xmm,%i), op2
      i = i + 1
    endm
endm
|
|
|
|
; LOAD_OP op, offs
;   Single-block form: applies op to xmm0 with the 16 bytes at [r1 + r3 offs].
;   offs is pasted textually and therefore carries its own sign (+0, +16, -16).
LOAD_OP macro op:req, offs:req
    op      xmm0, [r1 + r3 offs]
endm
|
|
|
|
; LOAD_OP_W op, offs
;   Multi-block form of LOAD_OP: the 16 bytes at [r1 + r3 offs] are loaded once
;   into xmm7 and op is then applied to xmm0 .. xmm(ways-1) with xmm7 as source.
;   Clobbers xmm7 and leaves the counter i at ways (see OP_W).
LOAD_OP_W macro op:req, offs:req
    movdqa  xmm7, [r1 + r3 offs]
    OP_W    op, xmm7
endm
|
|
|
|
|
|
; ---------- AES-CBC Decode ----------
|
|
|
|
; CBC_DEC_UPDATE reg, offs
;   CBC chaining step for one block whose cipher rounds are already done.
;   xmm6 carries the previous ciphertext block (or the IV):
;     1. reg ^= xmm6
;     2. xmm6 = ciphertext still in memory at [rD + offs]  (chain for the next block)
;     3. [rD + offs] = reg                                 (store the result)
;   Step 2 must precede step 3 because the data is processed in place.
CBC_DEC_UPDATE macro reg, offs
    pxor    reg, xmm6
    movdqa  xmm6, [rD + offs]
    movdqa  [rD + offs], reg
endm
|
|
|
|
; DECODE op
;   Runs the AES decryption rounds, walking the round keys downwards.
;   op is LOAD_OP or LOAD_OP_W (called as: op instruction, signed-offset).
;   On entry x3 is the byte offset of the current key pair relative to r1.
;   One aesdec is issued with the key at +16, then each loop pass issues two
;   (keys at +0 and -16) and lowers x3 by 32 until it reaches zero; the final
;   aesdeclast then uses the key at [r1 + 0].
DECODE macro op:req
    op      aesdec, +16
@@:
    op      aesdec, +0
    op      aesdec, -16
    sub     x3, 32
    jnz     @B
    op      aesdeclast, +0
endm
|
|
|
|
; AesCbc_Decode_Intel -- AES-CBC decryption, in place.
;
;   In  : r1  -> state block: 16-byte IV at [r1], dword at [r1 + 16] (scaled by
;               32 into x6 by MY_PROLOG), round keys starting at [r1 + 32]
;         rD  -> data, processed as 16-byte blocks with aligned loads/stores
;         num =  number of blocks
;   Out : data decrypted; the last ciphertext block is written back to the
;         IV slot at the start of the state block.
;
;   Registers: xmm6 = chaining value, x6 = starting key offset for DECODE,
;              x3 = running key offset, xmm7 = round-key scratch (4-way loop).
MY_PROC AesCbc_Decode_Intel, 3
    MY_PROLOG xmm6

    sub     x6, 32
    jmp     check2

    ; ---- ways blocks per iteration ----
    align 16
nextBlocks2:
    mov     x3, x6
    OP_W    movdqa, [rD + i * 16]       ; xmm0..xmm(ways-1) = ciphertext blocks
    LOAD_OP_W pxor, +32                 ; initial key XOR
    DECODE  LOAD_OP_W
    OP_W    CBC_DEC_UPDATE, i * 16      ; chain, store, advance xmm6
    add     rD, ways16
check2:
    sub     rN, ways
    jnc     nextBlocks2                 ; no borrow: at least ways blocks were left

    add     rN, ways                    ; undo the last subtraction: 0 .. ways-1 remain
    jmp     check

    ; ---- remaining blocks, one at a time ----
nextBlock:
    movdqa  xmm1, [rD]                  ; keep the ciphertext for chaining
    mov     x3, x6
    LOAD_OP movdqa, +32                 ; xmm0 = first round key
    pxor    xmm0, xmm1
    DECODE  LOAD_OP
    pxor    xmm0, xmm6                  ; XOR with previous ciphertext / IV
    movdqa  xmm6, xmm1
    movdqa  [rD], xmm0
    add     rD, 16
check:
    sub     rN, 1
    jnc     nextBlock

    movdqa  [r1 - 32], xmm6             ; r1 was advanced by 32 in MY_PROLOG
    MY_EPILOG
|
|
|
|
|
|
; ---------- AES-CBC Encode ----------
|
|
|
|
; ENCODE op
;   Runs the AES encryption rounds, walking the round keys upwards.
;   op is LOAD_OP or LOAD_OP_W (called as: op instruction, signed-offset).
;   On entry r3 is a negative byte offset relative to r1 (callers point r1 at
;   the end of the key schedule). One aesenc is issued with the key at -16,
;   then each loop pass issues two (keys at +0 and +16) and raises r3 by 32
;   until it reaches zero; the final aesenclast then uses the key at [r1 + 0].
ENCODE macro op:req
    op      aesenc, -16
@@:
    op      aesenc, +0
    op      aesenc, +16
    add     r3, 32
    jnz     @B
    op      aesenclast, +0
endm
|
|
|
|
; AesCbc_Encode_Intel -- AES-CBC encryption, in place, one block at a time.
;
;   In  : r1  -> state block: 16-byte IV at [r1], dword at [r1 + 16] (scaled by
;               32 into x6 by MY_PROLOG), round keys starting at [r1 + 32]
;         rD  -> data, processed as 16-byte blocks with aligned accesses
;         num =  number of blocks
;   Out : data encrypted; the last ciphertext block is written back to the
;         IV slot at the start of the state block.
;
;   Registers: xmm0 = chaining value / current block,
;              r1 = end of the key schedule, r6 = negative offset back to the
;              start of the rounds, r3 = running key offset.
MY_PROC AesCbc_Encode_Intel, 3
    MY_PROLOG xmm0

    add     r1, r6                      ; r1 -> last round key
    neg     r6
    add     r6, 32                      ; r6 = 32 - key-schedule size

    jmp     check_e

    align 16
nextBlock_e:
    pxor    xmm0, [rD]                  ; plaintext XOR previous ciphertext / IV
    mov     r3, r6
    pxor    xmm0, [r1 + r3 - 32]        ; first round key
    ENCODE  LOAD_OP
    movdqa  [rD], xmm0
    add     rD, 16
check_e:
    sub     rN, 1
    jnc     nextBlock_e

    movdqa  [r1 + r6 - 64], xmm0        ; resolves to the original r1 (IV slot)
    MY_EPILOG
|
|
|
|
|
|
; ---------- AES-CTR ----------
|
|
|
|
; XOR_UPD_1 reg, offs
;   reg ^= the 16 data bytes at [rD + offs] (memory operand must be 16-byte aligned).
XOR_UPD_1 macro reg, offs
    pxor    reg, [rD + offs]
endm
|
|
|
|
; XOR_UPD_2 reg, offs
;   Stores reg to the 16 data bytes at [rD + offs] (aligned store).
XOR_UPD_2 macro reg, offs
    movdqa  [rD + offs], reg
endm
|
|
|
|
; AesCtr_Code_Intel -- AES-CTR keystream XOR, in place.
;
;   In  : r1  -> state block: 16-byte counter at [r1], dword at [r1 + 16]
;               (scaled by 32 into x6 by MY_PROLOG), round keys from [r1 + 32]
;         rD  -> data, processed as 16-byte blocks with aligned accesses
;         num =  number of blocks
;   Out : every block is XORed with the encryption of the counter. The counter
;         is advanced by one before each block (paddq on the low qword, so no
;         carry propagates into the high qword) and its final value is stored
;         back at the start of the state block.
;
;   Registers: xmm6 = counter, xmm5 = increment constant (1 in the low qword),
;              xmm7 = round-key scratch in the 4-way loop,
;              xmm0 .. xmm(ways-1) = blocks being processed,
;              r1 = end of the key schedule, r6 = negative offset back to the
;              start of the rounds, r3 = running key offset.
;
;   The increment is held in a register so that nothing is written below the
;   stack pointer: that area is not reserved (neither Win64 nor 32-bit x86 has
;   a red zone), so a constant kept there could be overwritten while the loops
;   are still reading it.
if ways gt 5
  .err                                  ; xmm5 would collide with the OP_W block registers
endif

MY_PROC AesCtr_Code_Intel, 3
    MY_PROLOG xmm6

    mov     x3, 1                       ; r3 is free here: both loops reload it before use
    movd    xmm5, x3                    ; xmm5 = 1, upper 96 bits zeroed

    add     r1, r6                      ; r1 -> last round key
    neg     r6
    add     r6, 32                      ; r6 = 32 - key-schedule size

    jmp     check2_c

    ; ---- ways blocks per iteration ----
    align 16
nextBlocks2_c:
    i = 0
    rept ways
      paddq   xmm6, xmm5
      movdqa  @CatStr(xmm,%i), xmm6
      i = i + 1
    endm

    mov     r3, r6
    LOAD_OP_W pxor, -32                 ; first round key
    ENCODE  LOAD_OP_W
    OP_W    XOR_UPD_1, i * 16           ; keystream XOR data
    OP_W    XOR_UPD_2, i * 16           ; store back
    add     rD, ways16
check2_c:
    sub     rN, ways
    jnc     nextBlocks2_c               ; no borrow: at least ways blocks were left

    add     rN, ways                    ; undo the last subtraction: 0 .. ways-1 remain
    jmp     check_c

    ; ---- remaining blocks, one at a time ----
nextBlock_c:
    paddq   xmm6, xmm5
    mov     r3, r6
    movdqa  xmm0, [r1 + r3 - 32]        ; first round key
    pxor    xmm0, xmm6
    ENCODE  LOAD_OP
    XOR_UPD_1 xmm0, 0
    XOR_UPD_2 xmm0, 0
    add     rD, 16
check_c:
    sub     rN, 1
    jnc     nextBlock_c

    movdqa  [r1 + r6 - 64], xmm6        ; resolves to the original r1 (counter slot)
    MY_EPILOG
|
|
|
|
end
|