; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
; 2018-02-06: Igor Pavlov : Public domain
;
; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
; function for check at link time.
; That code is tightly coupled with LzmaDec_TryDummy()
; and with other functions in the LzmaDec.c file.
; CLzmaDec structure, (probs) array layout, input and output of
; LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM).

; File prologue.
; The x64 guard below is currently inert: both the define and the .err are
; commented out, so nothing is enforced at assembly time.
ifndef x64
; x64=1
; .err <x64_IS_REQUIRED>
endif

; Register aliases (x0.., r0.., *_L, *_W) and the MY_* / REG_PARAM_* helpers
; used throughout this file come from this include.
include 7zAsm.asm

MY_ASM_START

; All code in this file is emitted into its own 64-byte aligned code segment.
_TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'

; MY_ALIGN num -- emit an `align num` directive at the point of use.
MY_ALIGN macro num:req
        align  num
endm

; Fixed-size shorthands for MY_ALIGN.
MY_ALIGN_16 macro
        MY_ALIGN 16
endm

MY_ALIGN_32 macro
        MY_ALIGN 32
endm

MY_ALIGN_64 macro
        MY_ALIGN 64
endm

; Build options (both are commented out here, i.e. disabled):
;   _LZMA_SIZE_OPT - selects the loop-based bit decoders further down instead
;                    of the unrolled BIT_* / LITM* sequences.
;   _LZMA_PROB32   - selects 32-bit probability entries instead of 16-bit.

; _LZMA_SIZE_OPT  equ 1

; _LZMA_PROB32 equ 1

; PSHIFT        log2 of the size in bytes of one probability entry.
; PLOAD  d, m   load the probability at address expression m into d.
; PSTORE s, m   store probability s to address expression m.
ifdef _LZMA_PROB32
        PSHIFT  equ 2
        PLOAD macro dest, mem
                mov     dest, dword ptr [mem]
        endm
        PSTORE  macro src, mem
                mov     dword ptr [mem], src
        endm
else
        PSHIFT  equ 1
        PLOAD macro dest, mem
                ; 16-bit entry, zero-extended into the destination register
                movzx   dest, word ptr [mem]
        endm
        PSTORE macro src, mem
                ; @CatStr(src, _W) forms the name of the 16-bit alias of src
                ; (for example t0 -> t0_W), so only the low word is written
                mov     word ptr [mem], @CatStr(src, _W)
        endm
endif

; Byte multipliers derived from PSHIFT (log2 of one probability entry's size).
PMULT           equ (1 SHL PSHIFT)              ; bytes per entry
PMULT_HALF      equ (1 SHL (PSHIFT - 1))        ; half of PMULT
PMULT_2         equ (1 SHL (PSHIFT + 1))        ; twice PMULT

; Register usage map.
; The xN / rN / xN_L / xN_W names are defined in 7zAsm.asm (presumably the
; 32-bit / 64-bit / 8-bit / 16-bit views of register N -- confirm there).
; Several registers are shared by values that are live at different times,
; so some aliases below deliberately name the same register.
;
;       x0      range
;       x1      pbPos / (prob) TREE
;       x2      probBranch / prm (MATCHED) / pbPos / cnt
;       x3      sym
;====== r4 ===  RSP
;       x5      cod
;       x6      t1 NORM_CALC / probs_state / dist
;       x7      t0 NORM_CALC / prob2 IF_BIT_1
;       x8      state
;       x9      match (MATCHED) / sym2 / dist2 / lpMask_reg
;       x10     kBitModelTotal_reg
;       r11     probs
;       x12     offs (MATCHED) / dic / len_temp
;       x13     processedPos
;       x14     bit (MATCHED) / dicPos
;       r15     buf

; range coder state, decoder state, input pointer
cod     equ x5
cod_L   equ x5_L
range   equ x0
state   equ x8
state_R equ r8
buf     equ r15
processedPos equ x13
kBitModelTotal_reg equ x10

; probability of the bit being decoded by the branch macros
probBranch   equ x2
probBranch_R equ r2
probBranch_W equ x2_W

pbPos   equ x1
pbPos_R equ r1

; byte counter of the match copy loops (shares x2 with probBranch)
cnt     equ x2
cnt_R   equ r2

lpMask_reg equ x9
dicPos  equ r14

; symbol / length / distance accumulator
sym     equ x3
sym_R   equ r3
sym_L   equ x3_L

probs   equ r11
dic     equ r12

; scratch registers
t0      equ x7
t0_W    equ x7_W
t0_R    equ r7

prob2   equ t0
prob2_W equ t0_W

t1      equ x6
t1_R    equ r6

probs_state     equ t1
probs_state_R   equ t1_R

; matched-literal decoding (reuses x2 / x9 / x12 / x14)
prm     equ r2
match   equ x9
match_R equ r9
offs    equ x12
offs_R  equ r12
bit     equ x14
bit_R   equ r14

; reverse bit-tree decoding
sym2    equ x9
sym2_R  equ r9

; length adjustment / saved length (shares x12 with dic and offs)
len_temp equ x12

dist    equ sym
dist2   equ x9

; Range coder / adaptive bit model parameters.
kNumBitModelTotalBits   equ 11
kBitModelTotal          equ (1 SHL kNumBitModelTotalBits)
kNumMoveBits            equ 5
; (1 SHL kNumMoveBits) - 1; used by the PUP-based macros as the "target"
; value of the probability update when the decoded bit is 1.
kBitModelOffset         equ ((1 SHL kNumMoveBits) - 1)
; NORM refills the range coder when range drops below this value.
kTopValue               equ (1 SHL 24)

; NORM_2 -- unconditional range coder refill:
;       cod   = (cod << 8) | *buf++
;       range = range << 8
; After `shl cod, 8` the low byte of cod is zero, so the new input byte is
; written straight into cod_L instead of being OR-ed in (the commented-out
; movzx / or pair is the alternative form).
; Clobbers flags.
NORM_2 macro
        ; movzx   t0, BYTE PTR [buf]
        shl     cod, 8
        mov     cod_L, BYTE PTR [buf]
        shl     range, 8
        ; or      cod, t0
        inc     buf
endm

; NORM -- refill the range coder (NORM_2) only when range < kTopValue.
; Uses an anonymous @@ label, so it must not be placed between another
; @@ label and a jump that refers to it with @F / @B.
; Clobbers flags.
NORM macro
        cmp     range, kTopValue
        jae     SHORT @F
        NORM_2
@@:
endm

; ---------- Branch MACROS ----------
;
; The branch macros decode one bit with a conditional jump. They share the
; address triple (probsArray, probOffset, probDisp): the probability lives at
;       [probOffset * 1 + probsArray + probDisp * PMULT]

; UPDATE_0 -- finish a decoded 0 bit after CMP_COD:
;       prob += (kBitModelTotal - prob) >> kNumMoveBits
; In:   probBranch = current probability (loaded by CMP_COD),
;       kBitModelTotal_reg = kBitModelTotal.
; range is left untouched: CMP_COD already reduced it to the bound.
; Clobbers prob2 (t0), flags.
UPDATE_0 macro probsArray:req, probOffset:req, probDisp:req
        mov     prob2, kBitModelTotal_reg
        sub     prob2, probBranch
        shr     prob2, kNumMoveBits
        add     probBranch, prob2
        PSTORE  probBranch, probOffset * 1 + probsArray + probDisp * PMULT
endm

; UPDATE_1 -- finish a decoded 1 bit after CMP_COD:
;       range = oldRange - bound
;       cod  -= bound
;       prob -= prob >> kNumMoveBits
; In:   prob2 = range before the split (saved by CMP_COD),
;       range = bound, probBranch = current probability.
; Clobbers prob2 (t0), probBranch, flags.
UPDATE_1 macro probsArray:req, probOffset:req, probDisp:req
        sub     prob2, range
        sub     cod, range
        mov     range, prob2
        mov     prob2, probBranch
        shr     probBranch, kNumMoveBits
        sub     prob2, probBranch
        PSTORE  prob2, probOffset * 1 + probsArray + probDisp * PMULT
endm

; CMP_COD -- load a probability, normalize, and compare cod with the bound.
; Out:  probBranch = probability,
;       prob2      = range before the split,
;       range      = bound = (range >> kNumBitModelTotalBits) * probability,
;       flags      = result of `cmp cod, bound`:
;                    below -> bit is 0, above-or-equal -> bit is 1.
; The caller completes the step with UPDATE_0 or UPDATE_1.
CMP_COD macro probsArray:req, probOffset:req, probDisp:req
        PLOAD   probBranch, probOffset * 1 + probsArray + probDisp * PMULT
        NORM
        mov     prob2, range
        shr     range, kNumBitModelTotalBits
        imul    range, probBranch
        cmp     cod, range
endm

; IF_BIT_1_NOUP -- decode one bit; jump to toLabel when it is 1.
; No probability update is done on either path ("NOUP"): the code at toLabel
; and the fall-through code must apply UPDATE_1 / UPDATE_0 themselves.
IF_BIT_1_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
        CMP_COD probsArray, probOffset, probDisp
        jae     toLabel
endm

; IF_BIT_1 -- decode one bit; jump to toLabel when it is 1 (without update),
; otherwise fall through after applying UPDATE_0 for the decoded 0 bit.
; The code at toLabel must apply UPDATE_1 with the same address triple.
IF_BIT_1 macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
        IF_BIT_1_NOUP probsArray, probOffset, probDisp, toLabel
        UPDATE_0 probsArray, probOffset, probDisp
endm

; IF_BIT_0_NOUP -- decode one bit; jump to toLabel when it is 0.
; No probability update is done on either path: the code at toLabel must
; apply UPDATE_0 and the fall-through code must apply UPDATE_1.
IF_BIT_0_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
        CMP_COD probsArray, probOffset, probDisp
        jb      toLabel
endm

; ---------- CMOV MACROS ----------
;
; The CMOV macros decode one bit without branching. NORM_CALC leaves the
; decision in the carry flag and the following cmov / sbb instructions
; consume it, so nothing that modifies CF may be placed between NORM_CALC
; and the final sbb (mov, lea and cmov leave the flags alone).

; NORM_CALC prob -- normalize and split the range for probability `prob`.
; Out:  range = bound = (range >> kNumBitModelTotalBits) * prob,
;       t0    = old range - bound   (the new range if the bit is 1),
;       t1    = old cod             (the value to restore if the bit is 0),
;       cod   = old cod - bound,
;       CF    = 1 if old cod < bound (bit is 0), 0 otherwise (bit is 1).
NORM_CALC macro prob:req
        NORM
        mov     t0, range
        shr     range, kNumBitModelTotalBits
        imul    range, prob
        sub     t0, range
        mov     t1, cod
        sub     cod, range
endm

; PUP prob, probPtr -- branch-free probability update and store:
;       new = prob + ((t0 - prob) >> kNumMoveBits)      (arithmetic shift)
; In:   t0 = kBitModelTotal  for a decoded 0 bit,
;       t0 = kBitModelOffset for a decoded 1 bit
;       (selected by the caller with cmovb).
; Clobbers t0, flags.
PUP macro prob:req, probPtr:req
        sub     t0, prob
       ; only sar works for both 16/32 bit prob modes
        sar     t0, kNumMoveBits
        add     t0, prob
        PSTORE  t0, probPtr
endm

; PUP_SUB prob, probPtr, symSub -- fold the decoded bit into sym, then PUP.
;       sym = sym - symSub - CF
; CF comes from NORM_CALC (1 for a 0 bit). With symSub = -1 this adds the
; decoded bit to sym; other symSub values additionally subtract a constant.
; The sbb must come before PUP because PUP overwrites the flags.
PUP_SUB macro prob:req, probPtr:req, symSub:req
        sbb     sym, symSub
        PUP prob, probPtr
endm

; PUP_COD prob, probPtr, symSub -- common tail of a CMOV-style bit decode.
; In:   state left by NORM_CALC (cod, t1 = old cod, CF).
; Does: restores cod when the bit is 0, picks the update target for PUP,
;       copies the current sym (before the bit is folded in) into t1 so
;       that probPtr may be written in terms of t1_R, then runs PUP_SUB.
; Clobbers t0, t1, flags.
PUP_COD macro prob:req, probPtr:req, symSub:req
        mov     t0, kBitModelOffset
        cmovb   cod, t1
        mov     t1, sym
        cmovb   t0, kBitModelTotal_reg
        PUP_SUB prob, probPtr, symSub
endm

; BIT_0 prob, probNext -- first bit of a bit-tree decode rooted at `probs`.
; Loads the root probability (entry 1) into `prob`, decodes the bit and
; leaves:
;       sym      = 2 + bit
;       probNext = probability of the next node (entry 2 or entry 3).
; Clobbers t0, t1, flags.
BIT_0 macro prob:req, probNext:req
        PLOAD   prob, probs + 1 * PMULT
        PLOAD   probNext, probs + 1 * PMULT_2

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, probs + 1 * PMULT_2 + PMULT
        cmovae  probNext, t0
        mov     t0, kBitModelOffset
        cmovb   cod, t1
        cmovb   t0, kBitModelTotal_reg
        ; mov (not xor) keeps CF intact for the sbb inside PUP_SUB
        mov     sym, 2
        PUP_SUB prob, probs + 1 * PMULT, 0 - 1
endm

; BIT_1 prob, probNext -- middle bit of a bit-tree decode.
; In:   sym = current node index, prob = its probability (already loaded).
; Out:  sym = 2 * sym + bit,
;       probNext = probability of that new node.
; Both children are loaded (entry 2*sym and entry 2*sym + 1) and the right
; one is selected with cmovae. PUP_COD copies the doubled sym into t1, so
; t1_R * PMULT_HALF is the byte offset of the node that was just decoded.
; Clobbers t0, t1, flags.
BIT_1 macro prob:req, probNext:req
        PLOAD   probNext, probs + sym_R * PMULT_2
        add     sym, sym

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, probs + sym_R * PMULT + PMULT
        cmovae  probNext, t0
        PUP_COD prob, probs + t1_R * PMULT_HALF, 0 - 1
endm

; BIT_2 prob, symSub -- last bit of a bit-tree decode (no next node loaded).
; In:   sym = current node index, prob = its probability.
; Out:  sym = 2 * sym - symSub - (bit == 0 ? 1 : 0)
;       e.g. symSub = 256 - 1 yields (2 * sym + bit) - 256.
; symSub may be an immediate or a register (len_temp is passed in one place).
; Clobbers t0, t1, flags.
BIT_2 macro prob:req, symSub:req
        add     sym, sym

        NORM_CALC prob

        cmovae  range, t0
        PUP_COD prob, probs + t1_R * PMULT_HALF, symSub
endm

; ---------- MATCHED LITERAL ----------
;
; Literal decoding in the "matched" mode, where each bit's probability slot
; also depends on the corresponding bit of the match byte:
;       match  match byte, pre-shifted so that its next bit lines up with
;              the 256 * PMULT bit of offs,
;       offs   256 * PMULT while the decoded bits still agree with the match
;              byte, 0 afterwards,
;       bit    (match bit) AND offs -- extra byte offset into the table,
;       prm    probs + offs + bit (base address for the current node).

; LITM_0 -- first bit of a matched literal.
; In:   match = match byte (unshifted), probs = literal table for the context.
; Out:  sym = 2 + bit; offs / bit / match prepared for the next LITM.
; Clobbers x1, prm, t0, t1, flags.
LITM_0 macro
        mov     offs, 256 * PMULT
        shl     match, (PSHIFT + 1)
        mov     bit, offs
        and     bit, match
        PLOAD   x1, probs + 256 * PMULT + bit_R * 1 + 1 * PMULT
        lea     prm, [probs + 256 * PMULT + bit_R * 1 + 1 * PMULT]
        ; lea     prm, [probs + 256 * PMULT + 1 * PMULT]
        ; add     prm, bit_R
        xor     offs, bit
        add     match, match

        NORM_CALC x1

        ; nothing below may touch CF until the sbb in PUP_SUB
        cmovae  offs, bit
        mov     bit, match
        cmovae  range, t0
        mov     t0, kBitModelOffset
        cmovb   cod, t1
        cmovb   t0, kBitModelTotal_reg
        ; mov (not xor) keeps CF intact; sbb with -3 then gives 2 + bit
        mov     sym, 0
        PUP_SUB x1, prm, -2-1
endm

; LITM -- middle bit of a matched literal.
; In:   sym = current node index, bit = shifted match byte, offs as left by
;       the previous LITM_0 / LITM.
; Out:  sym = 2 * sym + decoded bit; offs / bit / match advanced by one bit.
; The probability is read from prm + sym * PMULT with prm = probs + offs +
; (bit AND offs); PUP_COD writes it back through t1 (the doubled sym).
; Clobbers x1, prm, t0, t1, flags.
LITM macro
        and     bit, offs
        lea     prm, [probs + offs_R * 1]
        add     prm, bit_R
        PLOAD   x1, prm + sym_R * PMULT
        xor     offs, bit
        add     sym, sym
        add     match, match

        NORM_CALC x1

        cmovae  offs, bit
        mov     bit, match
        cmovae  range, t0
        PUP_COD x1, prm + t1_R * PMULT_HALF, - 1
endm

; LITM_2 -- last bit of a matched literal.
; Same addressing as LITM, but offs / match are not advanced any further and
; 256 is subtracted from the result, leaving sym = (2 * sym + bit) - 256.
; Clobbers x1, prm, t0, t1, flags.
LITM_2 macro
        and     bit, offs
        lea     prm, [probs + offs_R * 1]
        add     prm, bit_R
        PLOAD   x1, prm + sym_R * PMULT
        add     sym, sym

        NORM_CALC x1

        cmovae  range, t0
        PUP_COD x1, prm + t1_R * PMULT_HALF, 256 - 1
endm

; ---------- REVERSE BITS ----------
;
; Reverse bit-tree decoding. sym2_R walks the tree as a *pointer* into the
; probability table; REV_2 converts it back to an index at the end.

; REV_0 prob, probNext -- first reverse bit.
; In:   prob   = probability at probs + 1 * PMULT (loaded by the caller),
;       sym2_R = probs + 2 * PMULT (set up by the caller; see the
;                commented-out lines below).
; Out:  sym2_R = probs + 2 * PMULT (bit 0) or probs + 3 * PMULT (bit 1),
;       probNext = probability at the new sym2_R.
; Clobbers t0, t1, flags.
REV_0 macro prob:req, probNext:req
        ; PLOAD   prob, probs + 1 * PMULT
        ; lea     sym2_R, [probs + 2 * PMULT]
        ; PLOAD   probNext, probs + 2 * PMULT
        PLOAD   probNext, sym2_R

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, probs + 3 * PMULT
        cmovae  probNext, t0
        cmovb   cod, t1
        mov     t0, kBitModelOffset
        cmovb   t0, kBitModelTotal_reg
        lea     t1_R, [probs + 3 * PMULT]
        cmovae  sym2_R, t1_R
        PUP prob, probs + 1 * PMULT
endm

; REV_1 prob, probNext, step -- middle reverse bit.
; In:   sym2_R = pointer to the current node, prob = its probability,
;       step   = weight of this bit in entries (2, then 4 in this file).
; Out:  sym2_R += step * PMULT (bit 0) or += 2 * step * PMULT (bit 1),
;       probNext = probability at the new sym2_R.
; t1_R always holds old sym2_R + 2 * step * PMULT when PUP runs, so
; t1_R - step * PMULT_2 is the address of the node that was just decoded.
; Clobbers t0, t1, flags.
REV_1 macro prob:req, probNext:req, step:req
        add     sym2_R, step * PMULT
        PLOAD   probNext, sym2_R

        NORM_CALC prob

        cmovae  range, t0
        PLOAD   t0, sym2_R + step * PMULT
        cmovae  probNext, t0
        cmovb   cod, t1
        mov     t0, kBitModelOffset
        cmovb   t0, kBitModelTotal_reg
        lea     t1_R, [sym2_R + step * PMULT]
        cmovae  sym2_R, t1_R
        PUP prob, t1_R - step * PMULT_2
endm

; REV_2 prob, step -- last reverse bit.
; Converts sym2_R from a pointer back to a node index
; ((sym2_R - probs) >> PSHIFT), ORs that index into sym, decodes the final
; bit and subtracts `step` from sym when the bit is 0.
; In:   sym2_R = pointer to the current node, prob = its probability.
; Clobbers sym2, t0, t1, flags.
REV_2 macro prob:req, step:req
        sub     sym2_R, probs
        shr     sym2, PSHIFT
        or      sym, sym2

        NORM_CALC prob

        cmovae  range, t0
        lea     t0, [sym - step]
        cmovb   sym, t0
        cmovb   cod, t1
        mov     t0, kBitModelOffset
        cmovb   t0, kBitModelTotal_reg
        PUP prob, probs + sym2_R * PMULT
endm

; REV_1_VAR prob -- one reverse bit with a run-time step (used in a loop).
; In:   sym_R  = pointer to the current node's probability,
;       sym2_R = current step in bytes.
; Out:  sym_R += step (bit 0) or += 2 * step (bit 1); sym2 is doubled.
; NOTE: probs is overwritten with the address of the node being decoded (it
; is used as the store address for PUP); the caller must reload probs.
; Clobbers prob, probs, t0, t1, flags.
REV_1_VAR macro prob:req
        PLOAD   prob, sym_R
        mov     probs, sym_R
        add     sym_R, sym2_R

        NORM_CALC prob

        cmovae  range, t0
        lea     t0_R, [sym_R + sym2_R]
        cmovae  sym_R, t0_R
        mov     t0, kBitModelOffset
        cmovb   cod, t1
        ; mov     t1, kBitModelTotal
        ; cmovb   t0, t1
        cmovb   t0, kBitModelTotal_reg
        add     sym2, sym2
        PUP prob, probs
endm

; LIT_PROBS lpMaskParam -- start of a literal: select the literal probability
; table for the current context and finish the IsMatch 0 bit.
; In:   sym          = byte added to (processedPos << 8) below; per the C
;                      expression quoted inside, the previous output byte,
;       lpMaskParam  = lpMask (register or memory operand),
;       probs_state_R, pbPos_R, probBranch as left by IsMatchBranch_Pre and
;       the CMP_COD of the IsMatch bit,
;       LOC lc2      = shift count stored by the procedure prologue.
; Out:  probs += Literal * PMULT + ((3 * (context & lpMask)) << lc2),
;       probs_state_R += pbPos_R, processedPos incremented,
;       IsMatch probability updated for a 0 bit (UPDATE_0).
; Clobbers sym, x1, t0, flags.
LIT_PROBS macro lpMaskParam:req
        ; prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
        mov     t0, processedPos
        shl     t0, 8
        add     sym, t0
        and     sym, lpMaskParam
        add     probs_state_R, pbPos_R
        mov     x1, LOC lc2
        lea     sym, dword ptr[sym_R + 2 * sym_R]
        add     probs, Literal * PMULT
        shl     sym, x1_L
        add     probs, sym_R
        UPDATE_0 probs_state_R, 0, IsMatch
        inc     processedPos
endm

; ---------- LZMA model constants ----------
; Per the file header, the (probs) array layout must be equal in the C and
; ASM versions, so these values have to track LzmaDec.c.

kNumPosBitsMax equ 4
kNumPosStatesMax equ (1 SHL kNumPosBitsMax)

; length coder
kLenNumLowBits         equ 3
kLenNumLowSymbols      equ (1 SHL kLenNumLowBits)
kLenNumHighBits        equ 8
kLenNumHighSymbols     equ (1 SHL kLenNumHighBits)
kNumLenProbs           equ (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)

; offsets (in entries) inside one length coder
LenLow                 equ 0
LenChoice              equ LenLow
LenChoice2             equ (LenLow + kLenNumLowSymbols)
LenHigh                equ (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)

kNumStates             equ 12
kNumStates2            equ 16
kNumLitStates          equ 7

kStartPosModelIndex    equ 4
kEndPosModelIndex      equ 14
kNumFullDistances      equ (1 SHL (kEndPosModelIndex SHR 1))

kNumPosSlotBits        equ 6
kNumLenToPosStates     equ 4

kNumAlignBits          equ 4
kAlignTableSize        equ (1 SHL kNumAlignBits)

kMatchMinLen           equ 2
kMatchSpecLenStart     equ (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)

; ---------- probs array layout ----------
; Offsets are in entries (multiply by PMULT for bytes) and are relative to
; the pointer loaded from CLzmaDec_Asm.probs_1664, which is why the first
; tables have negative offsets and kAlign lands exactly on 0.
kStartOffset    equ 1664
SpecPos         equ (-kStartOffset)
IsRep0Long      equ (SpecPos + kNumFullDistances)
RepLenCoder     equ (IsRep0Long + (kNumStates2 SHL kNumPosBitsMax))
LenCoder        equ (RepLenCoder + kNumLenProbs)
IsMatch         equ (LenCoder + kNumLenProbs)
kAlign          equ (IsMatch + (kNumStates2 SHL kNumPosBitsMax))
IsRep           equ (kAlign + kAlignTableSize)
IsRepG0         equ (IsRep + kNumStates)
IsRepG1         equ (IsRepG0 + kNumStates)
IsRepG2         equ (IsRepG1 + kNumStates)
PosSlot         equ (IsRepG2 + kNumStates)
Literal         equ (PosSlot + (kNumLenToPosStates SHL kNumPosSlotBits))
NUM_BASE_PROBS  equ (Literal + kStartOffset)

; Assembly-time layout checks: the align-bit code addresses its table as
; probs + small constants, so kAlign has to be 0.
if kAlign ne 0
  .err <Stop_Compiling_Bad_LZMA_kAlign>
endif

if NUM_BASE_PROBS ne 1984
  .err <Stop_Compiling_Bad_LZMA_PROBS>
endif

; Pointer-sized structure field (64-bit).
PTR_FIELD equ dq ?

; CLzmaDec_Asm -- layout of the decoder state object passed as PARAM_lzma.
; Per the file header it must match the CLzmaDec structure used by the C
; code. Fields with a _Spec suffix have a like-named working copy in a
; register or in CLzmaDec_Asm_Loc while the procedure runs.
CLzmaDec_Asm struct
        lc      db ?
        lp      db ?
        pb      db ?
        _pad_   db ?
        dicSize dd ?

        probs_Spec      PTR_FIELD
        probs_1664      PTR_FIELD       ; base pointer the layout offsets refer to
        dic_Spec        PTR_FIELD
        dicBufSize      PTR_FIELD
        dicPos_Spec     PTR_FIELD       ; stored as an offset from dic
        buf_Spec        PTR_FIELD

        range_Spec      dd ?
        code_Spec       dd ?
        processedPos_Spec  dd ?
        checkDicSize    dd ?
        rep0    dd ?
        rep1    dd ?
        rep2    dd ?
        rep3    dd ?
        state_Spec      dd ?
        remainLen dd ?
CLzmaDec_Asm ends

; CLzmaDec_Asm_Loc -- the procedure's stack frame (addressed through the
; LOC / LOC_0 prefixes). It holds the saved stack pointer, the pointer to
; the caller's CLzmaDec_Asm, and working copies of values that do not fit
; in registers.
;
; The first field is spelled Old_RSP to match the spelling used by the
; procedure prologue and epilogue, so the file also assembles when
; identifiers are case-sensitive.
CLzmaDec_Asm_Loc struct
        Old_RSP    PTR_FIELD            ; RSP on entry, restored before return
        lzmaPtr    PTR_FIELD            ; PARAM_lzma
        _pad0_     PTR_FIELD
        _pad1_     PTR_FIELD
        _pad2_     PTR_FIELD
        dicBufSize PTR_FIELD
        probs_Spec PTR_FIELD            ; copy of CLzmaDec_Asm.probs_1664
        dic_Spec   PTR_FIELD

        limit      PTR_FIELD            ; dic + PARAM_limit
        bufLimit   PTR_FIELD            ; PARAM_bufLimit
        lc2       dd ?                  ; lc + PSHIFT
        lpMask    dd ?
        pbMask    dd ?
        checkDicSize   dd ?

        _pad_     dd ?
        remainLen dd ?
        dicPos_Spec     PTR_FIELD
        rep0      dd ?
        rep1      dd ?
        rep2      dd ?
        rep3      dd ?
CLzmaDec_Asm_Loc ends

; Addressing prefixes: each is written directly in front of a field name
; (e.g. `LOC rep0`) and expands to a structure field reference.
GLOB_2  equ [sym_R].CLzmaDec_Asm.       ; caller's state via sym_R (prologue)
GLOB    equ [r1].CLzmaDec_Asm.          ; caller's state via r1 (epilogue)
LOC_0   equ [r0].CLzmaDec_Asm_Loc.      ; local frame via r0 (prologue)
LOC     equ [RSP].CLzmaDec_Asm_Loc.     ; local frame via RSP

; COPY_VAR name -- copy a 32-bit field from the caller's state (GLOB_2, via
; sym_R) into the like-named field of the local frame (LOC_0, via r0).
; Clobbers t0.
COPY_VAR macro name
        mov     t0, GLOB_2 name
        mov     LOC_0 name, t0
endm

; RESTORE_VAR name -- copy a 32-bit field from the local frame (LOC, via RSP)
; back to the like-named field of the caller's state (GLOB, via r1).
; Clobbers t0.
RESTORE_VAR macro name
        mov     t0, LOC name
        mov     GLOB name, t0
endm

; IsMatchBranch_Pre -- precompute the operands of the next IsMatch decision.
; Out:  pbPos         = (processedPos & pbMask) << (kLenNumLowBits + 1 + PSHIFT)
;       probs_state_R = probs + state_R
; state is kept pre-multiplied by PMULT, so probs_state_R is a byte address.
; The `reg` parameter is not used; every invocation in this file passes
; nothing.
; Clobbers flags.
IsMatchBranch_Pre macro reg
        ; prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
        mov     pbPos, LOC pbMask
        and     pbPos, processedPos
        shl     pbPos, (kLenNumLowBits + 1 + PSHIFT)
        lea     probs_state_R, [probs + state_R]
endm

; IsMatchBranch -- IsMatchBranch_Pre followed by the IsMatch bit decode:
; jumps to IsMatch_label on a 1 bit, falls through (after UPDATE_0) on 0.
; Not invoked by the code in this file (only in a commented-out line).
; The `reg` parameter is not used.
IsMatchBranch macro reg
        IsMatchBranch_Pre
        IF_BIT_1 probs_state_R, pbPos_R, IsMatch, IsMatch_label
endm

; CheckLimits -- leave the decode loop through fin_OK once the input pointer
; has reached bufLimit or the output pointer has reached limit.
; The `reg` parameter is not used.
; Clobbers flags.
CheckLimits macro reg
        cmp     buf, LOC bufLimit
        jae     fin_OK
        cmp     dicPos, LOC limit
        jae     fin_OK
endm

; RSP is (16x + 8) bytes aligned in WIN64-x64
; LocalSize equ ((((SIZEOF CLzmaDec_Asm_Loc) + 7) / 16 * 16) + 8)

; The three procedure arguments (REG_PARAM_* are defined in 7zAsm.asm).
PARAM_lzma      equ REG_PARAM_0         ; pointer to CLzmaDec_Asm
PARAM_limit     equ REG_PARAM_1         ; output limit, relative to dic
PARAM_bufLimit  equ REG_PARAM_2         ; input limit pointer

; MY_ALIGN_64
;-----------------------------------------------------------------------
; LzmaDec_DecodeReal_3 -- main LZMA decode loop.
;
; In (3 register parameters, see PARAM_*):
;   PARAM_lzma     pointer to the CLzmaDec_Asm state
;   PARAM_limit    output limit, as an offset into the dictionary
;                  (dic is added to it below)
;   PARAM_bufLimit input limit pointer (compared against buf)
; Out:
;   x0 = 0 when decoding stopped at a limit or at the end marker (fin_OK),
;        1 on a data error (fin_ERROR).
;   The range coder, dictionary position, reps, state, processedPos and
;   remainLen are written back to *PARAM_lzma at `fin`.
;
; Locals live in a CLzmaDec_Asm_Loc frame carved below the incoming RSP and
; aligned down to 128 bytes; the incoming RSP is kept in its Old_RSP field.
; Register roles are listed in the register usage map near the top of the
; file. `state` is kept pre-multiplied by PMULT for the whole procedure.
;-----------------------------------------------------------------------
MY_PROC LzmaDec_DecodeReal_3, 3
MY_PUSH_PRESERVED_REGS

        ; ---------- build the local frame ----------
        lea     r0, [RSP - (SIZEOF CLzmaDec_Asm_Loc)]
        and     r0, -128
        mov     r5, RSP
        mov     RSP, r0
        mov     LOC_0 Old_RSP, r5
        mov     LOC_0 lzmaPtr, PARAM_lzma

        mov     LOC_0 remainLen, 0  ; remainLen must be ZERO

        mov     LOC_0 bufLimit, PARAM_bufLimit
        mov     sym_R, PARAM_lzma  ;  CLzmaDec_Asm pointer for GLOB_2
        mov     dic, GLOB_2 dic_Spec
        add     PARAM_limit, dic        ; limit becomes an absolute pointer
        mov     LOC_0 limit, PARAM_limit

        COPY_VAR(rep0)
        COPY_VAR(rep1)
        COPY_VAR(rep2)
        COPY_VAR(rep3)

        ; dicPos is an offset in the caller's state, an absolute pointer here
        mov     dicPos, GLOB_2 dicPos_Spec
        add     dicPos, dic
        mov     LOC_0 dicPos_Spec, dicPos
        mov     LOC_0 dic_Spec, dic

        mov     x1_L, GLOB_2 pb
        mov     t0, 1
        shl     t0, x1_L
        dec     t0
        mov     LOC_0 pbMask, t0

        ; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
        ; unsigned lc = p->prop.lc;
        ; unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);

        mov     x1_L, GLOB_2 lc
        mov     x2, 100h
        mov     t0, x2
        shr     x2, x1_L
        ; inc     x1
        add     x1_L, PSHIFT
        ; only the low byte of lc2 is meaningful: it is used solely as a
        ; shift count through x1_L in LIT_PROBS
        mov     LOC_0 lc2, x1
        mov     x1_L, GLOB_2 lp
        shl     t0, x1_L
        sub     t0, x2
        mov     LOC_0 lpMask, t0
        mov     lpMask_reg, t0

        ; mov     probs, GLOB_2 probs_Spec
        ; add     probs, kStartOffset SHL PSHIFT
        mov     probs, GLOB_2 probs_1664
        mov     LOC_0 probs_Spec, probs

        mov     t0_R, GLOB_2 dicBufSize
        mov     LOC_0 dicBufSize, t0_R

        mov     x1, GLOB_2 checkDicSize
        mov     LOC_0 checkDicSize, x1

        mov     processedPos, GLOB_2 processedPos_Spec

        mov     state, GLOB_2 state_Spec
        shl     state, PSHIFT           ; state is kept scaled by PMULT

        mov     buf,   GLOB_2 buf_Spec
        mov     range, GLOB_2 range_Spec
        mov     cod,   GLOB_2 code_Spec
        mov     kBitModelTotal_reg, kBitModelTotal
        xor     sym, sym                ; previous byte = 0 unless loaded below

        ; if (processedPos != 0 || checkDicSize != 0)
        or      x1, processedPos
        jz      @f

        ; sym = dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]
        add     t0_R, dic
        cmp     dicPos, dic
        cmovnz  t0_R, dicPos
        movzx   sym, byte ptr[t0_R - 1]

@@:
        ; enter the loop at the IsMatch test that suits the current state
        IsMatchBranch_Pre
        cmp     state, 4 * PMULT
        jb      lit_end
        cmp     state, kNumLitStates * PMULT
        jb      lit_matched_end
        jmp     lz_end




; ---------- LITERAL ----------
MY_ALIGN_64
lit_start:
        xor     state, state
lit_start_2:
        LIT_PROBS lpMask_reg

    ifdef _LZMA_SIZE_OPT

        PLOAD   x1, probs + 1 * PMULT
        mov     sym, 1
MY_ALIGN_16
lit_loop:
        BIT_1   x1, x2
        mov     x1, x2
        cmp     sym, 127
        jbe     lit_loop

    else

        BIT_0   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2

    endif

        BIT_2   x2, 256 - 1

        ; mov     dic, LOC dic_Spec
        mov     probs, LOC probs_Spec
        IsMatchBranch_Pre
        mov     byte ptr[dicPos], sym_L
        inc     dicPos

        CheckLimits
lit_end:
        IF_BIT_0_NOUP probs_state_R, pbPos_R, IsMatch, lit_start

        ; jmp     IsMatch_label

; ---------- MATCHES ----------
; MY_ALIGN_32
IsMatch_label:
        UPDATE_1 probs_state_R, pbPos_R, IsMatch
        IF_BIT_1 probs_state_R, 0, IsRep, IsRep_label

        ; simple match: adding kNumStates marks it as such for the
        ; `cmp state, kNumStates * PMULT` test after the length decode
        add     probs, LenCoder * PMULT
        add     state, kNumStates * PMULT

; ---------- LEN DECODE ----------
; probs points at the length coder to use. len_temp carries the constant
; that the final BIT_2 subtracts, which is how the three length ranges end
; up with their different base values.
len_decode:
        mov     len_temp, 8 - 1 - kMatchMinLen
        IF_BIT_0_NOUP probs, 0, 0, len_mid_0
        UPDATE_1 probs, 0, 0
        add     probs, (1 SHL (kLenNumLowBits + PSHIFT))
        mov     len_temp, -1 - kMatchMinLen
        IF_BIT_0_NOUP probs, 0, 0, len_mid_0
        UPDATE_1 probs, 0, 0
        add     probs, LenHigh * PMULT - (1 SHL (kLenNumLowBits + PSHIFT))
        mov     sym, 1
        PLOAD   x1, probs + 1 * PMULT

MY_ALIGN_32
len8_loop:
        BIT_1   x1, x2
        mov     x1, x2
        cmp     sym, 64
        jb      len8_loop

        mov     len_temp, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - 1 - kMatchMinLen
        jmp     len_mid_2

MY_ALIGN_32
len_mid_0:
        UPDATE_0 probs, 0, 0
        add     probs, pbPos_R
        BIT_0   x2, x1
len_mid_2:
        BIT_1   x1, x2
        BIT_2   x2, len_temp
        mov     probs, LOC probs_Spec
        ; state < kNumStates: rep match, distance is already in rep0
        cmp     state, kNumStates * PMULT
        jb      copy_match


; ---------- DECODE DISTANCE ----------
        ; probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);

        mov     t0, 3 + kMatchMinLen
        cmp     sym, 3 + kMatchMinLen
        cmovb   t0, sym
        add     probs, PosSlot * PMULT - (kMatchMinLen SHL (kNumPosSlotBits + PSHIFT))
        shl     t0, (kNumPosSlotBits + PSHIFT)
        add     probs, t0_R

        ; sym = Len
        ; mov     LOC remainLen, sym
        mov     len_temp, sym           ; keep the length while sym is reused

    ifdef _LZMA_SIZE_OPT

        PLOAD   x1, probs + 1 * PMULT
        mov     sym, 1
MY_ALIGN_16
slot_loop:
        BIT_1   x1, x2
        mov     x1, x2
        cmp     sym, 32
        jb      slot_loop

    else

        BIT_0   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2
        BIT_1   x2, x1
        BIT_1   x1, x2

    endif

        ; x1 = tree index before the last bit (32 + upper 5 bits of the slot)
        mov     x1, sym
        BIT_2   x2, 64-1

        and     sym, 3
        mov     probs, LOC probs_Spec
        cmp     x1, 32 + kEndPosModelIndex / 2
        jb      short_dist

        ;  unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
        sub     x1, (32 + 1 + kNumAlignBits)
        ;  distance = (2 | (distance & 1));
        or      sym, 2
        ; preload what REV_0 expects at direct_end
        PLOAD   x2, probs + 1 * PMULT
        shl     sym, kNumAlignBits + 1
        lea     sym2_R, [probs + 2 * PMULT]

        jmp     direct_norm
        ; lea     t1, [sym_R + (1 SHL kNumAlignBits)]
        ; cmp     range, kTopValue
        ; jb      direct_norm

; ---------- DIRECT DISTANCE ----------
; x1 counts the remaining direct bits. Each one is decoded with a fixed
; half/half split of range: the subtraction is undone when it goes negative
; (bit 0), otherwise sym takes the value with the bit set (t1).
MY_ALIGN_32
direct_loop:
        shr     range, 1
        mov     t0, cod
        sub     cod, range
        cmovs   cod, t0
        cmovns  sym, t1

        comment ~
        sub     cod, range
        mov     x2, cod
        sar     x2, 31
        lea     sym, dword ptr [r2 + sym_R * 2 + 1]
        and     x2, range
        add     cod, x2
        ~
        dec     x1
        je      direct_end

        add     sym, sym
direct_norm:
        lea     t1, [sym_R + (1 SHL kNumAlignBits)]
        cmp     range, kTopValue
        jae     near ptr direct_loop
        ; we align for 32 here with "near ptr" command above
        NORM_2
        jmp     direct_loop

MY_ALIGN_32
direct_end:
        ;  prob =  + kAlign;
        ;  distance <<= kNumAlignBits;
        REV_0   x2, x1
        REV_1   x1, x2, 2
        REV_1   x2, x1, 4
        REV_2   x1, 8

decode_dist_end:

        ; if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))

        mov     t0, LOC checkDicSize
        test    t0, t0
        cmove   t0, processedPos
        cmp     sym, t0
        jae     end_of_payload

        ; rep3 = rep2;
        ; rep2 = rep1;
        ; rep1 = rep0;
        ; rep0 = distance + 1;

        inc     sym
        mov     t0, LOC rep0
        mov     t1, LOC rep1
        mov     x1, LOC rep2
        mov     LOC rep0, sym
        ; mov     sym, LOC remainLen
        mov     sym, len_temp           ; sym = match length again
        mov     LOC rep1, t0
        mov     LOC rep2, t1
        mov     LOC rep3, x1

        ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
        ; (the mov instructions between cmp and cmovae do not change flags)
        cmp     state, (kNumStates + kNumLitStates) * PMULT
        mov     state, kNumLitStates * PMULT
        mov     t0, (kNumLitStates + 3) * PMULT
        cmovae  state, t0


; ---------- COPY MATCH ----------
; In: sym = match length as produced by the length decoder, LOC rep0 = distance.
copy_match:

        ; len += kMatchMinLen;
        ; add     sym, kMatchMinLen

        ; if ((rem = limit - dicPos) == 0)
        ; {
        ;   p->dicPos = dicPos;
        ;   return SZ_ERROR_DATA;
        ; }
        mov     cnt_R, LOC limit
        sub     cnt_R, dicPos
        jz      fin_ERROR

        ; curLen = ((rem < len) ? (unsigned)rem : len);
        cmp     cnt_R, sym_R
        ; cmovae  cnt_R, sym_R ; 64-bit
        cmovae  cnt, sym ; 32-bit

        mov     dic, LOC dic_Spec
        mov     x1, LOC rep0

        mov     t0_R, dicPos
        add     dicPos, cnt_R
        ; processedPos += curLen;
        add     processedPos, cnt
        ; len -= curLen;
        sub     sym, cnt
        mov     LOC remainLen, sym

        sub     t0_R, dic

        ; pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
        sub     t0_R, r1
        jae     @f

        mov     r1, LOC dicBufSize
        add     t0_R, r1
        sub     r1, t0_R                ; r1 = bytes from pos to the buffer end
        cmp     cnt_R, r1
        ja      copy_match_cross
@@:
        ; if (curLen <= dicBufSize - pos)

; ---------- COPY MATCH FAST ----------
        ; Byte *dest = dic + dicPos;
        ; mov     r1, dic
        ; ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
        ; sub     t0_R, dicPos
        ; dicPos += curLen;

        ; const Byte *lim = dest + curLen;
        add     t0_R, dic
        movzx   sym, byte ptr[t0_R]
        add     t0_R, cnt_R
        neg     cnt_R
        ; lea     r1, [dicPos - 1]
copy_common:
        dec     dicPos
        ; cmp   LOC rep0, 1
        ; je    rep0Label

        ; t0_R - src_lim
        ; r1 - dest_lim - 1
        ; cnt_R - (-cnt)

        ; The loop below runs cnt_R (negative) up to zero; every iteration
        ; stores the byte loaded by the previous one. The last byte is
        ; stored at lz_end_match.
        IsMatchBranch_Pre
        inc     cnt_R
        jz      copy_end
MY_ALIGN_16
@@:
        mov     byte ptr[cnt_R * 1 + dicPos], sym_L
        movzx   sym, byte ptr[cnt_R * 1 + t0_R]
        inc     cnt_R
        jnz     @b

copy_end:
lz_end_match:
        mov     byte ptr[dicPos], sym_L
        inc     dicPos

        ; IsMatchBranch_Pre
        CheckLimits
lz_end:
        IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label



; ---------- LITERAL MATCHED ----------

        LIT_PROBS LOC lpMask

        ; matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
        mov     x1, LOC rep0
        ; mov     dic, LOC dic_Spec
        ; dicPos shares its register with `bit`, so park it in the frame
        mov     LOC dicPos_Spec, dicPos

        ; state -= (state < 10) ? 3 : 6;
        lea     t0, [state_R - 6 * PMULT]
        sub     state, 3 * PMULT
        cmp     state, 7 * PMULT
        cmovae  state, t0

        sub     dicPos, dic
        sub     dicPos, r1
        jae     @f
        add     dicPos, LOC dicBufSize
@@:
        comment ~
        xor     t0, t0
        sub     dicPos, r1
        cmovb   t0_R, LOC dicBufSize
        ~

        movzx   match, byte ptr[dic + dicPos * 1]

    ifdef _LZMA_SIZE_OPT

        mov     offs, 256 * PMULT
        shl     match, (PSHIFT + 1)
        mov     bit, match
        mov     sym, 1
MY_ALIGN_16
litm_loop:
        LITM
        cmp     sym, 256
        jb      litm_loop
        sub     sym, 256

    else

        LITM_0
        LITM
        LITM
        LITM
        LITM
        LITM
        LITM
        LITM_2

    endif

        mov     probs, LOC probs_Spec
        IsMatchBranch_Pre
        ; mov     dic, LOC dic_Spec
        mov     dicPos, LOC dicPos_Spec
        mov     byte ptr[dicPos], sym_L
        inc     dicPos

        CheckLimits
lit_matched_end:
        IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label
        ; IsMatchBranch
        mov     lpMask_reg, LOC lpMask
        sub     state, 3 * PMULT
        jmp     lit_start_2



; ---------- REP 0 LITERAL ----------
MY_ALIGN_32
IsRep0Short_label:
        UPDATE_0 probs_state_R, pbPos_R, IsRep0Long

        ; dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
        mov     dic, LOC dic_Spec
        mov     t0_R, dicPos
        mov     probBranch, LOC rep0
        sub     t0_R, dic

        ; undo the RepLenCoder offset that IsRep_label added to probs
        sub     probs, RepLenCoder * PMULT
        inc     processedPos
        ; state = state < kNumLitStates ? 9 : 11;
        or      state, 1 * PMULT
        IsMatchBranch_Pre

        sub     t0_R, probBranch_R
        jae     @f
        add     t0_R, LOC dicBufSize
@@:
        movzx   sym, byte ptr[dic + t0_R * 1]
        jmp     lz_end_match


MY_ALIGN_32
IsRep_label:
        UPDATE_1 probs_state_R, 0, IsRep

        ; The (checkDicSize == 0 && processedPos == 0) case was checked before in LzmaDec.c with kBadRepCode.
        ; So we don't check it here.

        ; mov     t0, processedPos
        ; or      t0, LOC checkDicSize
        ; jz      fin_ERROR_2

        ; state = state < kNumLitStates ? 8 : 11;
        cmp     state, kNumLitStates * PMULT
        mov     state, 8 * PMULT
        mov     probBranch, 11 * PMULT
        cmovae  state, probBranch

        ; prob = probs + RepLenCoder;
        add     probs, RepLenCoder * PMULT

        ; probs_state_R still holds the address computed from the old state
        IF_BIT_1 probs_state_R, 0, IsRepG0, IsRepG0_label
        IF_BIT_0_NOUP probs_state_R, pbPos_R, IsRep0Long, IsRep0Short_label
        UPDATE_1 probs_state_R, pbPos_R, IsRep0Long
        jmp     len_decode

MY_ALIGN_32
IsRepG0_label:
        UPDATE_1 probs_state_R, 0, IsRepG0
        mov     dist2, LOC rep0
        mov     dist, LOC rep1
        mov     LOC rep1, dist2

        IF_BIT_1 probs_state_R, 0, IsRepG1, IsRepG1_label
        mov     LOC rep0, dist
        jmp     len_decode

; MY_ALIGN_32
IsRepG1_label:
        UPDATE_1 probs_state_R, 0, IsRepG1
        mov     dist2, LOC rep2
        mov     LOC rep2, dist

        IF_BIT_1 probs_state_R, 0, IsRepG2, IsRepG2_label
        mov     LOC rep0, dist2
        jmp     len_decode

; MY_ALIGN_32
IsRepG2_label:
        UPDATE_1 probs_state_R, 0, IsRepG2
        mov     dist, LOC rep3
        mov     LOC rep3, dist2
        mov     LOC rep0, dist
        jmp     len_decode



; ---------- SPEC SHORT DISTANCE ----------
; In: x1 = 32 + upper bits of the position slot, sym = low two bits of it.

MY_ALIGN_32
short_dist:
        sub     x1, 32 + 1
        jbe     decode_dist_end
        or      sym, 2
        shl     sym, x1_L
        ; from here sym_R is a pointer into the SpecPos table
        lea     sym_R, [probs + sym_R * PMULT + SpecPos * PMULT + 1 * PMULT]
        mov     sym2, PMULT ; step
MY_ALIGN_32
spec_loop:
        REV_1_VAR x2
        dec     x1
        jnz     spec_loop

        ; REV_1_VAR clobbered probs; reload it and turn sym_R back from a
        ; pointer into a distance value
        mov     probs, LOC probs_Spec
        sub     sym, sym2
        sub     sym, SpecPos * PMULT
        sub     sym_R, probs
        shr     sym, PSHIFT

        jmp     decode_dist_end


; ---------- COPY MATCH CROSS ----------
; The source range wraps around the end of the dictionary buffer: copy up
; to the buffer end here, then continue from the buffer start through
; copy_common.
copy_match_cross:
        ; t0_R - src pos
        ; r1 - len to dicBufSize
        ; cnt_R - total copy len

        mov     t1_R, t0_R         ; srcPos
        mov     t0_R, dic
        mov     r1, LOC dicBufSize   ;
        neg     cnt_R
@@:
        movzx   sym, byte ptr[t1_R * 1 + t0_R]
        inc     t1_R
        mov     byte ptr[cnt_R * 1 + dicPos], sym_L
        inc     cnt_R
        cmp     t1_R, r1
        jne     @b

        movzx   sym, byte ptr[t0_R]
        sub     t0_R, cnt_R
        jmp     copy_common




; ---------- EXIT PATHS ----------
fin_ERROR:
        mov     LOC remainLen, len_temp
; fin_ERROR_2:
        mov     sym, 1
        jmp     fin

end_of_payload:
        ; a distance of 0xFFFFFFFF is accepted as the end marker; any other
        ; out-of-range distance is a data error
        cmp     sym, 0FFFFFFFFh ; -1
        jne     fin_ERROR

        mov     LOC remainLen, kMatchSpecLenStart
        sub     state, kNumStates * PMULT

fin_OK:
        xor     sym, sym

fin:
        NORM

        ; write the working state back to the caller's CLzmaDec_Asm
        mov     r1, LOC lzmaPtr

        sub     dicPos, LOC dic_Spec    ; back to an offset from dic
        mov     GLOB dicPos_Spec, dicPos
        mov     GLOB buf_Spec, buf
        mov     GLOB range_Spec, range
        mov     GLOB code_Spec, cod
        shr     state, PSHIFT           ; undo the PMULT scaling
        mov     GLOB state_Spec, state
        mov     GLOB processedPos_Spec, processedPos

        RESTORE_VAR(remainLen)
        RESTORE_VAR(rep0)
        RESTORE_VAR(rep1)
        RESTORE_VAR(rep2)
        RESTORE_VAR(rep3)

        mov     x0, sym                 ; result code

        mov     RSP, LOC Old_RSP

MY_POP_PRESERVED_REGS
MY_ENDP

; Close the code segment opened at the top of the file.
_TEXT$LZMADECOPT ENDS

end