1
0
mirror of https://github.com/upx/upx synced 2025-09-28 19:06:07 +08:00

Add cl1_d32_2.ash.

This commit is contained in:
John Reiser 2006-08-15 07:08:50 -07:00
parent 6788a05dd1
commit e2dc612153

View File

@ -0,0 +1,248 @@
/*
; cl1_d32_2.ash -- cl1_decompress_le32 in 32-bit assembly
; schema from ucl/nrv2b_d32.ash
;
; Copyright (C) 2004-2006 John Reiser
; Copyright (C) 1996-2006 Markus Franz Xaver Johannes Oberhumer
; All Rights Reserved.
;
; This file is free software; you can redistribute it and/or
; modify it under the terms of the GNU General Public License as
; published by the Free Software Foundation; either version 2 of
; the License, or (at your option) any later version.
;
; This file is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License
; along with the UCL library; see the file COPYING.
; If not, write to the Free Software Foundation, Inc.,
; 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
;
; Markus F.X.J. Oberhumer John Reiser
; <markus@oberhumer.com> <jreiser@BitWagon.com>
;
; ------------- DECOMPRESSION -------------
; Input:
; esi - source
; edi - dest
; ebp - -1
; cld
; Output:
; eax - 0
; ecx - 0
*/
#include "macros2.ash"
// Bit-fetch ("getbit") fragments.  Every "// getbit" marker further down
// appears to be a spot where one copy of one of these two sections is
// placed.  Either variant leaves the next bit of the compressed stream in CF.
// One of these two is instantiated many times by buildLoader
section CL1SMA1B
// Small variant: one indirect call.  edx is loaded by the "pop edx" in
// start_cl1 (commented there as getbit_cl1 or reload_cl1).
call edx
section CL1FAS1B
// Fast variant: shift the bit buffer inline; call out only for a refill.
add ebx, ebx // CF= top bit of ebx; ebx <<= 1
jnz 1f // ebx still nonzero: CF holds a data bit, done
call edx // ebx ran empty: refill through edx, which returns the bit in CF
1:
section CL1GET1B
// NOTE(review): CL1GET1B is empty in this file; presumably a placeholder
// section name used by buildLoader -- confirm.
section CL1ENTER
// Entry point of the decompressor.  Register inputs are listed in the file
// header (esi= source, edi= dest, ebp= -1, direction flag clear).
nop // 'int3' for debugging
// The call is used only to obtain an address position-independently:
// start_cl1 pops the pushed return address into edx and never returns here.
call start_cl1 // pic push address of next instr
section CL1SMA10
// Out-of-line bit fetch: returns the next bit of the stream in CF.
// Uses ebx as the bit buffer; jumps (not calls) to reload_cl1 when the
// buffer is exhausted, so the ret in reload_cl1 returns to our caller.
getbit_cl1: // appears only in small version
add ebx, ebx // CF= top bit of ebx; ebx <<= 1
jz reload_cl1 // ebx became 0: no data bits left, fetch the next word
ret
section CL1RLOAD
// Refill the bit buffer from the input and return the first new bit in CF.
// In:  esi -> next 32-bit word of input
// Out: CF = top bit of that word; ebx = remaining 31 bits shifted up with a
//      1 in bit 0; esi advanced by 4.
// The 1 in bit 0 means ebx reaches zero only after all 32 data bits have
// been shifted out, which is what the jz/jnz tests in the getbit code detect.
reload_cl1: // for both small and fast version
mov ebx, [esi]
sub esi, -4 // esi += 4; unlike add, this sets CF for any esi below 0xFFFFFFFC
adc ebx, ebx // CF= top bit of new word; bit 0 of ebx= the CF left by the sub
ret
section CL1WID01
// Wide literal run: decode a byte count of 8 or more into ecx, then copy.
// Reached by "jmp widelit_cl1" from the top_cl1 dispatch; eax is still 1
// from the lea at top_cl1.  Bits read below (b1 first) select the count:
//   b1,1          -> 8 + b1                       (lit89_cl1)
//   b1,0,b3,1     -> 2*b1+b3 is 0: 13..16 (lit13_16_cl1); else 10..12
//   b1,0,b3,0,b5  -> ecx = 4*b1+2*b3+b5 (0..7), continue at litwidth_cl1
widelit_cl1:
sub ecx,ecx // back to 0
// getbit
section CL1WID02
adc ecx,ecx
// getbit
section CL1WID03
jc lit89_cl1
// getbit
section CL1WID04
adc ecx,ecx
// getbit
section CL1WID05
jc lit10_12_cl1
// getbit
section CL1WID06
adc ecx,ecx // 0..7; -1+ (width/2) of length
// Loop body runs ecx+1 times, consuming two bits per pass:
//   eax = 2*(2*eax + bit - 1) + bit
litwidth_cl1: // ss22 algorithm, counted width<=8 pairs; returns eax= 2..0x15555
// getbit
section CL1WID07
adc eax,eax
dec eax
// getbit
section CL1WID08
adc eax,eax
sub ecx, 1 // sub, not dec: the jnc below needs CF, which dec does not write
jnc litwidth_cl1
lea ecx,[17 -2 + eax] // 17: predecessors; ss22 returns 2..
cmp eax,0xffff-(17 -2)
jb litgo_cl1 // not maximal range of search
// Count is 0xffff or more: copy through maxlit_cl1 instead of litmov_cl1.
// maxlit_cl1 ends with "pop esi" and returns to top_cl1, so the value
// pushed here becomes esi and the next item is decoded from the top
// rather than being forced to be a match.
lea eax,[esi + ecx] // esi after copy
push eax // "parameter" to maxlit_cl1
jmp maxlit_cl1 // can have another literal afterwards
// Reached only from lit10_12_cl1 when ecx is 0: two more bits give 13..16.
lit13_16_cl1:
// getbit
section CL1WID09
adc ecx,ecx
// getbit
section CL1WID10
adc ecx,ecx
add ecx, 13
// NOTE(review): jmps is not defined in this file; presumably a short-jump
// macro from macros2.ash -- confirm.
jmps litmov_cl1
// Here ecx = 2*b1+b3 (0..3).
lit10_12_cl1:
test ecx,ecx
jz lit13_16_cl1
inc ecx // 2,3,4
// Entered directly with ecx = b1 (count 8 or 9), or by fall-through with
// ecx = 2..4 (count 10..12).
lit89_cl1:
add ecx, 8
litgo_cl1:
jmps litmov_cl1
section CL1START
// One-time setup.  Reached only through the "call start_cl1" in CL1ENTER,
// so the word on top of the stack is the address of the code that follows
// that call.  Falls through into top_cl1 with ecx == 0.
start_cl1:
sub ecx,ecx // 0
pop edx // edx= getbit_cl1 or reload_cl1
// With ebx == 0 the first "add ebx,ebx" in the getbit code yields zero,
// which sends it to reload_cl1 to load the first word of input.
sub ebx, ebx // cause reload on first bit
section CL1TOP00
// Main dispatch: decode one item from the bit stream.
//   first bit 0            -> match (match_cl1)
//   1,1                    -> 1 literal byte
//   1,0,1                  -> 2 literal bytes
//   1,0,0,1                -> 3 literal bytes
//   1,0,0,0,1,b            -> 4+b literal bytes
//   1,0,0,0,0,1,b          -> 6+b literal bytes
//   1,0,0,0,0,0            -> wide literal (widelit_cl1)
// Every literal path below ends at litdone_cl1, which is the same address
// as match_cl1, so a match is decoded next without returning to top_cl1.
top_cl1: // In: 0==ecx
lea eax,[1+ ecx] // 1: the msb of offset or large width
// getbit
section CL1TOP01
jnc match_cl1
// getbit
section CL1TOP02
jc lit1_cl1
// getbit
section CL1TOP03
jc lit2_cl1
// getbit
section CL1TOP04
jc lit3_cl1
add ecx, 2 // ecx= 2; the adc at lit45_cl1 turns it into 4 or 5
// getbit
section CL1TOP05
jc lit45_cl1
inc ecx // ecx= 3; the adc at lit67_cl1 turns it into 6 or 7
// getbit
section CL1TOP06
jc lit67_cl1
jmp widelit_cl1
lit67_cl1:
lit45_cl1:
// getbit
section CL1TOP07
adc ecx,ecx // ecx= 2*ecx + bit
// Copy ecx literal bytes from [esi] to [edi]: one byte if bit 0 of the
// count is set, one word if bit 1 is set, then count/4 dwords.
// NOTE(review): the shifts are emitted as raw bytes, presumably to force
// the 2-byte D1 /5 encoding -- confirm.
litmov_cl1:
.byte 0xD1,((3<<6)|(5<<3)|1) //shr ecx,1
jnc litmovb_cl1
movsb
litmovb_cl1:
.byte 0xD1,((3<<6)|(5<<3)|1) //shr ecx,1
jnc litmovw_cl1
movsw
litmovw_cl1:
rep
movsd
// ecx is 0 after the rep, so this restores eax to 1 as match_cl1 expects
// (the widelit_cl1 path changes eax).
lea eax,[1+ ecx] // 1: the msb
jmps litdone_cl1
// Short literals: fall-through chain copying 3, 2 or 1 byte(s).
lit3_cl1:
movsb
lit2_cl1:
movsb
lit1_cl1:
movsb
litdone_cl1:
// Decode a match: offset into ebp (kept as a negative number), length
// into ecx.  Falls through or jumps to wrinkle_cl1 in section CL1COPY0.
match_cl1: // In: 0==ecx; 1==eax
// Each pass shifts one data bit into eax, then reads a control bit:
// control 0 repeats the loop, control 1 ends it.  eax leaves as 2 or more.
offset_cl1: // ss11 algorithm
// getbit
section CL1OFF01
adc eax,eax
// getbit
section CL1OFF02
jnc offset_cl1
sub eax, 3 // 2.. ==> -1[prev], (0,,<<8)|byte
jc prev_off_cl1 // eax was 2: reuse the offset already in ebp
shl eax,8
lodsb // low 8 bits of the offset come straight from the input
xor eax, ~0 // eax= -(eax) - 1; ZF set only if eax was 0xFFFFFFFF
jz done_cl1 // EOF
mov ebp,eax // -offset
// Two bits into ecx (ecx is 0 on entry): nonzero 1..3 goes directly to
// wrinkle_cl1, where "adc ecx,1" makes it 2..4 (or one more, see there).
// Zero selects the variable-length code at mlen_cl1.
prev_off_cl1: // 1st 2 bits encode (5<=len),2,3,4
// getbit
section CL1OFF03
adc ecx,ecx
// getbit
section CL1OFF04
adc ecx,ecx
jnz wrinkle_cl1
section CL1LEN00
inc ecx // 1: the msb
// Same data-bit/control-bit scheme as offset_cl1, accumulating in ecx.
mlen_cl1:
// getbit
section CL1LEN01
adc ecx,ecx
// getbit
section CL1LEN02
jnc mlen_cl1
add ecx, 2 // 2.. ==> 4..
section CL1COPY0
// Finish the match length and copy ecx bytes from [edi + ebp] to [edi].
// In: ebp= negative offset, ecx= length code from the match decoder.
wrinkle_cl1:
// Unsigned compare: CF is set when ebp is below -0xd00, i.e. the match
// distance exceeds 0xd00.  The adc then adds 1, plus 1 more in that case.
cmp ebp,-0xd00
adc ecx, 1
copy_cl1:
push esi // save the compressed-input pointer; restored at popbot_cl1
lea esi,[edi + ebp] // esi= match source inside the output already written
cmp ebp, -4
ja ripple_cl1 // ebp is -3..-1: source and destination overlap within a dword
// Copy ecx bytes: one byte if bit 0 of the count is set, one word if bit 1
// is set, then count/4 dwords.  Also entered from the wide-literal code,
// which pushes the post-copy esi itself before jumping here.
maxlit_cl1: // literal copy cannot overlap; omit test for ripple
.byte 0xD1,((3<<6)|(5<<3)|1) //shr ecx,1
jnc maxlitb_cl1
movsb
maxlitb_cl1:
.byte 0xD1,((3<<6)|(5<<3)|1) //shr ecx,1
jnc maxlitw_cl1
movsw
maxlitw_cl1:
rep
movsd
// Common exit: ecx is 0 here (exhausted by a rep), as top_cl1 requires.
popbot_cl1:
pop esi
bottom_cl1:
jmp top_cl1
// Match distance 1..3.
ripple_cl1:
cmp ebp, -1
jne ripmov_cl1
// Distance 1: replicate the previous output byte ecx times.
lodsb
rep
stosb
jmps popbot_cl1
// Distance 2 or 3: byte-at-a-time copy, so bytes written early in the
// copy are read back later in the same copy.
ripmov_cl1:
rep
movsb
jmps popbot_cl1
// End of stream: reached from "jz done_cl1" in the offset decoder with
// eax == 0.  Execution continues into whatever follows section CL1END.
done_cl1:
section CL1END
// vi:ts=8:et