/* Mirror of https://github.com/upx/upx (synced 2025-09-28 19:06:07 +08:00).
   Listing size: 172 lines, 5.5 KiB.  The hosting site labels the file
   "ArmAsm"; the content is PowerPC assembly. */
/*  ppc_d_nrv2b.S -- PowerPC decompressor for NRV2B

    This file is part of the UPX executable compressor.

    Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer
    Copyright (C) 1996-2004 Laszlo Molnar
    Copyright (C) 2000-2005 John F. Reiser
    All Rights Reserved.

    UPX and the UCL library are free software; you can redistribute them
    and/or modify them under the terms of the GNU General Public License as
    published by the Free Software Foundation; either version 2 of
    the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; see the file COPYING.
    If not, write to the Free Software Foundation, Inc.,
    59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

    Markus F.X.J. Oberhumer              Laszlo Molnar
    <mfx@users.sourceforge.net>          <ml1050@users.sourceforge.net>

    John F. Reiser
    <jreiser@users.sourceforge.net>
*/

#include "ppc_regs.h"

/* Size in bytes of one data cache line on the Apple G5.  The decompressor
   touches the cache 2*SZ_DLINE bytes ahead of its dst and src pointers
   (see bot_n2b). */
SZ_DLINE=128  # size of data cache line in Apple G5

/* decompress -- entry point of the NRV2B decompressor.
 *
 * Returns 0 on success; non-zero on failure.
 *
 * In:   src   (a0)  compressed input
 *       lsrc  (a1)  input length; converted below into the input end address
 *       dst   (a2)  output buffer
 *       ldst  (a3)  address of the output length word.  It is used as scratch
 *                   for the original dst during decompression and receives
 *                   the number of bytes written at eof_n2b.
 *       meth  (a4)  method; never read in this file, the register is reused
 *                   as 'off'
 * Uses: r0 (hibit), a4..a7, t0, t3 (saved lr), ctr, cr0 and the CArry bit.
 *
 * NOTE(review): the names a0..a7, t0, t3, 'call' and 'ret' are not defined
 * in this file; presumably they come from ppc_regs.h -- confirm there.
 */
decompress:  # (uchar const *src, size_t lsrc, uchar *dst, size_t &ldst, uint method)

/* PowerPC has no 'cmplis': compare logical [unsigned] immediate shifted [by 16] */
#define hibit r0 /* holds 0x80000000 during decompress */

/* Arguments. */
#define src a0
#define lsrc a1
#define dst a2
#define ldst a3 /* Out: actually a reference: &len_dst */
#define meth a4

/* Working registers; 'off' shares a4 with the unused 'meth'. */
#define off a4
#define len a5
#define bits a6
#define disp a7

        dcbtst 0,dst            # prime dcache for store

        /* Park the original dst in *ldst; eof_n2b reloads it from there to
           compute the output length. */
        stw    dst,0(ldst)      # original dst
        add    lsrc,lsrc,src    # input eof

        lis    hibit,0x8000     # 0x80000000 for detecting next bit
        lis    bits,0x8000      # prepare for first load
        /* src and dst are kept one byte low so that the update-form
           instructions lbzu/stbu can pre-increment them. */
        addi   src,src,-1       # prepare for 'lbzu'
        addi   dst,dst,-1       # prepare for 'stbu'
        li     disp,-1          # initial displacement

        /* get1 is reached with 'call', which overwrites lr; keep the return
           address of decompress in t3 until eof_n2b restores it. */
        mflr   t3               # return address
        b      bot_n2b

/* jump on next bit, with branch prediction: y==>likely; n==>unlikely
   cr0 is set by the cmpl ["compare logical"==>unsigned]:
    lt  next bit is 0
    gt  next bit is 1
    eq  must load next 32 bits from memory

   Each jnextb macro intentionally ends with "cr0," so that the label written
   after the macro name becomes the branch target, for example
       jnextb1y lit_n2b   ==>   call get1; bgt+ cr0,lit_n2b
   On "eq", get1 itself reloads 'bits' and recomputes cr0 from the new word
   before returning.

   NOTE(review): a freshly loaded word equal to 0x80000000 would also compare
   "eq", so neither blt nor bgt would be taken for it -- confirm that the
   compressed stream cannot contain such a word.
*/
#define jnextb0y call get1; blt+ cr0,
#define jnextb0n call get1; blt- cr0,
#define jnextb1y call get1; bgt+ cr0,
#define jnextb1n call get1; bgt- cr0,

/* rotate next bit into bottom bit of reg; set cr0 based on entire result reg */
/* I.e. reg = 2*reg + bit, where the bit arrives in CArry from get1. */
#define getnextb(reg) call get1; adde. reg,reg,reg

/* get1 -- deliver the next bit of the control bit stream.
 *
 * In:   bits   unread control bits, most significant first, followed by a
 *              single 1 marker bit; exactly 0x80000000 means "empty"
 *       hibit  0x80000000
 *       src    one byte below the next unread input byte
 * Out:  cr0    lt: the bit is 0;  gt: the bit is 1      (for jnextb*)
 *       CArry  the bit                                   (for getnextb)
 *       bits   shifted left by one
 * When only the marker is left, the next 4 input bytes are loaded as a
 * little-endian word, src advances by 4, and cr0/CArry describe the first
 * bit of that new word.
 */
get1:
        cmpl   cr0,bits,hibit   # cr0 for jnextb
        addc   bits,bits,bits   # CArry for getnextb
        bnelr+ cr0              # return if reload not needed; likely 31/32

/* CArry has been set from adding 0x80000000 to itself; preserve for 'adde' */
/* (none of li, lwbrx, addi, cmpl below modifies CArry) */
        /* fetch 4 bytes unaligned and LITTLE ENDIAN */
#if 0 /*{ clean; but 4 instr larger, and 3 cycles longer */
        lbz    bits,1(src)      # lo8
        lbz    t0,2(src); rlwimi bits,t0, 8,16,23
        lbz    t0,3(src); rlwimi bits,t0,16, 8,15
        lbzu   t0,4(src); rlwimi bits,t0,24, 0, 7
#else /*}{ pray for no unalignment trap or slowdown */
        li     bits,1           # compensate for 'lbzu'
        lwbrx  bits,bits,src    # bits= fetch_le32(bits+src)
        addi   src,src,4
#endif /*}*/

        cmpl   cr0,bits,hibit   # cr0 for jnextb
        /* The incoming CArry (1) becomes the new marker in the low bit; the
           outgoing CArry is the top bit of the word just loaded. */
        adde   bits,bits,bits   # CArry for getnextb; set lo bit from CarryIn
        ret

/* Main decode loop; control falls through from label to label.
 *
 *   lit_n2b      copy one literal byte: *++dst = *++src
 *   top_n2b      control bit 1 ==> literal; 0 ==> a match follows
 *   offmore_n2b  off = 2*off + bit, starting from 1; the bit after each data
 *                bit ends the number (1) or asks for another pair (0)
 *   offprev_n2b  first two length bits; also entered directly when
 *                off-3 is negative, in which case disp keeps its old value
 *   lenmore_n2b  longer lengths, coded like the offset (data bit, stop bit)
 *   gotlen_n2b   len = raw length + off + (disp < -0xd00)
 *
 * Leaves disp (negative displacement into dst) and len set and falls through
 * into copy:.  Leaves the loop for eof_n2b on the end-of-stream offset.
 */
lit_n2b:
#define tmp len
        lbzu   tmp,1(src)       # tmp= *++src;
        stbu   tmp,1(dst)       # *++dst= tmp;
#undef tmp
top_n2b:
        jnextb1y lit_n2b
        li     off,1            # "the msb"
offmore_n2b:
        getnextb(off)
        jnextb0n offmore_n2b

        /* cr0 from addic. is consumed by the blt- below; the li in between
           does not alter cr0. */
        addic. off,off,-3       # CArry set [and ignored], but no 'addi.'
        li     len,0
        blt-   offprev_n2b
        lbzu   t0,1(src)
        rlwinm off,off,8,0,31-8 # off<<=8;
        /* Result 0 means (off|t0) was all ones: the end-of-stream marker. */
        nor.   disp,off,t0      # disp = -(1+ (off|t0));
        beq-   eof_n2b

offprev_n2b: # In: 0==len
        getnextb(len); getnextb(len) # two bits; cr0 set on result
        li     off,1; bne- gotlen_n2b # raw 1,2,3 ==> 2,3,4
        li     off,3            # raw 2.. ==> 5..
        li     len,1            # "the msb"
lenmore_n2b:
        getnextb(len)
        jnextb0n lenmore_n2b
gotlen_n2b:
        /* t0 is a throw-away result; only the carry out of the subtraction
           is used, by the adde that follows. */
        subfic t0,disp,(~0)+(-0xd00) # want CArry only
        adde   len,len,off      # len += off + (disp < -0xd00);

/* copy -- copy a match of 'len' bytes from dst+disp to dst.
 *
 * In:   disp  displacement added to dst to find the match
 *       len   number of bytes to copy; moved into ctr
 *       dst   one byte below the next output position
 * The bytes are moved one at a time, in ascending address order.
 * 'back' aliases 'off' and stays defined past the end of this block; it is
 * undefined again in bot_n2b, which this block falls through into.
 * 'len' is reused as the byte temporary once its value is in ctr.
 */
copy:
#define back off
        add    back,disp,dst    # point back to match in dst
        mtctr  len
short_n2b:
#define tmp len
        lbzu   tmp,1(back)
        stbu   tmp,1(dst)
#undef tmp
        bdnz+  short_n2b

/* This "prefetch for store" is simple, small, and effective. Matches
   usually occur more frequently than once per 128 bytes, but G4 line size
   is only 32 bytes anyway. Assume that an 'unnecessary' dcbtst costs only
   about as much as a hit. The counter register is free at top_n2b, so we could
   pace the dcbtst optimally; but that takes 7 or 8 instructions of space.
*/
/* bot_n2b is reached after every match copy, and once from the prologue of
   decompress before the first bit is read.  It only issues cache hints and
   then re-enters the decode loop at top_n2b.
   'back' is the alias for 'off' that was #defined at copy:; it serves here
   as a scratch index register and is undefined again below. */
bot_n2b:
        li     back,2*SZ_DLINE
        dcbtst back,dst         # 2 lines ahead [-1 for stbu]
        dcbt   back,src         # jump start auto prefetch at page boundary
/* Auto prefetch for Read quits at page boundary; needs 2 misses to restart. */
#undef back
        b      top_n2b

/* eof_n2b -- exit path, reached when the decoded offset is the end-of-stream
 * marker.
 *
 * Out:  *ldst  number of bytes written: final dst minus the original dst,
 *              which is reloaded from *ldst where the prologue stored it
 *       a0     final src minus the input end address held in lsrc:
 *              0 when exactly all input was consumed, non-zero otherwise
 *       lr     restored from t3
 */
eof_n2b:
#define tmp r0 /* hibit is dead */
        lwz    tmp,0(ldst)      # original dst
        mtlr   t3               # return address
        addi   dst,dst,1        # uncorrect for 'stbu'
        addi   src,src,1        # uncorrect for 'lbzu'
        subf   dst,tmp,dst      # dst -= tmp; // dst length
#undef tmp
        subf   a0,lsrc,src      # src -= eof; // return 0: good; else: bad
        stw    dst,0(ldst)
        ret