1
0
mirror of https://github.com/upx/upx synced 2025-09-28 19:06:07 +08:00

conversion of atari/tos to ElfLinker started

This commit is contained in:
László Molnár 2006-07-06 18:30:34 +02:00
parent 9836dd5d10
commit b7bbd81dda
4 changed files with 362 additions and 361 deletions

View File

@ -608,14 +608,21 @@ i386-win32.pe.h : $(srcdir)/src/$$T.asm
m68k-atari.tos-%.h : tc_list = m68k-atari.tos default
tc.m68k-atari.tos.app-a68k = perl -w $(srcdir)/src/arch/m68k/app-a68k.pl
tc.m68k-atari.tos.asm-a68k = a68k
#tc.m68k-atari.tos.asm-a68k = a68k
tc.m68k-atari.tos.pp-asm = gcc -E -nostdinc -x assembler-with-cpp -Wall
tc.m68k-atari.tos.asm-a68k = m68k-unknown-linux-gnu-as --register-prefix-optional
m68k-atari.tos-nrv%.h : $(srcdir)/src/m68k-atari.tos.asm
# call gpp_inc to generate .d file
$(call tc,gpp_inc) --mode=c --MMD=$@ --MF=tmp/$T.i.d $< -o /dev/null
$(call tc,pp-asm) -D__A68K__ $(PP_FLAGS) $< -o tmp/$T.i
$(call tc,asm-a68k) -q -ltmp/$T.o.lst tmp/$T.i -otmp/$T.o
$(call tc,o2bin) tmp/$T.o tmp/$T.bin 'UPX1' 'UPX9'
$(call tc,pp-asm) -D__GAS__ $(PP_FLAGS) $< -o tmp/$T.i
## $(call tc,asm-a68k) -q -ltmp/$T.o.lst tmp/$T.i -otmp/$T.o
$(call tc,asm-a68k) tmp/$T.i -o tmp/$T.bin
## $(call tc,o2bin) tmp/$T.o tmp/$T.bin 'UPX1' 'UPX9'
$(call tc,m-objcopy) --strip-unneeded tmp/$T.bin
$(call tc,m-objcopy) -R .text -R .data -R .bss tmp/$T.bin
$(call tc,m-objcopy) -R .note -R .comment tmp/$T.bin
$(call tc,m-objdump) -trwh tmp/$T.bin >> tmp/$T.bin
$(call tc,bin2h) --ident=$(IDENT_PREFIX)loader$(IDENT_SUFFIX) tmp/$T.bin $@
m68k-atari.tos-nrv2b% : PP_FLAGS = -DNRV2B

View File

@ -1,3 +1,4 @@
/*
; bits.ash -- bit access for decompression
;
; This file is part of the UCL data compression library.
@ -24,15 +25,15 @@
; <markus@oberhumer.com>
; http://www.oberhumer.com/opensource/ucl/
;
*/
; ------------- ADDBITS -------------
// ------------- ADDBITS -------------
// ADDBITS: shift the bit buffer in d0 left by one position (add d0,d0).
// The bit shifted out lands in both C and X. GETBIT branches on the Z flag
// right after this macro and only runs FILLBITS when the result is zero.
// The operand size follows NRV_BB (8 -> .b, 32 -> .l).
// NOTE(review): this span is a rendered diff, so each instruction appears
// twice (old `;` comment form, then the new `//` form).
macro(ADDBITS)
#if (NRV_BB == 8)
add.b d0,d0 ; sets Z, C and X ; 4
add.b d0,d0 // sets Z, C and X // 4
#elif (NRV_BB == 32)
add.l d0,d0 ; sets Z, C and X ; 6
add.l d0,d0 // sets Z, C and X // 6
#endif
endm
@ -40,95 +41,95 @@ macro(ADDBITS)
#if 0
macro(ADDXBITS)
#if (NRV_BB == 8)
addx.b d0,d0 ; sets C and X ; 4
addx.b d0,d0 // sets C and X // 4
#elif (NRV_BB == 32)
addx.l d0,d0 ; sets C and X ; 8
addx.l d0,d0 // sets C and X // 8
#endif
endm
#endif
; ------------- FILLBYTES_xx -------------
// ------------- FILLBYTES_xx -------------
; get 1 byte; then get 1 bit into both C and X
// get 1 byte, then get 1 bit into both C and X
macro(FILLBYTES_8)
; note: we shift the X flag through -> must init d0.b with $80
move.b (a0)+,d0 ; 8
addx.b d0,d0 ; sets C and X ; 4
// note: we shift the X flag through -> must init d0.b with $80
move.b (a0)+,d0 // 8
addx.b d0,d0 // sets C and X // 4
endm
; get 32 bits in little endian format; then get 1 bit into both C and X
// get 32 bits in little endian format, then get 1 bit into both C and X
macro(FILLBYTES_LE32)
#if 0
move.b (a0)+,d0 ; 8
ror.l #8,d0 ; 24
move.b (a0)+,d0 ; 8
ror.l #8,d0 ; 24
move.b (a0)+,d0 ; 8
ror.l #8,d0 ; 24
move.b (a0)+,d0 ; 8
ror.l #8,d0 ; 24
add.l d0,d0 ; sets C and X ; 6
bset #0,d0 ; only changes Z ; 12
; -----
; 146
move.b (a0)+,d0 // 8
ror.l #8,d0 // 24
move.b (a0)+,d0 // 8
ror.l #8,d0 // 24
move.b (a0)+,d0 // 8
ror.l #8,d0 // 24
move.b (a0)+,d0 // 8
ror.l #8,d0 // 24
add.l d0,d0 // sets C and X // 6
bset #0,d0 // only changes Z // 12
// -----
// 146
#elif 1
move.b 3(a0),d0 ; 12
lsl.w #8,d0 ; 22
move.b 2(a0),d0 ; 12
swap d0 ; 4
move.b 1(a0),d0 ; 12
lsl.w #8,d0 ; 22
move.b (a0),d0 ; 8
addq.l #4,a0 ; does not affect flags ; 8
add.l d0,d0 ; sets C and X ; 6
bset #0,d0 ; only changes Z ; 12
; -----
; 118
move.b 3(a0),d0 // 12
lsl.w #8,d0 // 22
move.b 2(a0),d0 // 12
swap d0 // 4
move.b 1(a0),d0 // 12
lsl.w #8,d0 // 22
move.b (a0),d0 // 8
addq.l #4,a0 // does not affect flags // 8
add.l d0,d0 // sets C and X // 6
bset #0,d0 // only changes Z // 12
// -----
// 118
#elif 1
; note: we shift the X flag through -> must init d0.l with $80000000
; note: rol/ror do not change X flag (but asl/asr/lsl/lsr do)
move.b 3(a0),d0 ; 12
ror.w #8,d0 ; 22
move.b 2(a0),d0 ; 12
swap d0 ; 4
move.b 1(a0),d0 ; 12
ror.w #8,d0 ; 22
move.b (a0),d0 ; 8
addq.l #4,a0 ; does not affect flags ; 8
addx.l d0,d0 ; sets C and X ; 8
; -----
; 108
// note: we shift the X flag through -> must init d0.l with $80000000
// note: rol/ror do not change X flag (but asl/asr/lsl/lsr do)
move.b 3(a0),d0 // 12
ror.w #8,d0 // 22
move.b 2(a0),d0 // 12
swap d0 // 4
move.b 1(a0),d0 // 12
ror.w #8,d0 // 22
move.b (a0),d0 // 8
addq.l #4,a0 // does not affect flags // 8
addx.l d0,d0 // sets C and X // 8
// -----
// 108
#else
; IMPORTANT: movep is not implemented on the 68060
// IMPORTANT: movep is not implemented on the 68060
# error "do not use movep"
; note: we shift the X flag through -> must init d0.l with $80000000
; note: must use dc.l because of a bug in the pasm assembler
; note: may access past the end of the input; this is ok for UPX
dc.l $01080003 ; movep.w 3(a0),d0 ; 16
move.b 2(a0),d0 ; 12
swap d0 ; 4
dc.l $01080001 ; movep.w 1(a0),d0 ; 16
move.b (a0),d0 ; 8
addq.l #4,a0 ; does not affect flags ; 8
addx.l d0,d0 ; sets C and X ; 8
; -----
; 72
// note: we shift the X flag through -> must init d0.l with $80000000
// note: must use dc.l because of a bug in the pasm assembler
// note: may access past the end of the input - this is ok for UPX
dc.l $01080003 // movep.w 3(a0),d0 // 16
move.b 2(a0),d0 // 12
swap d0 // 4
dc.l $01080001 // movep.w 1(a0),d0 // 16
move.b (a0),d0 // 8
addq.l #4,a0 // does not affect flags // 8
addx.l d0,d0 // sets C and X // 8
// -----
// 72
#endif
endm
; ------------- FILLBITS -------------
// ------------- FILLBITS -------------
macro(FILLBITS)
#if (NRV_BB == 8)
; no need for a subroutine
// no need for a subroutine
FILLBYTES_8
#elif (NRV_BB == 32)
# ifdef SMALL
# define FILLBYTES_SR FILLBYTES_LE32
bsr fillbytes_sr ; 18
bsr fillbytes_sr // 18
# else
FILLBYTES_LE32
# endif
@ -136,24 +137,24 @@ macro(FILLBITS)
endm
; ------------- GETBIT -------------
// ------------- GETBIT -------------
; get one bit into both the Carry and eXtended flag
// get one bit into both the Carry and eXtended flag
macro(GETBIT)
#if defined(__A68K__)
ADDBITS ; 4 / 6
bne \@ ; 10 (if jump)
ADDBITS // 4 / 6
bne \@ // 10 (if jump)
FILLBITS
\@:
#elif defined(__ASL__)
ADDBITS ; 4 / 6
bne done ; 10 (if jump)
ADDBITS // 4 / 6
bne done // 10 (if jump)
FILLBITS
done:
#else
LOCAL done
ADDBITS ; 4 / 6
bne done ; 10 (if jump)
ADDBITS // 4 / 6
bne done // 10 (if jump)
FILLBITS
done:
#endif
@ -161,5 +162,5 @@ done:
; vi:ts=8:et
// vi:ts=8:et

View File

@ -1,3 +1,4 @@
/*
; n2e_d.ash -- NRV2E decompression in 68000 assembly
;
; This file is part of the UCL data compression library.
@ -56,27 +57,27 @@
; we have max_match = 65535, so we can use word arithmetics on d2
; we have max_offset < 2**23, so we can use partial word arithmetics on d1
;
*/
; ------------- constants & macros -------------
// ------------- constants & macros -------------
#if !defined(NRV_NO_INIT)
;;move.l #-$500,d6 ; 0xfffffb00
moveq.l #-$50,d6 ; 0xffffffb0
lsl.w #4,d6 ; << 4
////move.l #-0x500,d6 // 0xfffffb00
moveq.l #-0x50,d6 // 0xffffffb0
lsl.w #4,d6 // << 4
moveq.l #0,d7
moveq.l #-1,d5 ; last_off = -1
moveq.l #-1,d5 // last_off = -1
; init d0 with high bit set
// init d0 with high bit set
#if (NRV_BB == 8)
;;move.b #$80,d0 ; init d0.b for FILLBYTES
moveq.l #-128,d0 ; d0.b = $80
////move.b #0x80,d0 // init d0.b for FILLBYTES
moveq.l #-128,d0 // d0.b = 0x80
#elif (NRV_BB == 32)
;;move.l #$80000000,d0 ; init d0.l for FILLBYTES
////move.l #0x80000000,d0 // init d0.l for FILLBYTES
moveq.l #1,d0
ror.l #1,d0 ; d0.l = $80000000
ror.l #1,d0 // d0.l = 0x80000000
#endif
bra decompr_start
@ -88,12 +89,12 @@
#if defined(FILLBYTES_SR)
fillbytes_sr: FILLBYTES_SR
rts ; 16
rts // 16
#endif
; ------------- DECOMPRESSION -------------
// ------------- DECOMPRESSION -------------
decompr_literal:
@ -102,16 +103,16 @@ decompr_literal:
decompr_start:
decompr_loop:
#ifdef SMALL
; cost literal: 4 + 10 + 10
; cost match: 4 + 10 + 8
; cost fillbits: 4 + 8
// cost literal: 4 + 10 + 10
// cost match: 4 + 10 + 8
// cost fillbits: 4 + 8
GETBIT
bcs decompr_literal
#else
; optimization: carry is clear -> we know that bits are available
; cost literal: 4 + 8 + 10
; cost match: 4 + 10
; cost fillbits: 4 + 8 + 8
// optimization: carry is clear -> we know that bits are available
// cost literal: 4 + 8 + 10
// cost match: 4 + 10
// cost fillbits: 4 + 8 + 8
ADDBITS
bcc decompr_match
bne decompr_literal
@ -128,16 +129,16 @@ decompr_l1:
GETBIT
addx.w d1,d1
#ifdef SMALL
; cost loop continue: 4 + 10 + 8
; cost loop break: 4 + 10 + 10
; cost fillbits: 4 + 8
// cost loop continue: 4 + 10 + 8
// cost loop break: 4 + 10 + 10
// cost fillbits: 4 + 8
GETBIT
bcs decompr_break1
#else
; optimization: carry is clear -> we know that bits are available
; cost loop continue: 4 + 10
; cost loop break: 4 + 8 + 10
; cost fillbits: 4 + 8 + 8
// optimization: carry is clear -> we know that bits are available
// cost loop continue: 4 + 10
// cost loop break: 4 + 8 + 10
// cost fillbits: 4 + 8 + 8
ADDBITS
bcc L(continue)
bne decompr_break1
@ -152,7 +153,7 @@ L(continue):
bra decompr_end
decompr_break1:
subq.w #3,d1
bcs decompr_prev_dist ; last m_off
bcs decompr_prev_dist // last m_off
lsl.l #8,d1
move.b (a0)+,d1
not.l d1
@ -175,16 +176,16 @@ decompr_get_mlen2:
decompr_l2: GETBIT
addx.w d2,d2
#ifdef SMALL
; cost loop continue: 4 + 10 + 10
; cost loop break: 4 + 10 + 8
; cost fillbits: 4 + 8
// cost loop continue: 4 + 10 + 10
// cost loop break: 4 + 10 + 8
// cost fillbits: 4 + 8
GETBIT
bcc decompr_l2
#else
; optimization: carry is clear -> we know that bits are available
; cost loop continue: 4 + 10
; cost loop break: 4 + 8 + 10
; cost fillbits: 4 + 8 + 8
// optimization: carry is clear -> we know that bits are available
// cost loop continue: 4 + 10
// cost loop break: 4 + 8 + 10
// cost fillbits: 4 + 8 + 8
ADDBITS
bcc decompr_l2
bne L(break)
@ -200,29 +201,29 @@ decompr_got_mlen:
move.l d1,d5
lea 0(a1,d1.l),a3
; must use sub as cmp doesn't affect the X flag
// must use sub as cmp doesn't affect the X flag
sub.l d6,d1
addx.w d7,d2
; TODO: partly unroll this loop; could use some magic with d7 for address
; computations, then compute a nice `jmp yyy(pc,dx.w)'
// TODO: partly unroll this loop - could use some magic with d7 for address
// computations, then compute a nice `jmp yyy(pc,dx.w)'
#if 1
; cost for any m_len: 12 + 22 * (m_len - 1) + 4
; 38, 60, 82, 104, 126, 148, 170, 192, 214, 236
move.b (a3)+,(a1)+ ; 12
L(copy): move.b (a3)+,(a1)+ ; 12
dbra d2,L(copy) ; 10 / 14
// cost for any m_len: 12 + 22 * (m_len - 1) + 4
// 38, 60, 82, 104, 126, 148, 170, 192, 214, 236
move.b (a3)+,(a1)+ // 12
L(copy): move.b (a3)+,(a1)+ // 12
dbra d2,L(copy) // 10 / 14
#else
; cost for even m_len: 18 + 34 * (m_len / 2) + 4
; cost for odd m_len: 28 + 34 * (m_len / 2) + 4
; 56, 66, 90, 100, 124, 134, 158, 168, 192, 202
lsr.w #1,d2 ; 8
bcc L(copy) ; 10 / 8
move.b (a3)+,(a1)+ ; 12
L(copy): move.b (a3)+,(a1)+ ; 12
move.b (a3)+,(a1)+ ; 12
dbra d2,L(copy) ; 10 / 14
// cost for even m_len: 18 + 34 * (m_len / 2) + 4
// cost for odd m_len: 28 + 34 * (m_len / 2) + 4
// 56, 66, 90, 100, 124, 134, 158, 168, 192, 202
lsr.w #1,d2 // 8
bcc L(copy) // 10 / 8
move.b (a3)+,(a1)+ // 12
L(copy): move.b (a3)+,(a1)+ // 12
move.b (a3)+,(a1)+ // 12
dbra d2,L(copy) // 10 / 14
#endif
bra decompr_loop
@ -232,5 +233,5 @@ L(copy): move.b (a3)+,(a1)+ ; 12
decompr_end:
; vi:ts=8:et
// vi:ts=8:et

View File

@ -1,3 +1,4 @@
/*
; l_tos.s -- loader & decompressor for the atari/tos format
;
; This file is part of the UPX executable compressor.
@ -24,12 +25,12 @@
; Markus F.X.J. Oberhumer Laszlo Molnar
; <mfx@users.sourceforge.net> <ml1050@users.sourceforge.net>
;
*/
#define NRV_BB 8
#include "../../version.h"
/*
;
; see also:
; freemint/sys/mint/basepage.h
@ -44,30 +45,16 @@
; by a simple perl script. We also maintain compatibility with the pasm
; assembler (which must be started in the emulator window).
;
*/
#define L(label) .L##label
#define macro(name) .macro name
#define endm .endm
#define section .section
#if defined(__A68K__)
# define align4 align 0,4
# define L(label) \/**/label
# define macro(name) name macro
# define text section code
#elif defined(__ASL__)
# define align4 align 4
# define L(label) $$/**/label
# define macro(name) name macro
# define text section code
#else
# define align4 align 4
# define L(label) ./**/label
# define macro(name) macro name
#endif
; defines needed for including ident_[ns].ash
#define db dc.b
#define dw dc.w
#define dd dc.l
.altmacro
/*
; basepage offsets
p_lowtpa equ $0 ; .l pointer to self (bottom of TPA)
p_hitpa equ $4 ; .l pointer to top of TPA + 1
@ -81,7 +68,11 @@ p_dta equ $20 ; .l pointer to current DTA
p_parent equ $24 ; .l pointer to parent's basepage
p_flags equ $28 ; .l memory usage flags
p_env equ $2c ; .l pointer to environment string
*/
p_tbase = 8
/*
;
; long living registers:
; d4 p_tbase - start of text segment
@ -91,12 +82,13 @@ p_env equ $2c ; .l pointer to environment string
; - start of dirty bss
; ASTACK (a7) - final startup code copied below stack
;
*/
/*************************************************************************
// flush cache macros
**************************************************************************/
; /*************************************************************************
; // flush cache macros
; **************************************************************************/
/*
; note:
; GEMDOS/XBIOS trashes d0, d1, d2, a0, a1, a2
@ -108,71 +100,72 @@ p_env equ $2c ; .l pointer to environment string
;
; Note that on a 68060 FreeMiNT just uses `cpusha bc' in all cases,
; so we don't bother passing base and length. (info: base would be d4)
*/
// MINT_FLUSH_CACHE: ask the OS to flush the CPU caches through GEMDOS
// Ssystem(S_FLUSHCACHE, base, length).
// Stack frame pushed (12 bytes, popped again by the final lea):
//   length = -1, base = 0, then the opcode pair Ssystem (340) / S_FLUSHCACHE (22).
// The result is left in d0; BOTH_FLUSH_CACHE tests it to decide on a fallback.
// NOTE(review): this span is a rendered diff, so most instructions appear
// twice (old `;`/`$` form, then the new `//`/`0x` form).
macro(MINT_FLUSH_CACHE)
pea -1 ; length
clr.l -(sp) ; base
pea -1 // length
clr.l -(sp) // base
#if 0
move.w #$0016,-(sp) ; S_FLUSHCACHE (22)
move.w #$0154,-(sp) ; Ssystem (340)
move.w #0x016,-(sp) // S_FLUSHCACHE (22)
move.w #0x154,-(sp) // Ssystem (340)
#else
// both 16-bit values pushed as one longword: 0x0154 (Ssystem) ends up on top
move.l #$01540016,-(sp)
move.l #0x01540016,-(sp)
#endif
trap #1 ; GEMDOS
trap #1 // GEMDOS
// drop the 12 bytes of arguments pushed above
lea 12(sp),sp
endm
; First try `cpusha bc' (68040/68060). If that fails try temporary changing
; the cache control register (68030).
// First try `cpusha bc' (68040/68060). If that fails try temporarily changing
// the cache control register (68030).
// SUPEXEC_FLUSH_CACHE: flush the CPU caches without OS support by running a
// small routine (label super) in supervisor mode via XBIOS Supexec (38).
//
// The supervisor routine
//   - saves the three exception vectors at 0x10, 0x2c and 0xf4 on the stack,
//   - points all three at the local handler (label exception), which restores
//     the stack pointer from a1 and continues at the address held in a0,
//   - tries `cpusha bc` with a0 = label 1; on success it branches to ret,
//   - otherwise (continuing at 1) tries toggling bits 0x0808 in CACR with
//     a0 = label 2,
//   - restores the saved vectors and returns.
// Registers used inside: d0, d1, a0, a1.
//
// NOTE(review): this span is a rendered diff, so most instructions appear
// twice (old `\@label`/`$hex` form, then the new GAS form). Only the new-form
// lines were changed here: the continuation addresses are now loaded with the
// forward numeric-label references 1f and 2f. The previous `1(pc)` / `2(pc)`
// operands are plain displacements of 1 and 2 from the PC in GNU as and do not
// refer to the labels `1:` and `2:`.
macro(SUPEXEC_FLUSH_CACHE)
pea \@super(pc)
move.w #$0026,-(sp) ; Supexec (38)
trap #14 ; XBIOS
pea super(pc)
move.w #0x0026,-(sp) // Supexec (38)
trap #14 // XBIOS
addq.l #6,sp
bra \@done
bra done
; exception handler
\@exception: move.l a1,sp ; restore stack (SSP)
jmp (a0) ; and continue
// exception handler
exception: move.l a1,sp // restore stack (SSP)
jmp (a0) // and continue
\@super: move.l ($10),-(sp)
move.l ($2c),-(sp)
move.l ($f4),-(sp)
move.l sp,a1 ; save stack pointer (SSP)
super: move.l (0x10),-(sp)
move.l (0x2c),-(sp)
move.l (0xf4),-(sp)
move.l sp,a1 // save stack pointer (SSP)
; set exception vectors
lea \@exception(pc),a0
move.l a0,($10)
move.l a0,($2c)
move.l a0,($f4)
nop ; flush write pipeline
// set exception vectors
lea exception(pc),a0
move.l a0,(0x10)
move.l a0,(0x2c)
move.l a0,(0xf4)
nop // flush write pipeline
; try 68040 / 68060
lea \@1(pc),a0
dc.w $f4f8 ; cpusha bc
bra \@ret
\@1:
; try 68030
lea \@2(pc),a0
dc.l $4e7a0002 ; movec.l cacr,d0
// try 68040 / 68060
lea 1f(pc),a0 // continue at 1: if cpusha traps
dc.w 0xf4f8 // cpusha bc
bra ret
1:
// try 68030
lea 2f(pc),a0 // continue at 2: if movec traps
movec.l cacr,d0
move.l d0,d1
or.w #$0808,d1
dc.l $4e7b1002 ; movec.l d1,cacr
dc.l $4e7b0002 ; movec.l d0,cacr
;;; bra \@ret
\@2:
or.w #0x0808,d1
movec.l d1,cacr
movec.l d0,cacr
//;; bra \@ret
2:
\@ret: move.l (sp)+,($f4)
move.l (sp)+,($2c)
move.l (sp)+,($10)
nop ; flush write pipeline
ret: move.l (sp)+,(0xf4)
move.l (sp)+,(0x2c)
move.l (sp)+,(0x10)
nop // flush write pipeline
rts
\@done:
done:
endm
@ -180,9 +173,9 @@ macro(SUPEXEC_FLUSH_CACHE)
// BOTH_FLUSH_CACHE: try the OS cache flush first (MINT_FLUSH_CACHE) and fall
// back to the supervisor-mode flush (SUPEXEC_FLUSH_CACHE) when d0 is non-zero
// afterwards.
// NOTE(review): presumably d0 == 0 means the Ssystem call succeeded - confirm
// against the FreeMiNT Ssystem documentation.
// NOTE(review): this span is a rendered diff; `\@done` is the old local-label
// form and `done2` is its replacement in the new version.
macro(BOTH_FLUSH_CACHE)
MINT_FLUSH_CACHE
tst.l d0
beq \@done
beq done2
SUPEXEC_FLUSH_CACHE
\@done:
done2:
endm
@ -198,70 +191,76 @@ macro(BOTH_FLUSH_CACHE)
#endif
; /*************************************************************************
; // entry - the text segment of a compressed executable
; //
; // note: compressed programs never have the F_SHTEXT flag set,
; // so we can assume that the text, data & bss segments
; // are contiguous in memory
; **************************************************************************/
/*************************************************************************
// entry - the text segment of a compressed executable
//
// note: compressed programs never have the F_SHTEXT flag set,
// so we can assume that the text, data & bss segments
// are contiguous in memory
**************************************************************************/
#if defined(__ASL__)
padding off
#endif
text
dc.b 'UPX1' ; marker for o2bin.pl
section tos0
//text
//dc.b 'UPX1' // marker for o2bin.pl
// start: entry point of the compressed executable.
//
// On entry a0 is the basepage when started as an accessory and zero when
// started as an application (basepage is then at 4(sp)). The code
//   1. picks up the basepage pointer in d0 and saves d1-d7/a0-a6,
//   2. rewrites p_tlen, p_dbase, p_dlen, p_bbase and p_blen in the basepage
//      with the values of the original program, setting d4 = p_tbase,
//      a6 = decompressed p_bbase and a4 = compressed p_bbase on the way,
//   3. starts copying the data segment downwards from a4 to a3.
//
// up11, up12, up13, up21 and up22 are constants supplied from outside this
// file. They are immediates, so the new-form lines must carry the immediate
// prefix exactly like the old `#'up11'` lines did; without it GNU as encodes
// an absolute memory read from the address named by the symbol.
// NOTE(review): this span is a rendered diff, so most instructions appear
// twice (old `;` form, then the new `//` form). Only new-form lines were
// changed. The routine continues past the end of this hunk.
start:
move.l a0,d0 ; a0 is basepage if accessory
move.l a0,d0 // a0 is basepage if accessory
beq L(l_app)
move.l 4(a0),sp ; accessory - get stack
move.l 4(a0),sp // accessory - get stack
bra L(start)
L(l_app): move.l 4(sp),d0 ; application - get basepage
L(l_app): move.l 4(sp),d0 // application - get basepage
L(start): movem.l d1-d7/a0-a6,-(sp)
; ------------- restore original basepage
// ------------- restore original basepage
; we also setup d4 and a6 here, and we prepare a4
// we also setup d4 and a6 here, and we prepare a4
move.l d0,a2 ; a2 = basepage
move.l d0,a2 // a2 = basepage
addq.l #p_tbase,a2
move.l (a2)+,a6
move.l a6,d4 ; d4 = p_tbase
move.l #'up11',(a2) ; p_tlen
move.l a6,d4 // d4 = p_tbase
move.l #up11,(a2) // p_tlen
add.l (a2)+,a6
move.l a6,(a2)+ ; p_dbase
move.l #'up12',(a2) ; p_dlen
add.l (a2)+,a6 ; a6 = decompressed p_bbase
move.l (a2),a4 ; a4 = compressed p_bbase
move.l a6,(a2)+ ; p_bbase
move.l #'up13',(a2) ; p_blen
move.l a6,(a2)+ // p_dbase
move.l #up12,(a2) // p_dlen
add.l (a2)+,a6 // a6 = decompressed p_bbase
move.l (a2),a4 // a4 = compressed p_bbase
move.l a6,(a2)+ // p_bbase
move.l #up13,(a2) // p_blen
; ------------- copy data segment (from a4 to a3, downwards)
// ------------- copy data segment (from a4 to a3, downwards)
; a4 (top of compressed data) already initialized above
// a4 (top of compressed data) already initialized above
move.l d4,a3
add.l #'up21',a3 ; top of data segment + offset
add.l #up21,a3 // top of data segment + offset
#if defined(SMALL)
move.l #'up22',d0 ; (len / 4)
move.l #up22,d0 // (len / 4)
; copy 4 bytes per loop
// copy 4 bytes per loop
L(loop): move.l -(a4),-(a3)
;;subq.l #1,d0
dc.b 'u1' ; subq.l #1,d0 / subq.w #1,d0
section subql_1d0
subq.l #1,d0
section subqw_1d0
subq.w #1,d0
section s_bneloop0
bne L(loop)
#else
move.l #'up22',d0 ; (len / 160)
move.l #up22,d0 // (len / 160)
; loop1 - use 10 registers to copy 4*10*4 = 160 bytes per loop
// loop1 - use 10 registers to copy 4*10*4 = 160 bytes per loop
L(loop1):
lea.l -160(a4),a4
movem.l 120(a4),d1-d3/d5-d7/a0-a2/a5
@ -272,125 +271,139 @@ L(loop1):
movem.l d1-d3/d5-d7/a0-a2/a5,-(a3)
movem.l (a4),d1-d3/d5-d7/a0-a2/a5
movem.l d1-d3/d5-d7/a0-a2/a5,-(a3)
;;subq.l #1,d0
dc.b 'u1' ; subq.l #1,d0 / subq.w #1,d0
section subql_1d0
subq.l #1,d0
section subqw_1d0
subq.w #1,d0
section s_bneloop0
bne L(loop1)
; loop2 - copy the remaining 4..160 bytes
;;moveq.l #xx,d0 ; ((len % 160) / 4) - 1
dc.b 'u2' ; moveq.l #xx,d0
// loop2 - copy the remaining 4..160 bytes
//;moveq.l #xx,d0 ; ((len % 160) / 4) - 1
#if 0
dc.b 'u2' // moveq.l #xx,d0
#else
moveq.l #copy_remain,d0
#endif
L(loop2): move.l -(a4),-(a3)
dbra d0,L(loop2)
#endif
; a3 now points to the start of the compressed block
// a3 now points to the start of the compressed block
; ------------- copy code to stack and setup ASTACK
// ------------- copy code to stack and setup ASTACK
; Copy the final startup code below the stack. This will get
; called via "jmp (ASTACK)" after decompression and relocation.
// Copy the final startup code below the stack. This will get
// called via "jmp (ASTACK)" after decompression and relocation.
copy_to_stack:
lea.l clear_bss_end(pc),a2
move.l d4,-(ASTACK) ; entry point for final jmp
move.l d4,-(ASTACK) // entry point for final jmp
moveq.l #((clear_bss_end-clear_bss)/2-1),d5
L(loop): move.w -(a2),-(ASTACK)
// moveq.l #((clear_bss_end-clear_bss)/2-1),d5
moveq.l #copy_to_stack_len,d5
L(loop6): move.w -(a2),-(ASTACK)
subq.l #1,d5
bcc L(loop)
bcc L(loop6)
#ifdef FLUSH_CACHE
; patch code: on the stack, the `rts' becomes a `nop'
move.w #$4e71,flush_cache_rts-clear_bss(ASTACK)
// patch code: on the stack, the `rts' becomes a `nop'
move.w #0x4e71,flush_cache_rts-clear_bss(ASTACK)
#endif
; note: d5.l is now -1 (needed for decompressor)
// note: d5.l is now -1 (needed for decompressor)
; -------------
// -------------
#ifdef FLUSH_CACHE
bsr flush_cache
#endif
; ------------- prepare decompressor
// ------------- prepare decompressor
; a3 still points to the start of the compressed block
move.l d4,a4 ; dest. for decompressing
// a3 still points to the start of the compressed block
move.l d4,a4 // dest. for decompressing
#define NRV_NO_INIT
;;moveq.l #-1,d5 ; last_off = -1
moveq.l #-128,d0 ; d0.b = $80
//;moveq.l #-1,d5 ; last_off = -1
moveq.l #-128,d0 // d0.b = $80
#if defined(NRV2B)
moveq.l #-1,d7
moveq.l #-$68,d6 ; 0xffffff98
lsl.w #5,d6 ; 0xfffff300 == -0xd00
moveq.l #-0x68,d6 // 0xffffff98
lsl.w #5,d6 // 0xfffff300 == -0xd00
#elif defined(NRV2D)
moveq.l #-1,d7
moveq.l #-$50,d6 ; 0xffffffb0
lsl.w #4,d6 ; 0xfffffb00 == -0x500
moveq.l #-0x50,d6 // 0xffffffb0
lsl.w #4,d6 // 0xfffffb00 == -0x500
#elif defined(NRV2E)
moveq.l #0,d7
moveq.l #-$50,d6 ; 0xffffffb0
lsl.w #4,d6 ; 0xfffffb00 == -0x500
moveq.l #-0x50,d6 // 0xffffffb0
lsl.w #4,d6 // 0xfffffb00 == -0x500
#else
# error
#endif
; ------------- jump to copied decompressor
// ------------- jump to copied decompressor
move.l d4,a2
add.l #'up31',a2
jmp (a2) ; jmp decompr_start
add.l #up31,a2
jmp (a2) // jmp decompr_start
; /*************************************************************************
; // this is the final part of the startup code which runs in the stack
; **************************************************************************/
/*************************************************************************
// this is the final part of the startup code which runs in the stack
**************************************************************************/
; ------------- clear dirty bss
// ------------- clear dirty bss
clear_bss:
; on entry:
; ASTACK == pc == clear_bss (on stack)
; a6 start of dirty bss [long living register]
; d6.l number of clr loops
; d3.l 0
// on entry:
// ASTACK == pc == clear_bss (on stack)
// a6 start of dirty bss [long living register]
// d6.l number of clr loops
// d3.l 0
#if defined(SMALL)
L(loop): move.l d3,(a6)+
;;subq.l #1,d6
dc.b 'u4' ; subq.l #1,d6 / subq.w #1,d6
bne L(loop)
L(loop3): move.l d3,(a6)+
section subql_1d6
subq.l #1,d6
section subqw_1d6
subq.w #1,d6
section s_bneloop3
bne L(loop3)
#else
; the dirty bss is usually not too large, so we don't
; bother making movem optimizations here
L(loop): move.l d3,(a6)+
// the dirty bss is usually not too large, so we don't
// bother making movem optimizations here
L(loop3): move.l d3,(a6)+
move.l d3,(a6)+
move.l d3,(a6)+
move.l d3,(a6)+
;;subq.l #1,d6
dc.b 'u4' ; subq.l #1,d6 / subq.w #1,d6
bne L(loop)
section subql_1d6
subq.l #1,d6
section subqw_1d6
subq.w #1,d6
section s_bneloop3
bne L(loop3)
#endif
; ------------- flush the cache
// ------------- flush the cache
#ifdef FLUSH_CACHE
; info:
; This is also called as a subroutine (before decompression, NOT running
; in the stack). When running in the stack the `rts' is replaced by a `nop'.
// info:
// This is also called as a subroutine (before decompression, NOT running
// in the stack). When running in the stack the `rts' is replaced by a `nop'.
flush_cache:
FLUSH_CACHE
@ -400,23 +413,23 @@ flush_cache_rts:
#endif
; ------------- restore ASTACK
// ------------- restore ASTACK
lea clear_bss_end-clear_bss+4(ASTACK),sp
lea clear_bss_size+4(ASTACK),sp
;; assert sp == clear_bss_end(pc)+4
//; assert sp == clear_bss_end(pc)+4
; ------------- clear the dirty stack
// ------------- clear the dirty stack
#if 0
; better don't do this - we are currently running in the stack
; and don't want to make yet another instruction-cache-line dirty
// better don't do this - we are currently running in the stack
// and don't want to make yet another instruction-cache-line dirty
clear_dirty_stack:
; clear down to clear_bss(pc) + 32 extra longs
// clear down to clear_bss(pc) + 32 extra longs
moveq.l #((L(loop)-clear_bss+3)/4+32-1),d0
lea L(loop)(pc),a0
L(loop): move.l d3,-(a0)
@ -425,48 +438,46 @@ L(loop): move.l d3,-(a0)
#endif
; ------------- start program
// ------------- start program
movem.l (sp)+,d1-d7/a0-a6
move.l a0,d0
beq L(l_app)
sub.l sp,sp ; accessory: no stack
L(l_app): dc.w $4ef9 ; jmp $xxxxxxxx - jmp to text segment
beq L(l_app1)
sub.l sp,sp // accessory: no stack
L(l_app1): dc.w 0x4ef9 // jmp $xxxxxxxx - jmp to text segment
clear_bss_end:
; /*************************************************************************
; // UPX ident & packheader
; **************************************************************************/
/*************************************************************************
// UPX ident & packheader
**************************************************************************/
#if 0
#if defined(SMALL)
# include "include/ident_s.ash"
#else
# include "include/ident_n.ash"
#endif
#endif
align4
// align4
; 32 bytes - #include "header.ash"
dc.b 85,80,88,33 ; UPX_MAGIC_LE32
dc.b 161,216,208,213 ; UPX_MAGIC2_LE32
dc.l 0,0,0,0,0
dc.b 0,0,0,45
#include "include/header2.ash"
; end of text segment - size is a multiple of 4
// end of text segment - size is a multiple of 4
; /*************************************************************************
; // This part is appended after the compressed data.
; // It runs in the last part of the dirty bss (after the
; // relocations and the original fileheader).
; **************************************************************************/
/*************************************************************************
// This part is appended after the compressed data.
// It runs in the last part of the dirty bss (after the
// relocations and the original fileheader).
**************************************************************************/
cutpoint:
; ------------- decompress (from a3 to a4)
// ------------- decompress (from a3 to a4)
#define a0 A3
#define a1 A4
@ -474,9 +485,9 @@ cutpoint:
#define d2 D3
#if defined(NRV2B)
# include "arch/m68k/nrv2b_d.ash"
//# include "arch/m68k/nrv2b_d.ash"
#elif defined(NRV2D)
# include "arch/m68k/nrv2d_d.ash"
//# include "arch/m68k/nrv2d_d.ash"
#elif defined(NRV2E)
# include "arch/m68k/nrv2e_d.ash"
#else
@ -488,71 +499,52 @@ cutpoint:
#undef a3
#undef d2
; note: d3.l is 0 from decompressor above
// note: d3.l is 0 from decompressor above
; ------------- prepare d6 for clearing the dirty bss
// ------------- prepare d6 for clearing the dirty bss
#if defined(SMALL)
move.l #'up41',d6 ; dirty_bss / 4
move.l #up41,d6 // dirty_bss / 4
#else
move.l #'up41',d6 ; dirty_bss / 16
move.l #up41,d6 // dirty_bss / 16
#endif
; ------------- test if we need to reloc
section reloc
dc.b 'u3' ; moveq.l #1,d5 / jmp (ASTACK)
moveq.l #1,d5
// The decompressed relocations now are just after the decompressed
// data segment, i.e. at the beginning of the (dirty) bss.
; ------------- reloc
// note: d3.l is still 0
reloc:
; The decompressed relocations now are just after the decompressed
; data segment, i.e. at the beginning of the (dirty) bss.
; note: d3.l is still 0
move.l a6,a0 ; a0 = start of relocations
move.l a6,a0 // a0 = start of relocations
move.l d4,a1
add.l (a0)+,a1 ; get initial fixup
add.l (a0)+,a1 // get initial fixup
L(loop1): add.l d3,a1 ; increase fixup
add.l d4,(a1) ; reloc one address
L(loop2): move.b (a0)+,d3
L(loopx1): add.l d3,a1 // increase fixup
add.l d4,(a1) // reloc one address
L(loopx2): move.b (a0)+,d3
beq reloc_end
cmp.b d5,d3 ; note: d5.b is #1 from above
bne L(loop1)
lea 254(a1),a1 ; d3 == 1 -> add 254, don't reloc
bra L(loop2)
cmp.b d5,d3 // note: d5.b is #1 from above
bne L(loopx1)
lea 254(a1),a1 // d3 == 1 -> add 254, don't reloc
bra L(loopx2)
reloc_end:
; ------------- clear dirty bss & start program
// ------------- clear dirty bss & start program
; We are currently running in the dirty bss.
; Jump to the code we copied below the stack.
// We are currently running in the dirty bss.
// Jump to the code we copied below the stack.
; note: d3.l is still 0
// note: d3.l is still 0
jmp (ASTACK) ; jmp clear_bss (on stack)
jmp (ASTACK) // jmp clear_bss (on stack)
eof:
dc.w cutpoint-start ; size of entry
dc.w eof-cutpoint ; size of decompressor
dc.w decompr_start-cutpoint ; offset of decompressor start
dc.b 'UPX9' ; marker for o2bin.pl
#if defined(__ASL__)
endsection code
#endif
end
; vi:ts=8:et:nowrap
// vi:ts=8:et:nowrap