1
0
mirror of https://github.com/upx/upx synced 2025-09-28 19:06:07 +08:00

conversion of atari/tos to ElfLinker started

This commit is contained in:
László Molnár 2006-07-06 18:30:34 +02:00
parent 9836dd5d10
commit b7bbd81dda
4 changed files with 362 additions and 361 deletions

View File

@ -608,14 +608,21 @@ i386-win32.pe.h : $(srcdir)/src/$$T.asm
m68k-atari.tos-%.h : tc_list = m68k-atari.tos default m68k-atari.tos-%.h : tc_list = m68k-atari.tos default
tc.m68k-atari.tos.app-a68k = perl -w $(srcdir)/src/arch/m68k/app-a68k.pl tc.m68k-atari.tos.app-a68k = perl -w $(srcdir)/src/arch/m68k/app-a68k.pl
tc.m68k-atari.tos.asm-a68k = a68k #tc.m68k-atari.tos.asm-a68k = a68k
tc.m68k-atari.tos.pp-asm = gcc -E -nostdinc -x assembler-with-cpp -Wall
tc.m68k-atari.tos.asm-a68k = m68k-unknown-linux-gnu-as --register-prefix-optional
m68k-atari.tos-nrv%.h : $(srcdir)/src/m68k-atari.tos.asm m68k-atari.tos-nrv%.h : $(srcdir)/src/m68k-atari.tos.asm
# call gpp_inc to generate .d file # call gpp_inc to generate .d file
$(call tc,gpp_inc) --mode=c --MMD=$@ --MF=tmp/$T.i.d $< -o /dev/null $(call tc,gpp_inc) --mode=c --MMD=$@ --MF=tmp/$T.i.d $< -o /dev/null
$(call tc,pp-asm) -D__A68K__ $(PP_FLAGS) $< -o tmp/$T.i $(call tc,pp-asm) -D__GAS__ $(PP_FLAGS) $< -o tmp/$T.i
$(call tc,asm-a68k) -q -ltmp/$T.o.lst tmp/$T.i -otmp/$T.o ## $(call tc,asm-a68k) -q -ltmp/$T.o.lst tmp/$T.i -otmp/$T.o
$(call tc,o2bin) tmp/$T.o tmp/$T.bin 'UPX1' 'UPX9' $(call tc,asm-a68k) tmp/$T.i -o tmp/$T.bin
## $(call tc,o2bin) tmp/$T.o tmp/$T.bin 'UPX1' 'UPX9'
$(call tc,m-objcopy) --strip-unneeded tmp/$T.bin
$(call tc,m-objcopy) -R .text -R .data -R .bss tmp/$T.bin
$(call tc,m-objcopy) -R .note -R .comment tmp/$T.bin
$(call tc,m-objdump) -trwh tmp/$T.bin >> tmp/$T.bin
$(call tc,bin2h) --ident=$(IDENT_PREFIX)loader$(IDENT_SUFFIX) tmp/$T.bin $@ $(call tc,bin2h) --ident=$(IDENT_PREFIX)loader$(IDENT_SUFFIX) tmp/$T.bin $@
m68k-atari.tos-nrv2b% : PP_FLAGS = -DNRV2B m68k-atari.tos-nrv2b% : PP_FLAGS = -DNRV2B

View File

@ -1,3 +1,4 @@
/*
; bits.ash -- bit access for decompression ; bits.ash -- bit access for decompression
; ;
; This file is part of the UCL data compression library. ; This file is part of the UCL data compression library.
@ -24,15 +25,15 @@
; <markus@oberhumer.com> ; <markus@oberhumer.com>
; http://www.oberhumer.com/opensource/ucl/ ; http://www.oberhumer.com/opensource/ucl/
; ;
*/
// ------------- ADDBITS -------------
; ------------- ADDBITS -------------
macro(ADDBITS) macro(ADDBITS)
#if (NRV_BB == 8) #if (NRV_BB == 8)
add.b d0,d0 ; sets Z, C and X ; 4 add.b d0,d0 // sets Z, C and X // 4
#elif (NRV_BB == 32) #elif (NRV_BB == 32)
add.l d0,d0 ; sets Z, C and X ; 6 add.l d0,d0 // sets Z, C and X // 6
#endif #endif
endm endm
@ -40,95 +41,95 @@ macro(ADDBITS)
#if 0 #if 0
macro(ADDXBITS) macro(ADDXBITS)
#if (NRV_BB == 8) #if (NRV_BB == 8)
addx.b d0,d0 ; sets C and X ; 4 addx.b d0,d0 // sets C and X // 4
#elif (NRV_BB == 32) #elif (NRV_BB == 32)
addx.l d0,d0 ; sets C and X ; 8 addx.l d0,d0 // sets C and X // 8
#endif #endif
endm endm
#endif #endif
; ------------- FILLBYTES_xx ------------- // ------------- FILLBYTES_xx -------------
; get 1 byte; then get 1 bit into both C and X // get 1 byte, then get 1 bit into both C and X
macro(FILLBYTES_8) macro(FILLBYTES_8)
; note: we shift the X flag through -> must init d0.b with $80 // note: we shift the X flag through -> must init d0.b with $80
move.b (a0)+,d0 ; 8 move.b (a0)+,d0 // 8
addx.b d0,d0 ; sets C and X ; 4 addx.b d0,d0 // sets C and X // 4
endm endm
; get 32 bits in little endian format; then get 1 bit into both C and X // get 32 bits in little endian format, then get 1 bit into both C and X
macro(FILLBYTES_LE32) macro(FILLBYTES_LE32)
#if 0 #if 0
move.b (a0)+,d0 ; 8 move.b (a0)+,d0 // 8
ror.l #8,d0 ; 24 ror.l #8,d0 // 24
move.b (a0)+,d0 ; 8 move.b (a0)+,d0 // 8
ror.l #8,d0 ; 24 ror.l #8,d0 // 24
move.b (a0)+,d0 ; 8 move.b (a0)+,d0 // 8
ror.l #8,d0 ; 24 ror.l #8,d0 // 24
move.b (a0)+,d0 ; 8 move.b (a0)+,d0 // 8
ror.l #8,d0 ; 24 ror.l #8,d0 // 24
add.l d0,d0 ; sets C and X ; 6 add.l d0,d0 // sets C and X // 6
bset #0,d0 ; only changes Z ; 12 bset #0,d0 // only changes Z // 12
; ----- // -----
; 146 // 146
#elif 1 #elif 1
move.b 3(a0),d0 ; 12 move.b 3(a0),d0 // 12
lsl.w #8,d0 ; 22 lsl.w #8,d0 // 22
move.b 2(a0),d0 ; 12 move.b 2(a0),d0 // 12
swap d0 ; 4 swap d0 // 4
move.b 1(a0),d0 ; 12 move.b 1(a0),d0 // 12
lsl.w #8,d0 ; 22 lsl.w #8,d0 // 22
move.b (a0),d0 ; 8 move.b (a0),d0 // 8
addq.l #4,a0 ; does not affect flags ; 8 addq.l #4,a0 // does not affect flags // 8
add.l d0,d0 ; sets C and X ; 6 add.l d0,d0 // sets C and X // 6
bset #0,d0 ; only changes Z ; 12 bset #0,d0 // only changes Z // 12
; ----- // -----
; 118 // 118
#elif 1 #elif 1
; note: we shift the X flag through -> must init d0.l with $80000000 // note: we shift the X flag through -> must init d0.l with $80000000
; note: rol/ror do not change X flag (but asl/asr/lsl/lsr do) // note: rol/ror do not change X flag (but asl/asr/lsl/lsr do)
move.b 3(a0),d0 ; 12 move.b 3(a0),d0 // 12
ror.w #8,d0 ; 22 ror.w #8,d0 // 22
move.b 2(a0),d0 ; 12 move.b 2(a0),d0 // 12
swap d0 ; 4 swap d0 // 4
move.b 1(a0),d0 ; 12 move.b 1(a0),d0 // 12
ror.w #8,d0 ; 22 ror.w #8,d0 // 22
move.b (a0),d0 ; 8 move.b (a0),d0 // 8
addq.l #4,a0 ; does not affect flags ; 8 addq.l #4,a0 // does not affect flags // 8
addx.l d0,d0 ; sets C and X ; 8 addx.l d0,d0 // sets C and X // 8
; ----- // -----
; 108 // 108
#else #else
; IMPORTANT: movep is not implemented on the 68060 // IMPORTANT: movep is not implemented on the 68060
# error "do not use movep" # error "do not use movep"
; note: we shift the X flag through -> must init d0.l with $80000000 // note: we shift the X flag through -> must init d0.l with $80000000
; note: must use dc.l because of a bug in the pasm assembler // note: must use dc.l because of a bug in the pasm assembler
; note: may access past the end of the input; this is ok for UPX // note: may access past the end of the input -- this is ok for UPX
dc.l $01080003 ; movep.w 3(a0),d0 ; 16 dc.l $01080003 // movep.w 3(a0),d0 // 16
move.b 2(a0),d0 ; 12 move.b 2(a0),d0 // 12
swap d0 ; 4 swap d0 // 4
dc.l $01080001 ; movep.w 1(a0),d0 ; 16 dc.l $01080001 // movep.w 1(a0),d0 // 16
move.b (a0),d0 ; 8 move.b (a0),d0 // 8
addq.l #4,a0 ; does not affect flags ; 8 addq.l #4,a0 // does not affect flags // 8
addx.l d0,d0 ; sets C and X ; 8 addx.l d0,d0 // sets C and X // 8
; ----- // -----
; 72 // 72
#endif #endif
endm endm
; ------------- FILLBITS ------------- // ------------- FILLBITS -------------
macro(FILLBITS) macro(FILLBITS)
#if (NRV_BB == 8) #if (NRV_BB == 8)
; no need for a subroutine // no need for a subroutine
FILLBYTES_8 FILLBYTES_8
#elif (NRV_BB == 32) #elif (NRV_BB == 32)
# ifdef SMALL # ifdef SMALL
# define FILLBYTES_SR FILLBYTES_LE32 # define FILLBYTES_SR FILLBYTES_LE32
bsr fillbytes_sr ; 18 bsr fillbytes_sr // 18
# else # else
FILLBYTES_LE32 FILLBYTES_LE32
# endif # endif
@ -136,24 +137,24 @@ macro(FILLBITS)
endm endm
; ------------- GETBIT ------------- // ------------- GETBIT -------------
; get one bit into both the Carry and eXtended flag // get one bit into both the Carry and eXtended flag
macro(GETBIT) macro(GETBIT)
#if defined(__A68K__) #if defined(__A68K__)
ADDBITS ; 4 / 6 ADDBITS // 4 / 6
bne \@ ; 10 (if jump) bne \@ // 10 (if jump)
FILLBITS FILLBITS
\@: \@:
#elif defined(__ASL__) #elif defined(__ASL__)
ADDBITS ; 4 / 6 ADDBITS // 4 / 6
bne done ; 10 (if jump) bne done // 10 (if jump)
FILLBITS FILLBITS
done: done:
#else #else
LOCAL done LOCAL done
ADDBITS ; 4 / 6 ADDBITS // 4 / 6
bne done ; 10 (if jump) bne done // 10 (if jump)
FILLBITS FILLBITS
done: done:
#endif #endif
@ -161,5 +162,5 @@ done:
; vi:ts=8:et // vi:ts=8:et

View File

@ -1,3 +1,4 @@
/*
; n2e_d.ash -- NRV2E decompression in 68000 assembly ; n2e_d.ash -- NRV2E decompression in 68000 assembly
; ;
; This file is part of the UCL data compression library. ; This file is part of the UCL data compression library.
@ -56,27 +57,27 @@
; we have max_match = 65535, so we can use word arithmetics on d2 ; we have max_match = 65535, so we can use word arithmetics on d2
; we have max_offset < 2**23, so we can use partial word arithmetics on d1 ; we have max_offset < 2**23, so we can use partial word arithmetics on d1
; ;
*/
// ------------- constants & macros -------------
; ------------- constants & macros -------------
#if !defined(NRV_NO_INIT) #if !defined(NRV_NO_INIT)
;;move.l #-$500,d6 ; 0xfffffb00 ////move.l #-0x500,d6 // 0xfffffb00
moveq.l #-$50,d6 ; 0xffffffb0 moveq.l #-0x50,d6 // 0xffffffb0
lsl.w #4,d6 ; << 4 lsl.w #4,d6 // << 4
moveq.l #0,d7 moveq.l #0,d7
moveq.l #-1,d5 ; last_off = -1 moveq.l #-1,d5 // last_off = -1
; init d0 with high bit set // init d0 with high bit set
#if (NRV_BB == 8) #if (NRV_BB == 8)
;;move.b #$80,d0 ; init d0.b for FILLBYTES ////move.b #0x80,d0 // init d0.b for FILLBYTES
moveq.l #-128,d0 ; d0.b = $80 moveq.l #-128,d0 // d0.b = 0x80
#elif (NRV_BB == 32) #elif (NRV_BB == 32)
;;move.l #$80000000,d0 ; init d0.l for FILLBYTES ////move.l #0x80000000,d0 // init d0.l for FILLBYTES
moveq.l #1,d0 moveq.l #1,d0
ror.l #1,d0 ; d0.l = $80000000 ror.l #1,d0 // d0.l = 0x80000000
#endif #endif
bra decompr_start bra decompr_start
@ -88,12 +89,12 @@
#if defined(FILLBYTES_SR) #if defined(FILLBYTES_SR)
fillbytes_sr: FILLBYTES_SR fillbytes_sr: FILLBYTES_SR
rts ; 16 rts // 16
#endif #endif
; ------------- DECOMPRESSION ------------- // ------------- DECOMPRESSION -------------
decompr_literal: decompr_literal:
@ -102,16 +103,16 @@ decompr_literal:
decompr_start: decompr_start:
decompr_loop: decompr_loop:
#ifdef SMALL #ifdef SMALL
; cost literal: 4 + 10 + 10 // cost literal: 4 + 10 + 10
; cost match: 4 + 10 + 8 // cost match: 4 + 10 + 8
; cost fillbits: 4 + 8 // cost fillbits: 4 + 8
GETBIT GETBIT
bcs decompr_literal bcs decompr_literal
#else #else
; optimization: carry is clear -> we know that bits are available // optimization: carry is clear -> we know that bits are available
; cost literal: 4 + 8 + 10 // cost literal: 4 + 8 + 10
; cost match: 4 + 10 // cost match: 4 + 10
; cost fillbits: 4 + 8 + 8 // cost fillbits: 4 + 8 + 8
ADDBITS ADDBITS
bcc decompr_match bcc decompr_match
bne decompr_literal bne decompr_literal
@ -128,16 +129,16 @@ decompr_l1:
GETBIT GETBIT
addx.w d1,d1 addx.w d1,d1
#ifdef SMALL #ifdef SMALL
; cost loop continue: 4 + 10 + 8 // cost loop continue: 4 + 10 + 8
; cost loop break: 4 + 10 + 10 // cost loop break: 4 + 10 + 10
; cost fillbits: 4 + 8 // cost fillbits: 4 + 8
GETBIT GETBIT
bcs decompr_break1 bcs decompr_break1
#else #else
; optimization: carry is clear -> we know that bits are available // optimization: carry is clear -> we know that bits are available
; cost loop continue: 4 + 10 // cost loop continue: 4 + 10
; cost loop break: 4 + 8 + 10 // cost loop break: 4 + 8 + 10
; cost fillbits: 4 + 8 + 8 // cost fillbits: 4 + 8 + 8
ADDBITS ADDBITS
bcc L(continue) bcc L(continue)
bne decompr_break1 bne decompr_break1
@ -152,7 +153,7 @@ L(continue):
bra decompr_end bra decompr_end
decompr_break1: decompr_break1:
subq.w #3,d1 subq.w #3,d1
bcs decompr_prev_dist ; last m_off bcs decompr_prev_dist // last m_off
lsl.l #8,d1 lsl.l #8,d1
move.b (a0)+,d1 move.b (a0)+,d1
not.l d1 not.l d1
@ -175,16 +176,16 @@ decompr_get_mlen2:
decompr_l2: GETBIT decompr_l2: GETBIT
addx.w d2,d2 addx.w d2,d2
#ifdef SMALL #ifdef SMALL
; cost loop continue: 4 + 10 + 10 // cost loop continue: 4 + 10 + 10
; cost loop break: 4 + 10 + 8 // cost loop break: 4 + 10 + 8
; cost fillbits: 4 + 8 // cost fillbits: 4 + 8
GETBIT GETBIT
bcc decompr_l2 bcc decompr_l2
#else #else
; optimization: carry is clear -> we know that bits are available // optimization: carry is clear -> we know that bits are available
; cost loop continue: 4 + 10 // cost loop continue: 4 + 10
; cost loop break: 4 + 8 + 10 // cost loop break: 4 + 8 + 10
; cost fillbits: 4 + 8 + 8 // cost fillbits: 4 + 8 + 8
ADDBITS ADDBITS
bcc decompr_l2 bcc decompr_l2
bne L(break) bne L(break)
@ -200,29 +201,29 @@ decompr_got_mlen:
move.l d1,d5 move.l d1,d5
lea 0(a1,d1.l),a3 lea 0(a1,d1.l),a3
; must use sub as cmp doesn't affect the X flag // must use sub as cmp doesn't affect the X flag
sub.l d6,d1 sub.l d6,d1
addx.w d7,d2 addx.w d7,d2
; TODO: partly unroll this loop; could use some magic with d7 for address // TODO: partly unroll this loop -- could use some magic with d7 for address
; computations, then compute a nice `jmp yyy(pc,dx.w)' // computations, then compute a nice `jmp yyy(pc,dx.w)'
#if 1 #if 1
; cost for any m_len: 12 + 22 * (m_len - 1) + 4 // cost for any m_len: 12 + 22 * (m_len - 1) + 4
; 38, 60, 82, 104, 126, 148, 170, 192, 214, 236 // 38, 60, 82, 104, 126, 148, 170, 192, 214, 236
move.b (a3)+,(a1)+ ; 12 move.b (a3)+,(a1)+ // 12
L(copy): move.b (a3)+,(a1)+ ; 12 L(copy): move.b (a3)+,(a1)+ // 12
dbra d2,L(copy) ; 10 / 14 dbra d2,L(copy) // 10 / 14
#else #else
; cost for even m_len: 18 + 34 * (m_len / 2) + 4 // cost for even m_len: 18 + 34 * (m_len / 2) + 4
; cost for odd m_len: 28 + 34 * (m_len / 2) + 4 // cost for odd m_len: 28 + 34 * (m_len / 2) + 4
; 56, 66, 90, 100, 124, 134, 158, 168, 192, 202 // 56, 66, 90, 100, 124, 134, 158, 168, 192, 202
lsr.w #1,d2 ; 8 lsr.w #1,d2 // 8
bcc L(copy) ; 10 / 8 bcc L(copy) // 10 / 8
move.b (a3)+,(a1)+ ; 12 move.b (a3)+,(a1)+ // 12
L(copy): move.b (a3)+,(a1)+ ; 12 L(copy): move.b (a3)+,(a1)+ // 12
move.b (a3)+,(a1)+ ; 12 move.b (a3)+,(a1)+ // 12
dbra d2,L(copy) ; 10 / 14 dbra d2,L(copy) // 10 / 14
#endif #endif
bra decompr_loop bra decompr_loop
@ -232,5 +233,5 @@ L(copy): move.b (a3)+,(a1)+ ; 12
decompr_end: decompr_end:
; vi:ts=8:et // vi:ts=8:et

View File

@ -1,3 +1,4 @@
/*
; l_tos.s -- loader & decompressor for the atari/tos format ; l_tos.s -- loader & decompressor for the atari/tos format
; ;
; This file is part of the UPX executable compressor. ; This file is part of the UPX executable compressor.
@ -24,12 +25,12 @@
; Markus F.X.J. Oberhumer Laszlo Molnar ; Markus F.X.J. Oberhumer Laszlo Molnar
; <mfx@users.sourceforge.net> <ml1050@users.sourceforge.net> ; <mfx@users.sourceforge.net> <ml1050@users.sourceforge.net>
; ;
*/
#define NRV_BB 8 #define NRV_BB 8
#include "../../version.h"
/*
; ;
; see also: ; see also:
; freemint/sys/mint/basepage.h ; freemint/sys/mint/basepage.h
@ -44,30 +45,16 @@
; by a simple perl script. We also maintain compatibility with the pasm ; by a simple perl script. We also maintain compatibility with the pasm
; assembler (which must be started in the emulator window). ; assembler (which must be started in the emulator window).
; ;
*/
#define L(label) .L##label
#define macro(name) .macro name
#define endm .endm
#define section .section
#if defined(__A68K__) .altmacro
# define align4 align 0,4
# define L(label) \/**/label
# define macro(name) name macro
# define text section code
#elif defined(__ASL__)
# define align4 align 4
# define L(label) $$/**/label
# define macro(name) name macro
# define text section code
#else
# define align4 align 4
# define L(label) ./**/label
# define macro(name) macro name
#endif
; defines needed for including ident_[ns].ash
#define db dc.b
#define dw dc.w
#define dd dc.l
/*
; basepage offsets ; basepage offsets
p_lowtpa equ $0 ; .l pointer to self (bottom of TPA) p_lowtpa equ $0 ; .l pointer to self (bottom of TPA)
p_hitpa equ $4 ; .l pointer to top of TPA + 1 p_hitpa equ $4 ; .l pointer to top of TPA + 1
@ -81,7 +68,11 @@ p_dta equ $20 ; .l pointer to current DTA
p_parent equ $24 ; .l pointer to parent's basepage p_parent equ $24 ; .l pointer to parent's basepage
p_flags equ $28 ; .l memory usage flags p_flags equ $28 ; .l memory usage flags
p_env equ $2c ; .l pointer to environment string p_env equ $2c ; .l pointer to environment string
*/
p_tbase = 8
/*
; ;
; long living registers: ; long living registers:
; d4 p_tbase - start of text segment ; d4 p_tbase - start of text segment
@ -91,12 +82,13 @@ p_env equ $2c ; .l pointer to environment string
; - start of dirty bss ; - start of dirty bss
; ASTACK (a7) - final startup code copied below stack ; ASTACK (a7) - final startup code copied below stack
; ;
*/
/*************************************************************************
// flush cache macros
**************************************************************************/
; /************************************************************************* /*
; // flush cache macros
; **************************************************************************/
; note: ; note:
; GEMDOS/XBIOS trashes d0, d1, d2, a0, a1, a2 ; GEMDOS/XBIOS trashes d0, d1, d2, a0, a1, a2
@ -108,71 +100,72 @@ p_env equ $2c ; .l pointer to environment string
; ;
; Note that on a 68060 FreeMiNT just uses `cpusha bc' in all cases, ; Note that on a 68060 FreeMiNT just uses `cpusha bc' in all cases,
; so we don't bother passing base and length. (info: base would be d4) ; so we don't bother passing base and length. (info: base would be d4)
*/
macro(MINT_FLUSH_CACHE) macro(MINT_FLUSH_CACHE)
pea -1 ; length pea -1 // length
clr.l -(sp) ; base clr.l -(sp) // base
#if 0 #if 0
move.w #$0016,-(sp) ; S_FLUSHCACHE (22) move.w #0x016,-(sp) // S_FLUSHCACHE (22)
move.w #$0154,-(sp) ; Ssystem (340) move.w #0x154,-(sp) // Ssystem (340)
#else #else
move.l #$01540016,-(sp) move.l #0x01540016,-(sp)
#endif #endif
trap #1 ; GEMDOS trap #1 // GEMDOS
lea 12(sp),sp lea 12(sp),sp
endm endm
; First try `cpusha bc' (68040/68060). If that fails try temporarily changing // First try `cpusha bc' (68040/68060). If that fails try temporarily changing
; the cache control register (68030). // the cache control register (68030).
macro(SUPEXEC_FLUSH_CACHE) macro(SUPEXEC_FLUSH_CACHE)
pea \@super(pc) pea super(pc)
move.w #$0026,-(sp) ; Supexec (38) move.w #0x0026,-(sp) // Supexec (38)
trap #14 ; XBIOS trap #14 // XBIOS
addq.l #6,sp addq.l #6,sp
bra \@done bra done
; exception handler // exception handler
\@exception: move.l a1,sp ; restore stack (SSP) exception: move.l a1,sp // restore stack (SSP)
jmp (a0) ; and continue jmp (a0) // and continue
\@super: move.l ($10),-(sp) super: move.l (0x10),-(sp)
move.l ($2c),-(sp) move.l (0x2c),-(sp)
move.l ($f4),-(sp) move.l (0xf4),-(sp)
move.l sp,a1 ; save stack pointer (SSP) move.l sp,a1 // save stack pointer (SSP)
; set exception vectors // set exception vectors
lea \@exception(pc),a0 lea exception(pc),a0
move.l a0,($10) move.l a0,(0x10)
move.l a0,($2c) move.l a0,(0x2c)
move.l a0,($f4) move.l a0,(0xf4)
nop ; flush write pipeline nop // flush write pipeline
; try 68040 / 68060 // try 68040 / 68060
lea \@1(pc),a0 lea 1(pc),a0
dc.w $f4f8 ; cpusha bc dc.w 0xf4f8 // cpusha bc
bra \@ret bra ret
\@1: 1:
; try 68030 // try 68030
lea \@2(pc),a0 lea 2(pc),a0
dc.l $4e7a0002 ; movec.l cacr,d0 movec.l cacr,d0
move.l d0,d1 move.l d0,d1
or.w #$0808,d1 or.w #0x0808,d1
dc.l $4e7b1002 ; movec.l d1,cacr movec.l d1,cacr
dc.l $4e7b0002 ; movec.l d0,cacr movec.l d0,cacr
;;; bra \@ret //;; bra \@ret
\@2: 2:
\@ret: move.l (sp)+,($f4) ret: move.l (sp)+,(0xf4)
move.l (sp)+,($2c) move.l (sp)+,(0x2c)
move.l (sp)+,($10) move.l (sp)+,(0x10)
nop ; flush write pipeline nop // flush write pipeline
rts rts
\@done: done:
endm endm
@ -180,9 +173,9 @@ macro(SUPEXEC_FLUSH_CACHE)
macro(BOTH_FLUSH_CACHE) macro(BOTH_FLUSH_CACHE)
MINT_FLUSH_CACHE MINT_FLUSH_CACHE
tst.l d0 tst.l d0
beq \@done beq done2
SUPEXEC_FLUSH_CACHE SUPEXEC_FLUSH_CACHE
\@done: done2:
endm endm
@ -198,70 +191,76 @@ macro(BOTH_FLUSH_CACHE)
#endif #endif
; /*************************************************************************
; // entry - the text segment of a compressed executable /*************************************************************************
; // // entry - the text segment of a compressed executable
; // note: compressed programs never have the F_SHTEXT flag set, //
; // so we can assume that the text, data & bss segments // note: compressed programs never have the F_SHTEXT flag set,
; // are contiguous in memory // so we can assume that the text, data & bss segments
; **************************************************************************/ // are contiguous in memory
**************************************************************************/
#if defined(__ASL__) #if defined(__ASL__)
padding off padding off
#endif #endif
text
dc.b 'UPX1' ; marker for o2bin.pl section tos0
//text
//dc.b 'UPX1' // marker for o2bin.pl
start: start:
move.l a0,d0 ; a0 is basepage if accessory move.l a0,d0 // a0 is basepage if accessory
beq L(l_app) beq L(l_app)
move.l 4(a0),sp ; accessory - get stack move.l 4(a0),sp // accessory - get stack
bra L(start) bra L(start)
L(l_app): move.l 4(sp),d0 ; application - get basepage L(l_app): move.l 4(sp),d0 // application - get basepage
L(start): movem.l d1-d7/a0-a6,-(sp) L(start): movem.l d1-d7/a0-a6,-(sp)
; ------------- restore original basepage // ------------- restore original basepage
; we also setup d4 and a6 here, and we prepare a4 // we also setup d4 and a6 here, and we prepare a4
move.l d0,a2 ; a2 = basepage move.l d0,a2 // a2 = basepage
addq.l #p_tbase,a2 addq.l #p_tbase,a2
move.l (a2)+,a6 move.l (a2)+,a6
move.l a6,d4 ; d4 = p_tbase move.l a6,d4 // d4 = p_tbase
move.l #'up11',(a2) ; p_tlen move.l up11,(a2) // p_tlen
add.l (a2)+,a6 add.l (a2)+,a6
move.l a6,(a2)+ ; p_dbase move.l a6,(a2)+ // p_dbase
move.l #'up12',(a2) ; p_dlen move.l up12,(a2) // p_dlen
add.l (a2)+,a6 ; a6 = decompressed p_bbase add.l (a2)+,a6 // a6 = decompressed p_bbase
move.l (a2),a4 ; a4 = compressed p_bbase move.l (a2),a4 // a4 = compressed p_bbase
move.l a6,(a2)+ ; p_bbase move.l a6,(a2)+ // p_bbase
move.l #'up13',(a2) ; p_blen move.l up13,(a2) // p_blen
; ------------- copy data segment (from a4 to a3, downwards) // ------------- copy data segment (from a4 to a3, downwards)
; a4 (top of compressed data) already initialized above // a4 (top of compressed data) already initialized above
move.l d4,a3 move.l d4,a3
add.l #'up21',a3 ; top of data segment + offset add.l up21,a3 // top of data segment + offset
#if defined(SMALL) #if defined(SMALL)
move.l #'up22',d0 ; (len / 4) move.l up22,d0 // (len / 4)
; copy 4 bytes per loop // copy 4 bytes per loop
L(loop): move.l -(a4),-(a3) L(loop): move.l -(a4),-(a3)
;;subq.l #1,d0 section subql_1d0
dc.b 'u1' ; subq.l #1,d0 / subq.w #1,d0 subq.l #1,d0
section subqw_1d0
subq.w #1,d0
section s_bneloop0
bne L(loop) bne L(loop)
#else #else
move.l #'up22',d0 ; (len / 160) move.l up22,d0 // (len / 160)
; loop1 - use 10 registers to copy 4*10*4 = 160 bytes per loop // loop1 - use 10 registers to copy 4*10*4 = 160 bytes per loop
L(loop1): L(loop1):
lea.l -160(a4),a4 lea.l -160(a4),a4
movem.l 120(a4),d1-d3/d5-d7/a0-a2/a5 movem.l 120(a4),d1-d3/d5-d7/a0-a2/a5
@ -272,125 +271,139 @@ L(loop1):
movem.l d1-d3/d5-d7/a0-a2/a5,-(a3) movem.l d1-d3/d5-d7/a0-a2/a5,-(a3)
movem.l (a4),d1-d3/d5-d7/a0-a2/a5 movem.l (a4),d1-d3/d5-d7/a0-a2/a5
movem.l d1-d3/d5-d7/a0-a2/a5,-(a3) movem.l d1-d3/d5-d7/a0-a2/a5,-(a3)
;;subq.l #1,d0 section subql_1d0
dc.b 'u1' ; subq.l #1,d0 / subq.w #1,d0 subq.l #1,d0
section subqw_1d0
subq.w #1,d0
section s_bneloop0
bne L(loop1) bne L(loop1)
; loop2 - copy the remaining 4..160 bytes // loop2 - copy the remaining 4..160 bytes
;;moveq.l #xx,d0 ; ((len % 160) / 4) - 1 //;moveq.l #xx,d0 ; ((len % 160) / 4) - 1
dc.b 'u2' ; moveq.l #xx,d0 #if 0
dc.b 'u2' // moveq.l #xx,d0
#else
moveq.l #copy_remain,d0
#endif
L(loop2): move.l -(a4),-(a3) L(loop2): move.l -(a4),-(a3)
dbra d0,L(loop2) dbra d0,L(loop2)
#endif #endif
; a3 now points to the start of the compressed block // a3 now points to the start of the compressed block
; ------------- copy code to stack and setup ASTACK // ------------- copy code to stack and setup ASTACK
; Copy the final startup code below the stack. This will get // Copy the final startup code below the stack. This will get
; called via "jmp (ASTACK)" after decompression and relocation. // called via "jmp (ASTACK)" after decompression and relocation.
copy_to_stack: copy_to_stack:
lea.l clear_bss_end(pc),a2 lea.l clear_bss_end(pc),a2
move.l d4,-(ASTACK) ; entry point for final jmp move.l d4,-(ASTACK) // entry point for final jmp
moveq.l #((clear_bss_end-clear_bss)/2-1),d5 // moveq.l #((clear_bss_end-clear_bss)/2-1),d5
L(loop): move.w -(a2),-(ASTACK) moveq.l #copy_to_stack_len,d5
L(loop6): move.w -(a2),-(ASTACK)
subq.l #1,d5 subq.l #1,d5
bcc L(loop) bcc L(loop6)
#ifdef FLUSH_CACHE #ifdef FLUSH_CACHE
; patch code: on the stack, the `rts' becomes a `nop' // patch code: on the stack, the `rts' becomes a `nop'
move.w #$4e71,flush_cache_rts-clear_bss(ASTACK) move.w #0x4e71,flush_cache_rts-clear_bss(ASTACK)
#endif #endif
; note: d5.l is now -1 (needed for decompressor) // note: d5.l is now -1 (needed for decompressor)
; ------------- // -------------
#ifdef FLUSH_CACHE #ifdef FLUSH_CACHE
bsr flush_cache bsr flush_cache
#endif #endif
; ------------- prepare decompressor // ------------- prepare decompressor
; a3 still points to the start of the compressed block // a3 still points to the start of the compressed block
move.l d4,a4 ; dest. for decompressing move.l d4,a4 // dest. for decompressing
#define NRV_NO_INIT #define NRV_NO_INIT
;;moveq.l #-1,d5 ; last_off = -1 //;moveq.l #-1,d5 ; last_off = -1
moveq.l #-128,d0 ; d0.b = $80 moveq.l #-128,d0 // d0.b = $80
#if defined(NRV2B) #if defined(NRV2B)
moveq.l #-1,d7 moveq.l #-1,d7
moveq.l #-$68,d6 ; 0xffffff98 moveq.l #-0x68,d6 // 0xffffff98
lsl.w #5,d6 ; 0xfffff300 == -0xd00 lsl.w #5,d6 // 0xfffff300 == -0xd00
#elif defined(NRV2D) #elif defined(NRV2D)
moveq.l #-1,d7 moveq.l #-1,d7
moveq.l #-$50,d6 ; 0xffffffb0 moveq.l #-0x50,d6 // 0xffffffb0
lsl.w #4,d6 ; 0xfffffb00 == -0x500 lsl.w #4,d6 // 0xfffffb00 == -0x500
#elif defined(NRV2E) #elif defined(NRV2E)
moveq.l #0,d7 moveq.l #0,d7
moveq.l #-$50,d6 ; 0xffffffb0 moveq.l #-0x50,d6 // 0xffffffb0
lsl.w #4,d6 ; 0xfffffb00 == -0x500 lsl.w #4,d6 // 0xfffffb00 == -0x500
#else #else
# error # error
#endif #endif
; ------------- jump to copied decompressor // ------------- jump to copied decompressor
move.l d4,a2 move.l d4,a2
add.l #'up31',a2 add.l #up31,a2
jmp (a2) ; jmp decompr_start jmp (a2) // jmp decompr_start
; /************************************************************************* /*************************************************************************
; // this is the final part of the startup code which runs in the stack // this is the final part of the startup code which runs in the stack
; **************************************************************************/ **************************************************************************/
; ------------- clear dirty bss // ------------- clear dirty bss
clear_bss: clear_bss:
; on entry: // on entry:
; ASTACK == pc == clear_bss (on stack) // ASTACK == pc == clear_bss (on stack)
; a6 start of dirty bss [long living register] // a6 start of dirty bss [long living register]
; d6.l number of clr loops // d6.l number of clr loops
; d3.l 0 // d3.l 0
#if defined(SMALL) #if defined(SMALL)
L(loop): move.l d3,(a6)+ L(loop3): move.l d3,(a6)+
;;subq.l #1,d6 section subql_1d6
dc.b 'u4' ; subq.l #1,d6 / subq.w #1,d6 subq.l #1,d6
bne L(loop) section subqw_1d6
subq.w #1,d6
section s_bneloop3
bne L(loop3)
#else #else
; the dirty bss is usually not too large, so we don't // the dirty bss is usually not too large, so we don't
; bother making movem optimizations here // bother making movem optimizations here
L(loop): move.l d3,(a6)+ L(loop3): move.l d3,(a6)+
move.l d3,(a6)+ move.l d3,(a6)+
move.l d3,(a6)+ move.l d3,(a6)+
move.l d3,(a6)+ move.l d3,(a6)+
;;subq.l #1,d6 section subql_1d6
dc.b 'u4' ; subq.l #1,d6 / subq.w #1,d6 subq.l #1,d6
bne L(loop) section subqw_1d6
subq.w #1,d6
section s_bneloop3
bne L(loop3)
#endif #endif
; ------------- flush the cache // ------------- flush the cache
#ifdef FLUSH_CACHE #ifdef FLUSH_CACHE
; info: // info:
; This is also called as a subroutine (before decompression, NOT running // This is also called as a subroutine (before decompression, NOT running
; in the stack). When running in the stack the `rts' is replaced by a `nop'. // in the stack). When running in the stack the `rts' is replaced by a `nop'.
flush_cache: flush_cache:
FLUSH_CACHE FLUSH_CACHE
@ -400,23 +413,23 @@ flush_cache_rts:
#endif #endif
; ------------- restore ASTACK // ------------- restore ASTACK
lea clear_bss_end-clear_bss+4(ASTACK),sp lea clear_bss_size+4(ASTACK),sp
;; assert sp == clear_bss_end(pc)+4 //; assert sp == clear_bss_end(pc)+4
; ------------- clear the dirty stack // ------------- clear the dirty stack
#if 0 #if 0
; better don't do this - we are currently running in the stack // better don't do this - we are currently running in the stack
; and don't want to make yet another instruction-cache-line dirty // and don't want to make yet another instruction-cache-line dirty
clear_dirty_stack: clear_dirty_stack:
; clear down to clear_bss(pc) + 32 extra longs // clear down to clear_bss(pc) + 32 extra longs
moveq.l #((L(loop)-clear_bss+3)/4+32-1),d0 moveq.l #((L(loop)-clear_bss+3)/4+32-1),d0
lea L(loop)(pc),a0 lea L(loop)(pc),a0
L(loop): move.l d3,-(a0) L(loop): move.l d3,-(a0)
@ -425,48 +438,46 @@ L(loop): move.l d3,-(a0)
#endif #endif
; ------------- start program // ------------- start program
movem.l (sp)+,d1-d7/a0-a6 movem.l (sp)+,d1-d7/a0-a6
move.l a0,d0 move.l a0,d0
beq L(l_app) beq L(l_app1)
sub.l sp,sp ; accessory: no stack sub.l sp,sp // accessory: no stack
L(l_app): dc.w $4ef9 ; jmp $xxxxxxxx - jmp to text segment L(l_app1): dc.w 0x4ef9 // jmp $xxxxxxxx - jmp to text segment
clear_bss_end: clear_bss_end:
; /************************************************************************* /*************************************************************************
; // UPX ident & packheader // UPX ident & packheader
; **************************************************************************/ **************************************************************************/
#if 0
#if defined(SMALL) #if defined(SMALL)
# include "include/ident_s.ash" # include "include/ident_s.ash"
#else #else
# include "include/ident_n.ash" # include "include/ident_n.ash"
#endif #endif
#endif
align4 // align4
; 32 bytes - #include "header.ash" #include "include/header2.ash"
dc.b 85,80,88,33 ; UPX_MAGIC_LE32
dc.b 161,216,208,213 ; UPX_MAGIC2_LE32
dc.l 0,0,0,0,0
dc.b 0,0,0,45
; end of text segment - size is a multiple of 4 // end of text segment - size is a multiple of 4
; /************************************************************************* /*************************************************************************
; // This part is appended after the compressed data. // This part is appended after the compressed data.
; // It runs in the last part of the dirty bss (after the // It runs in the last part of the dirty bss (after the
; // relocations and the original fileheader). // relocations and the original fileheader).
; **************************************************************************/ **************************************************************************/
cutpoint: cutpoint:
; ------------- decompress (from a3 to a4) // ------------- decompress (from a3 to a4)
#define a0 A3 #define a0 A3
#define a1 A4 #define a1 A4
@ -474,9 +485,9 @@ cutpoint:
#define d2 D3 #define d2 D3
#if defined(NRV2B) #if defined(NRV2B)
# include "arch/m68k/nrv2b_d.ash" //# include "arch/m68k/nrv2b_d.ash"
#elif defined(NRV2D) #elif defined(NRV2D)
# include "arch/m68k/nrv2d_d.ash" //# include "arch/m68k/nrv2d_d.ash"
#elif defined(NRV2E) #elif defined(NRV2E)
# include "arch/m68k/nrv2e_d.ash" # include "arch/m68k/nrv2e_d.ash"
#else #else
@ -488,71 +499,52 @@ cutpoint:
#undef a3 #undef a3
#undef d2 #undef d2
; note: d3.l is 0 from decompressor above // note: d3.l is 0 from decompressor above
; ------------- prepare d6 for clearing the dirty bss // ------------- prepare d6 for clearing the dirty bss
#if defined(SMALL) #if defined(SMALL)
move.l #'up41',d6 ; dirty_bss / 4 move.l #up41,d6 // dirty_bss / 4
#else #else
move.l #'up41',d6 ; dirty_bss / 16 move.l #up41,d6 // dirty_bss / 16
#endif #endif
; ------------- test if we need to reloc section reloc
dc.b 'u3' ; moveq.l #1,d5 / jmp (ASTACK) moveq.l #1,d5
// The decompressed relocations now are just after the decompressed
// data segment, i.e. at the beginning of the (dirty) bss.
; ------------- reloc // note: d3.l is still 0
reloc: move.l a6,a0 // a0 = start of relocations
; The decompressed relocations now are just after the decompressed
; data segment, i.e. at the beginning of the (dirty) bss.
; note: d3.l is still 0
move.l a6,a0 ; a0 = start of relocations
move.l d4,a1 move.l d4,a1
add.l (a0)+,a1 ; get initial fixup add.l (a0)+,a1 // get initial fixup
L(loop1): add.l d3,a1 ; increase fixup L(loopx1): add.l d3,a1 // increase fixup
add.l d4,(a1) ; reloc one address add.l d4,(a1) // reloc one address
L(loop2): move.b (a0)+,d3 L(loopx2): move.b (a0)+,d3
beq reloc_end beq reloc_end
cmp.b d5,d3 ; note: d5.b is #1 from above cmp.b d5,d3 // note: d5.b is #1 from above
bne L(loop1) bne L(loopx1)
lea 254(a1),a1 ; d3 == 1 -> add 254, don't reloc lea 254(a1),a1 // d3 == 1 -> add 254, don't reloc
bra L(loop2) bra L(loopx2)
reloc_end: reloc_end:
; ------------- clear dirty bss & start program // ------------- clear dirty bss & start program
; We are currently running in the dirty bss. // We are currently running in the dirty bss.
; Jump to the code we copied below the stack. // Jump to the code we copied below the stack.
; note: d3.l is still 0 // note: d3.l is still 0
jmp (ASTACK) ; jmp clear_bss (on stack) jmp (ASTACK) // jmp clear_bss (on stack)
// vi:ts=8:et:nowrap
eof:
dc.w cutpoint-start ; size of entry
dc.w eof-cutpoint ; size of decompressor
dc.w decompr_start-cutpoint ; offset of decompressor start
dc.b 'UPX9' ; marker for o2bin.pl
#if defined(__ASL__)
endsection code
#endif
end
; vi:ts=8:et:nowrap