From b7bbd81dda0459dfbc727ec326b8fd3dbd34e8b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20Moln=C3=A1r?= Date: Thu, 6 Jul 2006 18:30:34 +0200 Subject: [PATCH] conversion of atari/tos to ElfLinker started --- src/stub/Makefile | 15 +- src/stub/src/arch/m68k/bits.ash | 149 +++++----- src/stub/src/arch/m68k/nrv2e_d.ash | 107 +++---- src/stub/src/m68k-atari.tos.asm | 452 ++++++++++++++--------------- 4 files changed, 362 insertions(+), 361 deletions(-) diff --git a/src/stub/Makefile b/src/stub/Makefile index d67f3a66..61fbdfa0 100644 --- a/src/stub/Makefile +++ b/src/stub/Makefile @@ -608,14 +608,21 @@ i386-win32.pe.h : $(srcdir)/src/$$T.asm m68k-atari.tos-%.h : tc_list = m68k-atari.tos default tc.m68k-atari.tos.app-a68k = perl -w $(srcdir)/src/arch/m68k/app-a68k.pl -tc.m68k-atari.tos.asm-a68k = a68k +#tc.m68k-atari.tos.asm-a68k = a68k +tc.m68k-atari.tos.pp-asm = gcc -E -nostdinc -x assembler-with-cpp -Wall +tc.m68k-atari.tos.asm-a68k = m68k-unknown-linux-gnu-as --register-prefix-optional m68k-atari.tos-nrv%.h : $(srcdir)/src/m68k-atari.tos.asm # call gpp_inc to generate .d file $(call tc,gpp_inc) --mode=c --MMD=$@ --MF=tmp/$T.i.d $< -o /dev/null - $(call tc,pp-asm) -D__A68K__ $(PP_FLAGS) $< -o tmp/$T.i - $(call tc,asm-a68k) -q -ltmp/$T.o.lst tmp/$T.i -otmp/$T.o - $(call tc,o2bin) tmp/$T.o tmp/$T.bin 'UPX1' 'UPX9' + $(call tc,pp-asm) -D__GAS__ $(PP_FLAGS) $< -o tmp/$T.i +## $(call tc,asm-a68k) -q -ltmp/$T.o.lst tmp/$T.i -otmp/$T.o + $(call tc,asm-a68k) tmp/$T.i -o tmp/$T.bin +## $(call tc,o2bin) tmp/$T.o tmp/$T.bin 'UPX1' 'UPX9' + $(call tc,m-objcopy) --strip-unneeded tmp/$T.bin + $(call tc,m-objcopy) -R .text -R .data -R .bss tmp/$T.bin + $(call tc,m-objcopy) -R .note -R .comment tmp/$T.bin + $(call tc,m-objdump) -trwh tmp/$T.bin >> tmp/$T.bin $(call tc,bin2h) --ident=$(IDENT_PREFIX)loader$(IDENT_SUFFIX) tmp/$T.bin $@ m68k-atari.tos-nrv2b% : PP_FLAGS = -DNRV2B diff --git a/src/stub/src/arch/m68k/bits.ash b/src/stub/src/arch/m68k/bits.ash index 2ff63394..a6ef1d27 100644 --- a/src/stub/src/arch/m68k/bits.ash +++ b/src/stub/src/arch/m68k/bits.ash @@ -1,3 +1,4 @@ +/* ; bits.ash -- bit access for decompression ; ; This file is part of the UCL data compression library. @@ -24,15 +25,15 @@ ; ; http://www.oberhumer.com/opensource/ucl/ ; +*/ - -; ------------- ADDBITS ------------- +// ------------- ADDBITS ------------- macro(ADDBITS) #if (NRV_BB == 8) - add.b d0,d0 ; sets Z, C and X ; 4 + add.b d0,d0 // sets Z, C and X // 4 #elif (NRV_BB == 32) - add.l d0,d0 ; sets Z, C and X ; 6 + add.l d0,d0 // sets Z, C and X // 6 #endif endm @@ -40,95 +41,95 @@ macro(ADDBITS) #if 0 macro(ADDXBITS) #if (NRV_BB == 8) - addx.b d0,d0 ; sets C and X ; 4 + addx.b d0,d0 // sets C and X // 4 #elif (NRV_BB == 32) - addx.l d0,d0 ; sets C and X ; 8 + addx.l d0,d0 // sets C and X // 8 #endif endm #endif -; ------------- FILLBYTES_xx ------------- +// ------------- FILLBYTES_xx ------------- -; get 1 byte; then get 1 bit into both C and X +// get 1 byte// then get 1 bit into both C and X macro(FILLBYTES_8) - ; note: we shift the X flag through -> must init d0.b with $80 - move.b (a0)+,d0 ; 8 - addx.b d0,d0 ; sets C and X ; 4 + // note: we shift the X flag through -> must init d0.b with $80 + move.b (a0)+,d0 // 8 + addx.b d0,d0 // sets C and X // 4 endm -; get 32 bits in little endian format; then get 1 bit into both C and X +// get 32 bits in little endian format// then get 1 bit into both C and X macro(FILLBYTES_LE32) #if 0 - move.b (a0)+,d0 ; 8 - ror.l #8,d0 ; 24 - move.b (a0)+,d0 ; 8 - ror.l #8,d0 ; 24 - move.b (a0)+,d0 ; 8 - ror.l #8,d0 ; 24 - move.b (a0)+,d0 ; 8 - ror.l #8,d0 ; 24 - add.l d0,d0 ; sets C and X ; 6 - bset #0,d0 ; only changes Z ; 12 - ; ----- - ; 146 + move.b (a0)+,d0 // 8 + ror.l #8,d0 // 24 + move.b (a0)+,d0 // 8 + ror.l #8,d0 // 24 + move.b (a0)+,d0 // 8 + ror.l #8,d0 // 24 + move.b (a0)+,d0 // 8 + ror.l #8,d0 // 24 + add.l d0,d0 // sets C and X // 6 + bset #0,d0 // only changes Z // 12 + // ----- + // 146 #elif 1 - move.b 3(a0),d0 ; 12 - lsl.w #8,d0 ; 22 - move.b 2(a0),d0 ; 12 - swap d0 ; 4 - move.b 1(a0),d0 ; 12 - lsl.w #8,d0 ; 22 - move.b (a0),d0 ; 8 - addq.l #4,a0 ; does not affect flags ; 8 - add.l d0,d0 ; sets C and X ; 6 - bset #0,d0 ; only changes Z ; 12 - ; ----- - ; 118 + move.b 3(a0),d0 // 12 + lsl.w #8,d0 // 22 + move.b 2(a0),d0 // 12 + swap d0 // 4 + move.b 1(a0),d0 // 12 + lsl.w #8,d0 // 22 + move.b (a0),d0 // 8 + addq.l #4,a0 // does not affect flags // 8 + add.l d0,d0 // sets C and X // 6 + bset #0,d0 // only changes Z // 12 + // ----- + // 118 #elif 1 - ; note: we shift the X flag through -> must init d0.l with $80000000 - ; note: rol/ror do not change X flag (but asl/asr/lsl/lsr do) - move.b 3(a0),d0 ; 12 - ror.w #8,d0 ; 22 - move.b 2(a0),d0 ; 12 - swap d0 ; 4 - move.b 1(a0),d0 ; 12 - ror.w #8,d0 ; 22 - move.b (a0),d0 ; 8 - addq.l #4,a0 ; does not affect flags ; 8 - addx.l d0,d0 ; sets C and X ; 8 - ; ----- - ; 108 + // note: we shift the X flag through -> must init d0.l with $80000000 + // note: rol/ror do not change X flag (but asl/asr/lsl/lsr do) + move.b 3(a0),d0 // 12 + ror.w #8,d0 // 22 + move.b 2(a0),d0 // 12 + swap d0 // 4 + move.b 1(a0),d0 // 12 + ror.w #8,d0 // 22 + move.b (a0),d0 // 8 + addq.l #4,a0 // does not affect flags // 8 + addx.l d0,d0 // sets C and X // 8 + // ----- + // 108 #else - ; IMPORTANT: movep is not implemented on the 68060 + // IMPORTANT: movep is not implemented on the 68060 # error "do not use movep" - ; note: we shift the X flag through -> must init d0.l with $80000000 - ; note: must use dc.l because of a bug in the pasm assembler - ; note: may access past the end of the input; this is ok for UPX - dc.l $01080003 ; movep.w 3(a0),d0 ; 16 - move.b 2(a0),d0 ; 12 - swap d0 ; 4 - dc.l $01080001 ; movep.w 1(a0),d0 ; 16 - move.b (a0),d0 ; 8 - addq.l #4,a0 ; does not affect flags ; 8 - addx.l d0,d0 ; sets C and X ; 8 - ; ----- - ; 72 + // note: we shift the X flag through -> must init d0.l with $80000000 + // note: must use dc.l because of a bug in the pasm assembler + // note: may access past the end of the input// this is ok for UPX + dc.l $01080003 // movep.w 3(a0),d0 // 16 + move.b 2(a0),d0 // 12 + swap d0 // 4 + dc.l $01080001 // movep.w 1(a0),d0 // 16 + move.b (a0),d0 // 8 + addq.l #4,a0 // does not affect flags // 8 + addx.l d0,d0 // sets C and X // 8 + // ----- + // 72 #endif endm -; ------------- FILLBITS ------------- +// ------------- FILLBITS ------------- macro(FILLBITS) #if (NRV_BB == 8) - ; no need for a subroutine + // no need for a subroutine FILLBYTES_8 #elif (NRV_BB == 32) # ifdef SMALL # define FILLBYTES_SR FILLBYTES_LE32 - bsr fillbytes_sr ; 18 + bsr fillbytes_sr // 18 # else FILLBYTES_LE32 # endif @@ -136,24 +137,24 @@ macro(FILLBITS) endm -; ------------- GETBIT ------------- +// ------------- GETBIT ------------- -; get one bit into both the Carry and eXtended flag +// get one bit into both the Carry and eXtended flag macro(GETBIT) #if defined(__A68K__) - ADDBITS ; 4 / 6 - bne \@ ; 10 (if jump) + ADDBITS // 4 / 6 + bne \@ // 10 (if jump) FILLBITS \@: #elif defined(__ASL__) - ADDBITS ; 4 / 6 - bne done ; 10 (if jump) + ADDBITS // 4 / 6 + bne done // 10 (if jump) FILLBITS done: #else LOCAL done - ADDBITS ; 4 / 6 - bne done ; 10 (if jump) + ADDBITS // 4 / 6 + bne done // 10 (if jump) FILLBITS done: #endif @@ -161,5 +162,5 @@ done: -; vi:ts=8:et +// vi:ts=8:et diff --git a/src/stub/src/arch/m68k/nrv2e_d.ash b/src/stub/src/arch/m68k/nrv2e_d.ash index e346f5dd..1e189f49 100644 --- a/src/stub/src/arch/m68k/nrv2e_d.ash +++ b/src/stub/src/arch/m68k/nrv2e_d.ash @@ -1,3 +1,4 @@ +/* ; n2e_d.ash -- NRV2E decompression in 68000 assembly ; ; This file is part of the UCL data compression library. @@ -56,27 +57,27 @@ ; we have max_match = 65535, so we can use word arithmetics on d2 ; we have max_offset < 2**23, so we can use partial word arithmetics on d1 ; +*/ - -; ------------- constants & macros ------------- +// ------------- constants & macros ------------- #if !defined(NRV_NO_INIT) - ;;move.l #-$500,d6 ; 0xfffffb00 - moveq.l #-$50,d6 ; 0xffffffb0 - lsl.w #4,d6 ; << 4 + ////move.l #-0x500,d6 // 0xfffffb00 + moveq.l #-0x50,d6 // 0xffffffb0 + lsl.w #4,d6 // << 4 moveq.l #0,d7 - moveq.l #-1,d5 ; last_off = -1 + moveq.l #-1,d5 // last_off = -1 - ; init d0 with high bit set + // init d0 with high bit set #if (NRV_BB == 8) - ;;move.b #$80,d0 ; init d0.b for FILLBYTES - moveq.l #-128,d0 ; d0.b = $80 + ////move.b #0x80,d0 // init d0.b for FILLBYTES + moveq.l #-128,d0 // d0.b = 0x80 #elif (NRV_BB == 32) - ;;move.l #$80000000,d0 ; init d0.l for FILLBYTES + ////move.l #0x80000000,d0 // init d0.l for FILLBYTES moveq.l #1,d0 - ror.l #1,d0 ; d0.l = $80000000 + ror.l #1,d0 // d0.l = 0x80000000 #endif bra decompr_start @@ -88,12 +89,12 @@ #if defined(FILLBYTES_SR) fillbytes_sr: FILLBYTES_SR - rts ; 16 + rts // 16 #endif -; ------------- DECOMPRESSION ------------- +// ------------- DECOMPRESSION ------------- decompr_literal: @@ -102,16 +103,16 @@ decompr_literal: decompr_start: decompr_loop: #ifdef SMALL - ; cost literal: 4 + 10 + 10 - ; cost match: 4 + 10 + 8 - ; cost fillbits: 4 + 8 + // cost literal: 4 + 10 + 10 + // cost match: 4 + 10 + 8 + // cost fillbits: 4 + 8 GETBIT bcs decompr_literal #else - ; optimization: carry is clear -> we know that bits are available - ; cost literal: 4 + 8 + 10 - ; cost match: 4 + 10 - ; cost fillbits: 4 + 8 + 8 + // optimization: carry is clear -> we know that bits are available + // cost literal: 4 + 8 + 10 + // cost match: 4 + 10 + // cost fillbits: 4 + 8 + 8 ADDBITS bcc decompr_match bne decompr_literal @@ -128,16 +129,16 @@ decompr_l1: GETBIT addx.w d1,d1 #ifdef SMALL - ; cost loop continue: 4 + 10 + 8 - ; cost loop break: 4 + 10 + 10 - ; cost fillbits: 4 + 8 + // cost loop continue: 4 + 10 + 8 + // cost loop break: 4 + 10 + 10 + // cost fillbits: 4 + 8 GETBIT bcs decompr_break1 #else - ; optimization: carry is clear -> we know that bits are available - ; cost loop continue: 4 + 10 - ; cost loop break: 4 + 8 + 10 - ; cost fillbits: 4 + 8 + 8 + // optimization: carry is clear -> we know that bits are available + // cost loop continue: 4 + 10 + // cost loop break: 4 + 8 + 10 + // cost fillbits: 4 + 8 + 8 ADDBITS bcc L(continue) bne decompr_break1 @@ -152,7 +153,7 @@ L(continue): bra decompr_end decompr_break1: subq.w #3,d1 - bcs decompr_prev_dist ; last m_off + bcs decompr_prev_dist // last m_off lsl.l #8,d1 move.b (a0)+,d1 not.l d1 @@ -175,16 +176,16 @@ decompr_get_mlen2: decompr_l2: GETBIT addx.w d2,d2 #ifdef SMALL - ; cost loop continue: 4 + 10 + 10 - ; cost loop break: 4 + 10 + 8 - ; cost fillbits: 4 + 8 + // cost loop continue: 4 + 10 + 10 + // cost loop break: 4 + 10 + 8 + // cost fillbits: 4 + 8 GETBIT bcc decompr_l2 #else - ; optimization: carry is clear -> we know that bits are available - ; cost loop continue: 4 + 10 - ; cost loop break: 4 + 8 + 10 - ; cost fillbits: 4 + 8 + 8 + // optimization: carry is clear -> we know that bits are available + // cost loop continue: 4 + 10 + // cost loop break: 4 + 8 + 10 + // cost fillbits: 4 + 8 + 8 ADDBITS bcc decompr_l2 bne L(break) @@ -200,29 +201,29 @@ decompr_got_mlen: move.l d1,d5 lea 0(a1,d1.l),a3 - ; must use sub as cmp doesn't affect the X flag + // must use sub as cmp doesn't affect the X flag sub.l d6,d1 addx.w d7,d2 -; TODO: partly unroll this loop; could use some magic with d7 for address -; computations, then compute a nice `jmp yyy(pc,dx.w)' +// TODO: partly unroll this loop// could use some magic with d7 for address +// computations, then compute a nice `jmp yyy(pc,dx.w)' #if 1 - ; cost for any m_len: 12 + 22 * (m_len - 1) + 4 - ; 38, 60, 82, 104, 126, 148, 170, 192, 214, 236 - move.b (a3)+,(a1)+ ; 12 -L(copy): move.b (a3)+,(a1)+ ; 12 - dbra d2,L(copy) ; 10 / 14 + // cost for any m_len: 12 + 22 * (m_len - 1) + 4 + // 38, 60, 82, 104, 126, 148, 170, 192, 214, 236 + move.b (a3)+,(a1)+ // 12 +L(copy): move.b (a3)+,(a1)+ // 12 + dbra d2,L(copy) // 10 / 14 #else - ; cost for even m_len: 18 + 34 * (m_len / 2) + 4 - ; cost for odd m_len: 28 + 34 * (m_len / 2) + 4 - ; 56, 66, 90, 100, 124, 134, 158, 168, 192, 202 - lsr.w #1,d2 ; 8 - bcc L(copy) ; 10 / 8 - move.b (a3)+,(a1)+ ; 12 -L(copy): move.b (a3)+,(a1)+ ; 12 - move.b (a3)+,(a1)+ ; 12 - dbra d2,L(copy) ; 10 / 14 + // cost for even m_len: 18 + 34 * (m_len / 2) + 4 + // cost for odd m_len: 28 + 34 * (m_len / 2) + 4 + // 56, 66, 90, 100, 124, 134, 158, 168, 192, 202 + lsr.w #1,d2 // 8 + bcc L(copy) // 10 / 8 + move.b (a3)+,(a1)+ // 12 +L(copy): move.b (a3)+,(a1)+ // 12 + move.b (a3)+,(a1)+ // 12 + dbra d2,L(copy) // 10 / 14 #endif bra decompr_loop @@ -232,5 +233,5 @@ L(copy): move.b (a3)+,(a1)+ ; 12 decompr_end: -; vi:ts=8:et +// vi:ts=8:et diff --git a/src/stub/src/m68k-atari.tos.asm b/src/stub/src/m68k-atari.tos.asm index 37819f8d..c3cd91a8 100644 --- a/src/stub/src/m68k-atari.tos.asm +++ b/src/stub/src/m68k-atari.tos.asm @@ -1,3 +1,4 @@ +/* ; l_tos.s -- loader & decompressor for the atari/tos format ; ; This file is part of the UPX executable compressor. @@ -24,12 +25,12 @@ ; Markus F.X.J. Oberhumer Laszlo Molnar ; ; - +*/ #define NRV_BB 8 -#include "../../version.h" +/* ; ; see also: ; freemint/sys/mint/basepage.h @@ -44,30 +45,16 @@ ; by a simple perl script. We also maintain compatiblity with the pasm ; assembler (which must be started in the emulator window). ; +*/ +#define L(label) .L##label +#define macro(name) .macro name +#define endm .endm +#define section .section -#if defined(__A68K__) -# define align4 align 0,4 -# define L(label) \/**/label -# define macro(name) name macro -# define text section code -#elif defined(__ASL__) -# define align4 align 4 -# define L(label) $$/**/label -# define macro(name) name macro -# define text section code -#else -# define align4 align 4 -# define L(label) ./**/label -# define macro(name) macro name -#endif - -; defines needed for including ident_[ns].ash -#define db dc.b -#define dw dc.w -#define dd dc.l - +.altmacro +/* ; basepage offsets p_lowtpa equ $0 ; .l pointer to self (bottom of TPA) p_hitpa equ $4 ; .l pointer to top of TPA + 1 @@ -81,7 +68,11 @@ p_dta equ $20 ; .l pointer to current DTA p_parent equ $24 ; .l pointer to parent's basepage p_flags equ $28 ; .l memory usage flags p_env equ $2c ; .l pointer to environment string +*/ +p_tbase = 8 + +/* ; ; long living registers: ; d4 p_tbase - start of text segment @@ -91,12 +82,13 @@ p_env equ $2c ; .l pointer to environment string ; - start of dirty bss ; ASTACK (a7) - final startup code copied below stack ; +*/ +/************************************************************************* +// flush cache macros +**************************************************************************/ -; /************************************************************************* -; // flush cache macros -; **************************************************************************/ - +/* ; note: ; GEMDOS/XBIOS trashes d0, d1, d2, a0, a1, a2 @@ -108,71 +100,72 @@ p_env equ $2c ; .l pointer to environment string ; ; Note that on a 68060 FreeMiNT just uses `cpusha bc' in all cases, ; so we don't bother passing base and length. (info: base would be d4) +*/ macro(MINT_FLUSH_CACHE) - pea -1 ; length - clr.l -(sp) ; base + pea -1 // length + clr.l -(sp) // base #if 0 - move.w #$0016,-(sp) ; S_FLUSHCACHE (22) - move.w #$0154,-(sp) ; Ssystem (340) + move.w #0x016,-(sp) // S_FLUSHCACHE (22) + move.w #0x154,-(sp) // Ssystem (340) #else - move.l #$01540016,-(sp) + move.l #0x01540016,-(sp) #endif - trap #1 ; GEMDOS + trap #1 // GEMDOS lea 12(sp),sp endm -; First try `cpusha bc' (68040/68060). If that fails try temporary changing -; the cache control register (68030). +// First try `cpusha bc' (68040/68060). If that fails try temporary changing +// the cache control register (68030). macro(SUPEXEC_FLUSH_CACHE) - pea \@super(pc) - move.w #$0026,-(sp) ; Supexec (38) - trap #14 ; XBIOS + pea super(pc) + move.w #0x0026,-(sp) // Supexec (38) + trap #14 // XBIOS addq.l #6,sp - bra \@done + bra done -; exception handler -\@exception: move.l a1,sp ; restore stack (SSP) - jmp (a0) ; and continue +// exception handler +exception: move.l a1,sp // restore stack (SSP) + jmp (a0) // and continue -\@super: move.l ($10),-(sp) - move.l ($2c),-(sp) - move.l ($f4),-(sp) - move.l sp,a1 ; save stack pointer (SSP) +super: move.l (0x10),-(sp) + move.l (0x2c),-(sp) + move.l (0xf4),-(sp) + move.l sp,a1 // save stack pointer (SSP) - ; set exception vectors - lea \@exception(pc),a0 - move.l a0,($10) - move.l a0,($2c) - move.l a0,($f4) - nop ; flush write pipeline + // set exception vectors + lea exception(pc),a0 + move.l a0,(0x10) + move.l a0,(0x2c) + move.l a0,(0xf4) + nop // flush write pipeline - ; try 68040 / 68060 - lea \@1(pc),a0 - dc.w $f4f8 ; cpusha bc - bra \@ret -\@1: - ; try 68030 - lea \@2(pc),a0 - dc.l $4e7a0002 ; movec.l cacr,d0 + // try 68040 / 68060 + lea 1(pc),a0 + dc.w 0xf4f8 // cpusha bc + bra ret +1: + // try 68030 + lea 2(pc),a0 + movec.l cacr,d0 move.l d0,d1 - or.w #$0808,d1 - dc.l $4e7b1002 ; movec.l d1,cacr - dc.l $4e7b0002 ; movec.l d0,cacr -;;; bra \@ret -\@2: + or.w #0x0808,d1 + movec.l d1,cacr + movec.l d0,cacr +//;; bra \@ret +2: -\@ret: move.l (sp)+,($f4) - move.l (sp)+,($2c) - move.l (sp)+,($10) - nop ; flush write pipeline +ret: move.l (sp)+,(0xf4) + move.l (sp)+,(0x2c) + move.l (sp)+,(0x10) + nop // flush write pipeline rts -\@done: +done: endm @@ -180,9 +173,9 @@ macro(SUPEXEC_FLUSH_CACHE) macro(BOTH_FLUSH_CACHE) MINT_FLUSH_CACHE tst.l d0 - beq \@done + beq done2 SUPEXEC_FLUSH_CACHE -\@done: +done2: endm @@ -198,70 +191,76 @@ macro(BOTH_FLUSH_CACHE) #endif -; /************************************************************************* -; // entry - the text segment of a compressed executable -; // -; // note: compressed programs never have the F_SHTEXT flag set, -; // so we can assume that the text, data & bss segments -; // are contiguous in memory -; **************************************************************************/ + +/************************************************************************* +// entry - the text segment of a compressed executable +// +// note: compressed programs never have the F_SHTEXT flag set, +// so we can assume that the text, data & bss segments +// are contiguous in memory +**************************************************************************/ #if defined(__ASL__) padding off #endif - text - dc.b 'UPX1' ; marker for o2bin.pl + +section tos0 + //text + //dc.b 'UPX1' // marker for o2bin.pl start: - move.l a0,d0 ; a0 is basepage if accessory + move.l a0,d0 // a0 is basepage if accessory beq L(l_app) - move.l 4(a0),sp ; accessory - get stack + move.l 4(a0),sp // accessory - get stack bra L(start) -L(l_app): move.l 4(sp),d0 ; application - get basepage +L(l_app): move.l 4(sp),d0 // application - get basepage L(start): movem.l d1-d7/a0-a6,-(sp) -; ------------- restore original basepage +// ------------- restore original basepage - ; we also setup d4 and a6 here, and we prepare a4 + // we also setup d4 and a6 here, and we prepare a4 - move.l d0,a2 ; a2 = basepage + move.l d0,a2 // a2 = basepage addq.l #p_tbase,a2 move.l (a2)+,a6 - move.l a6,d4 ; d4 = p_tbase - move.l #'up11',(a2) ; p_tlen + move.l a6,d4 // d4 = p_tbase + move.l up11,(a2) // p_tlen add.l (a2)+,a6 - move.l a6,(a2)+ ; p_dbase - move.l #'up12',(a2) ; p_dlen - add.l (a2)+,a6 ; a6 = decompressed p_bbase - move.l (a2),a4 ; a4 = compressed p_bbase - move.l a6,(a2)+ ; p_bbase - move.l #'up13',(a2) ; p_blen + move.l a6,(a2)+ // p_dbase + move.l up12,(a2) // p_dlen + add.l (a2)+,a6 // a6 = decompressed p_bbase + move.l (a2),a4 // a4 = compressed p_bbase + move.l a6,(a2)+ // p_bbase + move.l up13,(a2) // p_blen -; ------------- copy data segment (from a4 to a3, downwards) +// ------------- copy data segment (from a4 to a3, downwards) - ; a4 (top of compressed data) already initialized above + // a4 (top of compressed data) already initialized above move.l d4,a3 - add.l #'up21',a3 ; top of data segment + offset + add.l up21,a3 // top of data segment + offset #if defined(SMALL) - move.l #'up22',d0 ; (len / 4) + move.l up22,d0 // (len / 4) - ; copy 4 bytes per loop + // copy 4 bytes per loop L(loop): move.l -(a4),-(a3) - ;;subq.l #1,d0 - dc.b 'u1' ; subq.l #1,d0 / subq.w #1,d0 +section subql_1d0 + subq.l #1,d0 +section subqw_1d0 + subq.w #1,d0 +section s_bneloop0 bne L(loop) #else - move.l #'up22',d0 ; (len / 160) + move.l up22,d0 // (len / 160) - ; loop1 - use 10 registers to copy 4*10*4 = 160 bytes per loop + // loop1 - use 10 registers to copy 4*10*4 = 160 bytes per loop L(loop1): lea.l -160(a4),a4 movem.l 120(a4),d1-d3/d5-d7/a0-a2/a5 @@ -272,125 +271,139 @@ L(loop1): movem.l d1-d3/d5-d7/a0-a2/a5,-(a3) movem.l (a4),d1-d3/d5-d7/a0-a2/a5 movem.l d1-d3/d5-d7/a0-a2/a5,-(a3) - ;;subq.l #1,d0 - dc.b 'u1' ; subq.l #1,d0 / subq.w #1,d0 +section subql_1d0 + subq.l #1,d0 +section subqw_1d0 + subq.w #1,d0 +section s_bneloop0 bne L(loop1) - ; loop2 - copy the remaining 4..160 bytes - ;;moveq.l #xx,d0 ; ((len % 160) / 4) - 1 - dc.b 'u2' ; moveq.l #xx,d0 + // loop2 - copy the remaining 4..160 bytes + //;moveq.l #xx,d0 ; ((len % 160) / 4) - 1 +#if 0 + dc.b 'u2' // moveq.l #xx,d0 +#else + moveq.l #copy_remain,d0 +#endif L(loop2): move.l -(a4),-(a3) dbra d0,L(loop2) #endif - ; a3 now points to the start of the compressed block + // a3 now points to the start of the compressed block -; ------------- copy code to stack and setup ASTACK +// ------------- copy code to stack and setup ASTACK -; Copy the final startup code below the stack. This will get -; called via "jmp (ASTACK)" after decompression and relocation. +// Copy the final startup code below the stack. This will get +// called via "jmp (ASTACK)" after decompression and relocation. copy_to_stack: lea.l clear_bss_end(pc),a2 - move.l d4,-(ASTACK) ; entry point for final jmp + move.l d4,-(ASTACK) // entry point for final jmp - moveq.l #((clear_bss_end-clear_bss)/2-1),d5 -L(loop): move.w -(a2),-(ASTACK) +// moveq.l #((clear_bss_end-clear_bss)/2-1),d5 + moveq.l #copy_to_stack_len,d5 +L(loop6): move.w -(a2),-(ASTACK) subq.l #1,d5 - bcc L(loop) + bcc L(loop6) #ifdef FLUSH_CACHE - ; patch code: on the stack, the `rts' becomes a `nop' - move.w #$4e71,flush_cache_rts-clear_bss(ASTACK) + // patch code: on the stack, the `rts' becomes a `nop' + move.w #0x4e71,flush_cache_rts-clear_bss(ASTACK) #endif - ; note: d5.l is now -1 (needed for decompressor) + // note: d5.l is now -1 (needed for decompressor) -; ------------- +// ------------- #ifdef FLUSH_CACHE bsr flush_cache #endif -; ------------- prepare decompressor +// ------------- prepare decompressor - ; a3 still points to the start of the compressed block - move.l d4,a4 ; dest. for decompressing + // a3 still points to the start of the compressed block + move.l d4,a4 // dest. for decompressing #define NRV_NO_INIT - ;;moveq.l #-1,d5 ; last_off = -1 - moveq.l #-128,d0 ; d0.b = $80 + //;moveq.l #-1,d5 ; last_off = -1 + moveq.l #-128,d0 // d0.b = $80 #if defined(NRV2B) moveq.l #-1,d7 - moveq.l #-$68,d6 ; 0xffffff98 - lsl.w #5,d6 ; 0xfffff300 == -0xd00 + moveq.l #-0x68,d6 // 0xffffff98 + lsl.w #5,d6 // 0xfffff300 == -0xd00 #elif defined(NRV2D) moveq.l #-1,d7 - moveq.l #-$50,d6 ; 0xffffffb0 - lsl.w #4,d6 ; 0xfffffb00 == -0x500 + moveq.l #-0x50,d6 // 0xffffffb0 + lsl.w #4,d6 // 0xfffffb00 == -0x500 #elif defined(NRV2E) moveq.l #0,d7 - moveq.l #-$50,d6 ; 0xffffffb0 - lsl.w #4,d6 ; 0xfffffb00 == -0x500 + moveq.l #-0x50,d6 // 0xffffffb0 + lsl.w #4,d6 // 0xfffffb00 == -0x500 #else # error #endif -; ------------- jump to copied decompressor +// ------------- jump to copied decompressor move.l d4,a2 - add.l #'up31',a2 - jmp (a2) ; jmp decompr_start + add.l #up31,a2 + jmp (a2) // jmp decompr_start -; /************************************************************************* -; // this is the final part of the startup code which runs in the stack -; **************************************************************************/ +/************************************************************************* +// this is the final part of the startup code which runs in the stack +**************************************************************************/ -; ------------- clear dirty bss +// ------------- clear dirty bss clear_bss: - ; on entry: - ; ASTACK == pc == clear_bss (on stack) - ; a6 start of dirty bss [long living register] - ; d6.l number of clr loops - ; d3.l 0 + // on entry: + // ASTACK == pc == clear_bss (on stack) + // a6 start of dirty bss [long living register] + // d6.l number of clr loops + // d3.l 0 #if defined(SMALL) -L(loop): move.l d3,(a6)+ - ;;subq.l #1,d6 - dc.b 'u4' ; subq.l #1,d6 / subq.w #1,d6 - bne L(loop) +L(loop3): move.l d3,(a6)+ +section subql_1d6 + subq.l #1,d6 +section subqw_1d6 + subq.w #1,d6 +section s_bneloop3 + bne L(loop3) #else - ; the dirty bss is usually not too large, so we don't - ; bother making movem optimizations here -L(loop): move.l d3,(a6)+ + // the dirty bss is usually not too large, so we don't + // bother making movem optimizations here +L(loop3): move.l d3,(a6)+ move.l d3,(a6)+ move.l d3,(a6)+ move.l d3,(a6)+ - ;;subq.l #1,d6 - dc.b 'u4' ; subq.l #1,d6 / subq.w #1,d6 - bne L(loop) +section subql_1d6 + subq.l #1,d6 +section subqw_1d6 + subq.w #1,d6 +section s_bneloop3 + bne L(loop3) #endif -; ------------- flush the cache +// ------------- flush the cache #ifdef FLUSH_CACHE -; info: -; This is also called as a subroutine (before decompression, NOT running -; in the stack). When running in the stack the `rts' is replaced by a `nop'. +// info: +// This is also called as a subroutine (before decompression, NOT running +// in the stack). When running in the stack the `rts' is replaced by a `nop'. flush_cache: FLUSH_CACHE @@ -400,23 +413,23 @@ flush_cache_rts: #endif -; ------------- restore ASTACK +// ------------- restore ASTACK - lea clear_bss_end-clear_bss+4(ASTACK),sp + lea clear_bss_size+4(ASTACK),sp - ;; assert sp == clear_bss_end(pc)+4 + //; assert sp == clear_bss_end(pc)+4 -; ------------- clear the dirty stack +// ------------- clear the dirty stack #if 0 -; better don't do this - we are currently running in the stack -; and don't want to make yet another instruction-cache-line dirty +// better don't do this - we are currently running in the stack +// and don't want to make yet another instruction-cache-line dirty clear_dirty_stack: - ; clear down to clear_bss(pc) + 32 extra longs + // clear down to clear_bss(pc) + 32 extra longs moveq.l #((L(loop)-clear_bss+3)/4+32-1),d0 lea L(loop)(pc),a0 L(loop): move.l d3,-(a0) @@ -425,48 +438,46 @@ L(loop): move.l d3,-(a0) #endif -; ------------- start program +// ------------- start program movem.l (sp)+,d1-d7/a0-a6 move.l a0,d0 - beq L(l_app) - sub.l sp,sp ; accessory: no stack -L(l_app): dc.w $4ef9 ; jmp $xxxxxxxx - jmp to text segment + beq L(l_app1) + sub.l sp,sp // accessory: no stack +L(l_app1): dc.w 0x4ef9 // jmp $xxxxxxxx - jmp to text segment clear_bss_end: -; /************************************************************************* -; // UPX ident & packheader -; **************************************************************************/ +/************************************************************************* +// UPX ident & packheader +**************************************************************************/ +#if 0 #if defined(SMALL) # include "include/ident_s.ash" #else # include "include/ident_n.ash" #endif +#endif - align4 +// align4 - ; 32 bytes - #include "header.ash" - dc.b 85,80,88,33 ; UPX_MAGIC_LE32 - dc.b 161,216,208,213 ; UPX_MAGIC2_LE32 - dc.l 0,0,0,0,0 - dc.b 0,0,0,45 +#include "include/header2.ash" - ; end of text segment - size is a multiple of 4 + // end of text segment - size is a multiple of 4 -; /************************************************************************* -; // This part is appended after the compressed data. -; // It runs in the last part of the dirty bss (after the -; // relocations and the original fileheader). -; **************************************************************************/ +/************************************************************************* +// This part is appended after the compressed data. +// It runs in the last part of the dirty bss (after the +// relocations and the original fileheader). +**************************************************************************/ cutpoint: -; ------------- decompress (from a3 to a4) +// ------------- decompress (from a3 to a4) #define a0 A3 #define a1 A4 @@ -474,9 +485,9 @@ cutpoint: #define d2 D3 #if defined(NRV2B) -# include "arch/m68k/nrv2b_d.ash" +//# include "arch/m68k/nrv2b_d.ash" #elif defined(NRV2D) -# include "arch/m68k/nrv2d_d.ash" +//# include "arch/m68k/nrv2d_d.ash" #elif defined(NRV2E) # include "arch/m68k/nrv2e_d.ash" #else @@ -488,71 +499,52 @@ cutpoint: #undef a3 #undef d2 - ; note: d3.l is 0 from decompressor above + // note: d3.l is 0 from decompressor above -; ------------- prepare d6 for clearing the dirty bss +// ------------- prepare d6 for clearing the dirty bss #if defined(SMALL) - move.l #'up41',d6 ; dirty_bss / 4 + move.l #up41,d6 // dirty_bss / 4 #else - move.l #'up41',d6 ; dirty_bss / 16 + move.l #up41,d6 // dirty_bss / 16 #endif -; ------------- test if we need to reloc +section reloc - dc.b 'u3' ; moveq.l #1,d5 / jmp (ASTACK) + moveq.l #1,d5 +// The decompressed relocations now are just after the decompressed +// data segment, i.e. at the beginning of the (dirty) bss. -; ------------- reloc + // note: d3.l is still 0 -reloc: - -; The decompressed relocations now are just after the decompressed -; data segment, i.e. at the beginning of the (dirty) bss. - - ; note: d3.l is still 0 - - move.l a6,a0 ; a0 = start of relocations + move.l a6,a0 // a0 = start of relocations move.l d4,a1 - add.l (a0)+,a1 ; get initial fixup + add.l (a0)+,a1 // get initial fixup -L(loop1): add.l d3,a1 ; increase fixup - add.l d4,(a1) ; reloc one address -L(loop2): move.b (a0)+,d3 +L(loopx1): add.l d3,a1 // increase fixup + add.l d4,(a1) // reloc one address +L(loopx2): move.b (a0)+,d3 beq reloc_end - cmp.b d5,d3 ; note: d5.b is #1 from above - bne L(loop1) - lea 254(a1),a1 ; d3 == 1 -> add 254, don't reloc - bra L(loop2) + cmp.b d5,d3 // note: d5.b is #1 from above + bne L(loopx1) + lea 254(a1),a1 // d3 == 1 -> add 254, don't reloc + bra L(loopx2) reloc_end: -; ------------- clear dirty bss & start program +// ------------- clear dirty bss & start program -; We are currently running in the dirty bss. -; Jump to the code we copied below the stack. +// We are currently running in the dirty bss. +// Jump to the code we copied below the stack. - ; note: d3.l is still 0 + // note: d3.l is still 0 - jmp (ASTACK) ; jmp clear_bss (on stack) + jmp (ASTACK) // jmp clear_bss (on stack) - - -eof: - dc.w cutpoint-start ; size of entry - dc.w eof-cutpoint ; size of decompressor - dc.w decompr_start-cutpoint ; offset of decompressor start - dc.b 'UPX9' ; marker for o2bin.pl - -#if defined(__ASL__) - endsection code -#endif - end - - -; vi:ts=8:et:nowrap +// vi:ts=8:et:nowrap