1
0
mirror of https://github.com/upx/upx synced 2025-09-28 19:06:07 +08:00

Support for 64-bit AMD x86_64

Makefile
Added Files:
	a_lx_elf64.c amd_bxx.S amd_d_nrv2e.S amd_regs.h
	fold_elf64amd.S l_lx_elf64amd.S l_lx_elf64amd.lds

committer: jreiser <jreiser> 1131566835 +0000
This commit is contained in:
John Reiser 2005-11-09 20:07:15 +00:00
parent 414a6c1c9e
commit 023c06e178
8 changed files with 907 additions and 0 deletions

View File

@ -45,6 +45,7 @@ STUBS = \
l_lx_sh86.h fold_sh86.h \
l_lx_pti86.h fold_pti86.h \
l_lx_elfppc32.h fold_elfppc32.h \
l_lx_elf64amd.h fold_elf64amd.h \
l_mac_ppc32.h fold_machppc32.h \
l_vmlinz.h l_vmlinx.h \
l_armpe.h
@ -178,6 +179,21 @@ ifneq ($(wildcard $d),)
OBJCOPY_PPC32 := $d/powerpc-750-linux-gnu-objcopy
endif
###
### AMD x86_64
###
# Cross toolchain for the AMD64 stubs.  Each tool defaults to the shell
# command `false`, so a rule that needs a missing tool fails immediately.
GCC_AMD64 := false
LD_AMD64 := false
# Was `$false`, which make reads as $(f) followed by the text "alse".
OBJCOPY_AMD64 := false
# Recursive (=), so it picks up the OBJCOPY_AMD64 override made below.
OBJSTRIP_AMD64 = $(OBJCOPY_AMD64) -R .comment -R .note
# The real cross tools are used only when the crosstool directory exists.
d = /home2/crosstool/gcc-4.0.1-glibc-2.3.5/x86_64-unknown-linux-gnu/bin
ifneq ($(wildcard $d),)
GCC_AMD64 := $d/x86_64-unknown-linux-gnu-gcc -m64 -nostdinc -MMD
LD_AMD64 := $d/x86_64-unknown-linux-gnu-ld
OBJCOPY_AMD64 := $d/x86_64-unknown-linux-gnu-objcopy
endif
###
### ARM-PE-WINCE
@ -380,6 +396,11 @@ l_lx_elfppc32.h: l_lx_elfppc32.S ppc_d_nrv2e.S
$(LD_PPC32) -o $T.bin --oformat binary $T.o
$(BIN2H) $T.bin linux_elfppc32_loader $@
# AMD64 entry stub: assemble l_lx_elf64amd.S (which includes amd_d_nrv2e.S),
# link the object to a flat binary, and pass that to $(BIN2H) to produce the
# header with the symbol name linux_elf64amd_loader.
l_lx_elf64amd.h: l_lx_elf64amd.S amd_d_nrv2e.S
$(GCC_AMD64) -c $T.S
$(LD_AMD64) -o $T.bin --oformat binary $T.o
$(BIN2H) $T.bin linux_elf64amd_loader $@
# i386 Linux ELF loader object: compile, then run $(OBJSTRIP_LINUX_I386) on it.
l_lx_elf.o: l_lx_elf.c linux.hh
$(CC_LINUX_I386) -c $<
$(OBJSTRIP_LINUX_I386) $@
@ -415,15 +436,27 @@ m_lx_elfppc32.o: m_lx_elfppc32.c
$(GCC_PPC32) -Os -c $<
$(OBJSTRIP_PPC32) $@
# C part of the AMD64 folded loader, compiled with -Os.
a_lx_elf64amd.o: a_lx_elf64.c
$(GCC_AMD64) -c -Os -o $@ $<
# PowerPC32 fold stub object: assemble, then run $(OBJSTRIP_PPC32) on it.
fold_elfppc32.o: fold_elfppc32.S ppc_bxx.S
$(GCC_PPC32) -c $<
$(OBJSTRIP_PPC32) $@
# Assembly part of the AMD64 folded loader.  amd_bxx.S is a prerequisite
# because fold_elf64amd.S includes it.
fold_elf64amd.o: fold_elf64amd.S amd_bxx.S
$(GCC_AMD64) -c -O $T.S
# PowerPC32 folded loader: link both objects with the l_lx_elfppc32.lds
# script, then pass the result to $(BIN2H) to produce the header.
fold_elfppc32.h: m_lx_elfppc32.o fold_elfppc32.o l_lx_elfppc32.lds
$(LD_PPC32) -T $(srcdir)/l_lx_elfppc32.lds -Map $T.map -o $T.bin --strip-all \
fold_elfppc32.o m_lx_elfppc32.o
$(BIN2H) $T.bin linux_elfppc32_fold $@
# AMD64 folded loader: link the assembly and C objects with the
# l_lx_elf64amd.lds script, post-process the result with
# $(STRIPELF_LINUX_I386), and pass it to $(BIN2H) to produce the header.
# NOTE(review): STRIPELF_LINUX_I386 is defined outside this view; confirm
# that it handles the 64-bit ELF output of this link.
fold_elf64amd.h: a_lx_elf64amd.o fold_elf64amd.o l_lx_elf64amd.lds amd_bxx.S
$(LD_AMD64) -T $(srcdir)/l_lx_elf64amd.lds -Map $T.map -o $T.bin --strip-all \
fold_elf64amd.o a_lx_elf64amd.o
$(STRIPELF_LINUX_I386) $T.bin
$(BIN2H) $T.bin linux_elf64amd_fold $@
# i386 fold stub object: assemble with $(NASM) to ELF, then run
# $(OBJSTRIP_LINUX_I386) on it.
fold_exec86.o: fold_exec86.asm
$(NASM) -f elf -o $@ $<
$(OBJSTRIP_LINUX_I386) $@

366
src/stub/a_lx_elf64.c Normal file
View File

@ -0,0 +1,366 @@
/* a_lx_elf64.c -- stub loader for Linux 64-bit ELF executable
This file is part of the UPX executable compressor.
Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer
Copyright (C) 1996-2004 Laszlo Molnar
Copyright (C) 2000-2005 John F. Reiser
All Rights Reserved.
UPX and the UCL library are free software; you can redistribute them
and/or modify them under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.
If not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
Markus F.X.J. Oberhumer Laszlo Molnar
<mfx@users.sourceforge.net> <ml1050@users.sourceforge.net>
John F. Reiser
<jreiser@users.sourceforge.net>
*/
#include "linux.hh"
extern void exit(int);
/*************************************************************************
// configuration section
**************************************************************************/
// In order to make it much easier to move this code at runtime and execute
// it at an address different from its load address: there must be no
// static data, and no string constants.
#define MAX_ELF_HDR 1024 // Elf64_Ehdr + n*Elf64_Phdr must fit in this
/*************************************************************************
// "file" util
**************************************************************************/
// A movable window onto a byte buffer: 'buf' is the next unread byte and
// 'size' is the number of bytes that remain.
typedef struct {
    size_t size; // must be first to match size[0] uncompressed size
    char *buf;
} Extent;

// Copy 'count' bytes from the front of extent 'x' into 'buf', then advance
// the extent past them.  Exits with status 127 when fewer than 'count'
// bytes remain in the extent.
static void
xread(Extent *x, char *buf, size_t count)
{
    char const *from = x->buf;
    char *to = buf;
    size_t left = count;

    if (count > x->size) {
        exit(127);
    }
    while (left != 0) {
        *to++ = *from++;
        --left;
    }
    x->size -= count;
    x->buf += count;
}
/*************************************************************************
// util
**************************************************************************/
// Two interchangeable error-exit schemes.  In the active (#if 1) variant,
// err_exit(a) ignores its argument and jumps to a local label 'error';
// ERR_LAB defines that label (followed by exit(127)) and must therefore
// appear exactly once in every function that uses err_exit.  In the other
// variant err_exit is a real function and ERR_LAB expands to nothing.
#if 1 //{ save space
#define ERR_LAB error: exit(127);
#define err_exit(a) goto error
#else //}{ save debugging time
#define ERR_LAB
static void
err_exit(int a)
{
(void)a; // debugging convenience
exit(127);
}
#endif //}
/*************************************************************************
// UPX & NRV stuff
**************************************************************************/
// Signature of the unfilter routine.  unpackExtent calls it as
// (buffer, length, h.b_cto8, h.b_ftid) on a freshly decompressed block.
typedef void f_unfilter(
nrv_byte *, // also addvalue
nrv_uint,
unsigned cto8, // junk in high 24 bits
unsigned ftid
);
// Signature of the decompressor.  unpackExtent calls it as
// (src, src_len, dst, &dst_len, h.b_method) and treats 0 as success.
typedef int f_expand(
const nrv_byte *, nrv_uint,
nrv_byte *, nrv_uint *, unsigned );
// Fill 'xo' from the block stream in 'xi'.
//
// Loops until xo->size reaches 0 or an end-of-file block (sz_unc == 0) is
// read.  Each block starts with a struct b_info header.  A block with
// sz_cpr == sz_unc is copied literally; a block with sz_cpr < sz_unc is
// expanded by f_decompress and, when f_unf is non-null and the block names
// a filter, unfiltered in place.  Both extents are advanced past the data
// consumed and produced.  Any inconsistency jumps to the 'error' label,
// which exits with status 127.
static void
unpackExtent(
Extent *const xi, // input
Extent *const xo, // output
f_expand *const f_decompress,
f_unfilter *f_unf
)
{
while (xo->size) {
struct b_info h;
// Note: if h.sz_unc == h.sz_cpr then the block was not
// compressible and is stored in its uncompressed form.
// Read and check block sizes.
xread(xi, (char *)&h, sizeof(h));
if (h.sz_unc == 0) { // uncompressed size 0 -> EOF
if (h.sz_cpr != UPX_MAGIC_LE32) // h.sz_cpr must be h->magic
err_exit(2);
if (xi->size != 0) // all bytes must be written
err_exit(3);
break;
}
if (h.sz_cpr <= 0) {
err_exit(4);
// The single 'error:' label of this function is defined here; every
// err_exit() in the function jumps to it.
ERR_LAB
}
if (h.sz_cpr > h.sz_unc
|| h.sz_unc > xo->size ) {
err_exit(5);
}
// Now we have:
// assert(h.sz_cpr <= h.sz_unc);
// assert(h.sz_unc > 0 && h.sz_unc <= blocksize);
// assert(h.sz_cpr > 0 && h.sz_cpr <= blocksize);
if (h.sz_cpr < h.sz_unc) { // Decompress block
// NOTE(review): on this path h.sz_cpr is not compared with xi->size
// before the decompressor reads the input; the literal path below is
// checked inside xread.
nrv_uint out_len;
int const j = (*f_decompress)((unsigned char *)xi->buf, h.sz_cpr,
(unsigned char *)xo->buf, &out_len, h.b_method );
if (j != 0 || out_len != (nrv_uint)h.sz_unc)
err_exit(7);
// Skip Ehdr+Phdrs: separate 1st block, not filtered
if (h.b_ftid!=0 && f_unf // have filter
&& ((512 < out_len) // this block is longer than Ehdr+Phdrs
|| (xo->size==(unsigned)h.sz_unc) ) // block is last in Extent
) {
(*f_unf)((unsigned char *)xo->buf, out_len, h.b_cto8, h.b_ftid);
}
// The decompressor does not advance the input extent; do it here.
xi->buf += h.sz_cpr;
xi->size -= h.sz_cpr;
}
else { // copy literal block
xread(xi, xo->buf, h.sz_cpr);
}
xo->buf += h.sz_unc;
xo->size -= h.sz_unc;
}
}
// Create (or find) an escape hatch to use when munmapping ourselves the stub.
// Called by do_xmap to create it, and by assembler code to find it.
// NOTE(review): as written here this is a placeholder: it ignores 'phdr' and
// always returns 0, and no call to it appears in the C code of this file.
static void *
make_hatch(Elf64_Phdr const *const phdr)
{
return 0;
}
// Store zero into 'len' consecutive bytes starting at 'p'.
// A length of zero writes nothing.
static void
upx_bzero(char *p, size_t len)
{
    while (len != 0) {
        *p++ = 0;
        --len;
    }
}
#define bzero upx_bzero
// Set the auxiliary-vector entry of the given 'type' to 'value'.
//
// Scans forward from 'av' for the first entry that either already has
// 'type' or is an AT_IGNORE slot, which is then reused (AT_IGNORE slots are
// not reused when 'type' is AT_NULL).  Does nothing when 'av' is null or has
// its low bit set.  The scan ends at the AT_NULL terminator, so a 'type'
// that is absent from a vector with no spare AT_IGNORE slot is dropped
// instead of reading and writing past the end of the vector.
static void
auxv_up(Elf64_auxv_t *av, unsigned const type, uint64_t const value)
{
    if (av && 0==(1&(uint64_t)av)) /* PT_INTERP usually inhibits, except for hatch */
    for (;; ++av) {
        if (av->a_type==type || (av->a_type==AT_IGNORE && type!=AT_NULL)) {
            av->a_type = type;
            av->a_un.a_val = value;
            return;
        }
        // Tested after the match above, so that type==AT_NULL still
        // matches (and rewrites) the terminator itself.
        if (av->a_type==AT_NULL) {
            return;
        }
    }
}
// The PF_* and PROT_* bits are {1,2,4}; the conversion table fits in 32 bits.
// REP8(x) replicates the 4-bit value x into all eight nibbles of a word.
#define REP8(x) \
    ((x)|((x)<<4)|((x)<<8)|((x)<<12)|((x)<<16)|((x)<<20)|((x)<<24)|((x)<<28))
// EXP8(y) is a mask of the nibbles whose index (0..7) has bit y set.
#define EXP8(y) \
    ((1&(y)) ? 0xf0f0f0f0 : (2&(y)) ? 0xff00ff00 : (4&(y)) ? 0xffff0000 : 0)
// PF_TO_PROT(pf) converts ELF segment flags into PROT_* bits by extracting
// nibble number (pf & (PF_R|PF_W|PF_X)) of the 32-bit table built above.
// 'pf' is parenthesized so that an argument containing an operator of lower
// precedence than '&' (such as '|' or '?:') is masked as a whole.
#define PF_TO_PROT(pf) \
    ((PROT_READ|PROT_WRITE|PROT_EXEC) & ( \
        ( (REP8(PROT_EXEC ) & EXP8(PF_X)) \
         |(REP8(PROT_READ ) & EXP8(PF_R)) \
         |(REP8(PROT_WRITE) & EXP8(PF_W)) \
        ) >> (((pf) & (PF_R|PF_W|PF_X))<<2) ))
// Find convex hull of PT_LOAD (the minimal interval which covers all PT_LOAD),
// and mmap that much, to be sure that a kernel using exec-shield-randomize
// won't place the first piece in a way that leaves no room for the rest.
//
// mflags: extra mmap flags; MAP_PRIVATE|MAP_ANONYMOUS are added here.
// phdr, phnum: the program header table to scan.
// p_brk: receives the address just past the reserved interval.
// Returns the difference between where the interval was mapped and the
// lowest (page-rounded) PT_LOAD address.
// NOTE(review): the result of mmap is not checked for failure here.
static unsigned long // returns relocation constant
xfind_pages(unsigned mflags, Elf64_Phdr const *phdr, int phnum,
char **const p_brk
)
{
size_t lo= ~0, hi= 0, szlo= 0;
char *addr;
mflags += MAP_PRIVATE | MAP_ANONYMOUS; // '+' can optimize better than '|'
// lo = lowest p_vaddr (szlo = file size of that segment); hi = highest end.
for (; --phnum>=0; ++phdr) if (PT_LOAD==phdr->p_type) {
if (phdr->p_vaddr < lo) {
lo = phdr->p_vaddr;
szlo = phdr->p_filesz;
}
if (hi < (phdr->p_memsz + phdr->p_vaddr)) {
hi = phdr->p_memsz + phdr->p_vaddr;
}
}
szlo += ~PAGE_MASK & lo; // page fragment on lo edge
lo -= ~PAGE_MASK & lo; // round down to page boundary
hi = PAGE_MASK & (hi - lo - PAGE_MASK -1); // page length
szlo = PAGE_MASK & (szlo - PAGE_MASK -1); // page length
addr = mmap((void *)lo, hi, PROT_READ|PROT_WRITE|PROT_EXEC, mflags, 0, 0);
*p_brk = hi + addr; // the logical value of brk(0)
// Keep only the pages of the lowest segment; release the rest of the hull.
munmap(szlo + addr, hi - szlo); // desirable if PT_LOAD non-contiguous
return (unsigned long)addr - lo;
}
// Map every PT_LOAD segment described by 'ehdr' and return the relocated
// entry address (e_entry plus the relocation constant from xfind_pages).
//
// ehdr: ELF header followed in memory by its program headers (at e_phoff).
// xi: compressed input, or 0.  When non-zero, each segment is mapped
//     anonymously and filled by unpackExtent; when zero, each segment is
//     mapped from file descriptor 'fdi' at p_offset.
// av: auxiliary vector; AT_PHDR is updated when xi is non-zero and a
//     PT_PHDR entry is present.
// f_decompress, f_unf: passed through to unpackExtent.
// Any mmap or mprotect failure jumps to 'error', which exits with 127.
static Elf64_Addr // entry address
do_xmap(
Elf64_Ehdr const *const ehdr,
Extent *const xi,
int const fdi,
Elf64_auxv_t *const av,
f_expand *const f_decompress,
f_unfilter *const f_unf
)
{
Elf64_Phdr const *phdr = (Elf64_Phdr const *) (ehdr->e_phoff +
(char const *)ehdr);
char *v_brk;
// MAP_FIXED unless the image is ET_DYN.
unsigned long const reloc = xfind_pages(
((ET_DYN!=ehdr->e_type) ? MAP_FIXED : 0), phdr, ehdr->e_phnum, &v_brk);
int j;
for (j=0; j < ehdr->e_phnum; ++phdr, ++j)
if (xi && PT_PHDR==phdr->p_type) {
auxv_up(av, AT_PHDR, phdr->p_vaddr + reloc);
} else
if (PT_LOAD==phdr->p_type) {
unsigned const prot = PF_TO_PROT(phdr->p_flags);
Extent xo;
size_t mlen = xo.size = phdr->p_filesz;
char *addr = xo.buf = (char *)phdr->p_vaddr;
char *haddr = phdr->p_memsz + addr;
size_t frag = (long)addr &~ PAGE_MASK;
// Extend the mapping downward to the start of the page holding p_vaddr.
mlen += frag;
addr -= frag;
addr += reloc;
haddr += reloc;
// Mapped writable first so the contents can be filled in; the final
// protection is applied by mprotect below.
if (addr != mmap(addr, mlen, PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_PRIVATE | (xi ? MAP_ANONYMOUS : 0),
fdi, phdr->p_offset - frag) ) {
err_exit(8);
}
if (xi) {
unpackExtent(xi, &xo, f_decompress, f_unf);
}
bzero(addr, frag); // fragment at lo end
frag = (-mlen) &~ PAGE_MASK; // distance to next page boundary
bzero(mlen+addr, frag); // fragment at hi end
if (0!=mprotect(addr, mlen, prot)) {
err_exit(10);
// The single 'error:' label of this function is defined here.
ERR_LAB
}
addr += mlen + frag; /* page boundary on hi end */
if (addr < haddr) { // need pages for .bss
if (addr != mmap(addr, haddr - addr, prot,
MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0 ) ) {
err_exit(9);
}
}
}
if (xi) { // 1st call (main); also have (0!=av) here
if (ET_DYN!=ehdr->e_type) {
// Needed only if compressed shell script invokes compressed shell.
brk(v_brk);
}
}
return ehdr->e_entry + reloc;
}
/*************************************************************************
// upx_main - called by our entry code
//
// This function is optimized for size.
**************************************************************************/
// li: start of the packed data; the first b_info follows the l_info and
//     the p_info located there.
// sz_compressed: total length, including the l_info and p_info.
// ehdr: scratch buffer that receives the decompressed Ehdr and Phdrs; it is
//     reused for the first MAX_ELF_HDR bytes of the PT_INTERP file.
// sz_ehdr: not referenced in this function.
// f_decompress, f_unf: decompressor and unfilter entry points.
// av: auxiliary vector to update.
// Returns the entry address of the program, or of its PT_INTERP
// interpreter when one is present.
void *
upx_main( // returns entry address
struct l_info const *const li,
size_t const sz_compressed, // total length
Elf64_Ehdr *const ehdr, // temp char[sz_ehdr] for decompressing
size_t const sz_ehdr,
f_expand *const f_decompress,
f_unfilter *const f_unf,
Elf64_auxv_t *const av
)
{
Elf64_Phdr const *phdr = (Elf64_Phdr const *)(1+ ehdr);
Elf64_Addr entry;
Extent xi, xo, xi0;
xi.buf = (char *)(1+ (struct p_info const *)(1+ li)); // &b_info
xi.size = sz_compressed - (sizeof(struct l_info) + sizeof(struct p_info));
xo.buf = (char *)ehdr;
xo.size = ((struct b_info const *)xi.buf)->sz_unc;
// Remember the start of the input: do_xmap reads it again from the top.
xi0 = xi;
// ehdr = Uncompress Ehdr and Phdrs
unpackExtent(&xi, &xo, f_decompress, 0); // never filtered?
// AT_PHDR.a_un.a_val is set again by do_xmap if PT_PHDR is present.
auxv_up(av, AT_PHDR , (unsigned long)(1+(Elf64_Ehdr *)phdr->p_vaddr));
auxv_up(av, AT_PHNUM , ehdr->e_phnum);
auxv_up(av, AT_ENTRY , (unsigned long)ehdr->e_entry);
//auxv_up(av, AT_PHENT , ehdr->e_phentsize); /* this can never change */
//auxv_up(av, AT_PAGESZ, PAGE_SIZE); /* ld-linux.so.2 does not need this */
entry = do_xmap(ehdr, &xi0, 0, av, f_decompress, f_unf);
{ // Map PT_INTERP program interpreter
int j;
for (j=0; j < ehdr->e_phnum; ++phdr, ++j) if (PT_INTERP==phdr->p_type) {
char const *const iname = (char const *)phdr->p_vaddr;
int const fdi = open(iname, O_RDONLY, 0);
if (0 > fdi) {
err_exit(18);
}
if (MAX_ELF_HDR!=read(fdi, (void *)ehdr, MAX_ELF_HDR)) {
// The single 'error:' label of this function is defined here.
ERR_LAB
err_exit(19);
}
// Second call: no input extent and no auxv, so segments are mapped from fdi.
entry = do_xmap(ehdr, 0, fdi, 0, 0, 0);
close(fdi);
}
}
return (void *)entry;
}
/*
vi:ts=4:et:nowrap
*/

64
src/stub/amd_bxx.S Normal file
View File

@ -0,0 +1,64 @@
/* amd_bxx.S -- AMD x86_64 Call/Branch Trick unfilter
This file is part of the UPX executable compressor.
Copyright (C) 2005 John F. Reiser
All Rights Reserved.
UPX and the UCL library are free software; you can redistribute them
and/or modify them under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.
If not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
John F. Reiser
<jreiser@users.sourceforge.net>
*/
#include "amd_regs.h"
/* amdbxx(ptr, len, cto8, ftid): undo the call/branch filter in place.
   Only ftid 0x49 is processed; any other value returns immediately.
   At each marked location the stored 32-bit word has its mark byte cleared,
   is byte-swapped, and has the offset of that word within the buffer
   subtracted from it (32-bit arithmetic) before being stored back.
   Uses %rax, %rcx, %rsi and %rdi as scratch.
*/
amdbxx: # (*f_unf)(xo->buf, out_len, h.b_cto8, h.b_ftid);
#define ptr %arg1 /* known to be %rdi */
#define len %arg2
#define cto8 %arg3l /* known to be "%dl" */
#define ftid %arg4l
cmpl $0x49,ftid; jne ckend # filter: JMP, CALL, 6-byte Jxx
movq len,%rcx # byte count
movq ptr,%rsi # remember start of buffer
jmp ckstart
ckloop3:
movb (%rdi),%al; addq $1,%rdi
cmpb $0x80,%al; jb ckloop2 # lo of 6-byte Jcc
cmpb $0x8F,%al; ja ckloop2 # hi of 6-byte Jcc
cmpb $0x0F,-2(%rdi); je ckmark # prefix of 6-byte Jcc
ckloop2:
subb $ 0xE8,%al
cmpb $0xE9-0xE8,%al; ja ckcount # not JMP, not CALL
ckmark:
cmpb %dl,(%rdi); jne ckcount # not marked with cto8
movl (%rdi),%eax # the marked, bswapped 32-bit displacement
andl $~0<<8,%eax # clear the mark
bswap %eax
subl %edi,%eax
addl %esi,%eax
stosl # *%rdi++ = %eax;
ckstart:
subq $4,%rcx
movb (%rdi),%al; addq $1,%rdi
loop ckloop2 # prefix cannot overlap previous displacement
jrcxz ckend
ckcount:
loop ckloop3
ckend:
ret

148
src/stub/amd_d_nrv2e.S Normal file
View File

@ -0,0 +1,148 @@
/* amd_d_nrv2e.S -- AMD64 decompressor for NRV2E
This file is part of the UPX executable compressor.
Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer
Copyright (C) 1996-2004 Laszlo Molnar
Copyright (C) 2000-2005 John F. Reiser
All Rights Reserved.
UPX and the UCL library are free software; you can redistribute them
and/or modify them under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.
If not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
Markus F.X.J. Oberhumer Laszlo Molnar
<mfx@users.sourceforge.net> <ml1050@users.sourceforge.net>
John F. Reiser
<jreiser@users.sourceforge.net>
*/
#include "amd_regs.h"
M_NRV2B_LE32=2 # ../conf.h
M_NRV2E_LE32=8
SZ_DLINE=128 # size of data cache line in Apple G5
/* NOTE(review): SZ_DLINE is described in terms of a PowerPC machine; confirm
   the value intended for AMD64.  It is used only for the read at bot_n2e. */
/* Returns 0 on success; non-zero on failure. */
/* Only method M_NRV2E_LE32 is decoded; any other method goes straight to
   eof_n2e.  The return value is (final source pointer - end of input), and
   the number of bytes produced is stored as 32 bits through ldst.
   %rbp and %rbx are saved and restored; the bit reader is reached through
   the pointer kept in %rdx. */
decompress: # (uchar const *src, size_t lsrc, uchar *dst, size_t &ldst, uint method)
/* Arguments according to calling convention */
#define src %arg1
#define lsrc %arg2
#define dst %arg3
#define ldst %arg4 /* Out: actually a reference: &len_dst */
#define meth %arg5l
/* Working registers */
#define off %eax /* XXX: 2GB */
#define len %ecx /* XXX: 2GB */
#define bits %ebx
#define disp %rbp
push %rbp; push %rbx
push ldst
push dst
addq src,lsrc; push lsrc # &input_eof
movq src,%rsi # hardware src
movl $1<<31,%ebx # force refill
movq dst,%rdi # hardware dst
orq $~0,disp # -1: initial displacement
call setup_rdx_n2e
ra_setup_rdx:
/* jump on next bit {0,1} with prediction {y==>likely, n==>unlikely} */
/* Prediction omitted for now. */
#define jnextb0n call *%rdx; jnc
#define jnextb0y call *%rdx; jnc
#define jnextb1n call *%rdx; jc
#define jnextb1y call *%rdx; jc
/* rotate next bit into bottom bit of reg */
#define getnextb(reg) call *%rdx; adcl reg,reg
/*.align 1<<4 # not effective unless upx pays attention */
get_refill_n2e: # In: 1==Carry
movl (%rsi),bits; leaq 4(%rsi),%rsi # next 32 bits
adcl bits,bits # LSB= 1 (CarryIn); CarryOut= next bit
ret
/* getbit: return the next input bit in Carry.  When the shift leaves the bit
   buffer zero, the next 32 bits are loaded by get_refill_n2e. */
getbit:
addl bits,bits; jz get_refill_n2e # Carry= next bit
ret
lit_n2e:
movsb # *%rdi++ = *%rsi++;
top_n2e:
call *%rdx
jc lit_n2e
movl $1,off
jmp getoff_n2e
/*.align 1<<4 # not effective unless upx pays attention */
off_n2e:
dec off
getnextb(off)
getoff_n2e:
getnextb(off)
jnextb0n off_n2e
xorl len,len # len= 0
subl $3,off; jc offprev_n2e
shll $8,off
lodsb # requires off===%eax
xorl $~0,off; jz eof_n2e
sarl off # Carry= original low bit
movslq off,disp
jc lenlast_n2e
jmp lenmore_n2e
offprev_n2e:
jnextb1y lenlast_n2e
lenmore_n2e:
movl $1,len
jnextb1y lenlast_n2e
len_n2e:
getnextb(len)
jnextb0n len_n2e
addl $6-2-2,len
jmp gotlen_n2e
lenlast_n2e:
getnextb(len) # 0,1,2,3
gotlen_n2e:
cmpq $-0x500,disp
adcl $2,len # len += 2+ (disp < -0x500);
push %rsi
leaq (%rdi,disp),%rsi
rep; movsb
pop %rsi
bot_n2e:
movb 2*SZ_DLINE(%rdi),%al # prefetch for store
jmp top_n2e
/* Reached by the call above: the popped return address is ra_setup_rdx, so
   after the add %rdx holds the address of getbit. */
setup_rdx_n2e:
pop %rdx; addq $ getbit - ra_setup_rdx,%rdx
cmpl $ M_NRV2E_LE32,meth; jne eof_n2e
jmp bot_n2e
eof_n2e:
pop %rcx # &input_eof
movq %rsi,%rax; subq %rcx,%rax # src -= eof; // return 0: good; else: bad
pop %rdx; subq %rdx,%rdi # dst -= original dst
pop %rcx; movl %edi,(%rcx) # actual length used at dst XXX: 4GB
pop %rbx; pop %rbp
ret

15
src/stub/amd_regs.h Normal file
View File

@ -0,0 +1,15 @@
/* Register names for the x86_64 System V calling convention, as used by the
 * AMD64 stub sources: argN is the 64-bit register that carries argument N,
 * argNl is its low 32 bits.
 * NOTE: THE FIRST ARGUMENT IS arg1, NOT arg0.
 *
 * Comments in this file must use C comment syntax.  The preprocessor strips
 * those, whereas text after '#' on a #define line becomes part of the macro
 * body and is pasted into every line that uses the macro.
 */
#define arg1 rdi
#define arg1l edi
#define arg2 rsi
#define arg2l esi
#define arg3 rdx
#define arg3l edx
#define arg4 rcx
#define arg4l ecx
/* 4th arg to syscall is not in %rcx */
#define sys4 r10
#define sys4l r10d
#define arg5 r8
#define arg5l r8d
#define arg6 r9
#define arg6l r9d

124
src/stub/fold_elf64amd.S Normal file
View File

@ -0,0 +1,124 @@
/* fold_elf64amd.S -- linkage to C code to process Elf binary
*
* This file is part of the UPX executable compressor.
*
* Copyright (C) 2000-2005 John F. Reiser
* All Rights Reserved.
*
* UPX and the UCL library are free software; you can redistribute them
* and/or modify them under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING.
* If not, write to the Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Markus F.X.J. Oberhumer Laszlo Molnar
* <mfx@users.sourceforge.net> <ml1050@users.sourceforge.net>
*
* John F. Reiser
* <jreiser@users.sourceforge.net>
*/
#include "amd_regs.h"
/* Page geometry. */
PAGE_SHIFT= 12
PAGE_MASK= 0xffffffffffffffff<<PAGE_SHIFT
/* Sizes and field offsets of the packed-file headers, as used below. */
sz_b_info= 12
sz_unc= 0
sz_cpr= 4
sz_l_info= 12
sz_p_info= 12
/* Bytes of stack reserved as temporary space for the ELF headers. */
OVERHEAD=2048
MAX_ELF_HDR=1024
/* x86_64 system call number.  This was 91, which is the i386 number; it
   must agree with the 64-bit system call numbers defined further down in
   this file. */
__NR_munmap= 11
/* In:
%rbp= &decompress; also 9+ (char *)&(offset to {l_info; p_info; b_info})
%rsp= &{argc,argv...,0,env...,0,auxv...,strings}
*/
/* fold_begin: entry of the unfolded loader.  It locates the auxiliary vector
   on the stack, reserves OVERHEAD bytes of stack as header scratch space,
   calls upx_main, then unmaps the region from %r14 up to %r15 and returns
   to the entry address that upx_main produced. */
fold_begin:
call L90 # ret_addr is &f_unfilter
#include "amd_bxx.S"
/* zfind: advance %rsi past the next zero quadword. */
zfind:
lodsq; testq %rax,%rax; jnz zfind
ret
L90:
pop %arg6 # &amdbxx: f_unfilter
movq %rsp,%rsi # stack pointer at execve
call zfind # %rsi= &env
call zfind # %rsi= &Elf64_auxv
subq $ OVERHEAD,%rsp
movq %rsp,%arg3 # &Elf64_Ehdr temporary space
push %rsi # arg7
movq $ PAGE_MASK,%rcx # sign extend 32 bits to 64 bits
movl -9(%rbp),%arg2l # total size - offset to {l_info; p_info; b_info}
movq %arg6,%r15; andq %rcx,%r15 # %r15= &this_page
movq %rbp,%arg5 # &decompress: f_expand
movq %rbp,%arg1; subq %arg2,%arg1 # &l_info
movq %arg1,%r14; andq %rcx,%r14 # %r14= our_Elf64_Ehdr
movl sz_unc+sz_p_info+sz_l_info(%arg1),%arg4l # sz_elf_headers
call upx_main # Out: %rax= entry
/* entry= upx_main(l_info *arg1, total_size arg2, Elf64_Ehdr *arg3, sz_ehdr arg4,
f_decompr arg5, f_unfilter arg6, Elf64_auxv_t *arg7 )
*/
addq $8+OVERHEAD,%rsp
push %rax # save &entry
movq %r14,%arg1 # &our_Elf64_Ehdr
movq %r15,%arg2
subq %r14,%arg2 # size
call munmap # unmap compressed program; /proc/self/exe disappears
ret
/* 64-bit mode only! */
/* System call numbers and minimal wrappers callable from the C code.  Each
   wrapper loads its number into %al and joins sysgo, which zero-extends the
   number, executes 'syscall', and turns any result that is not below
   PAGE_MASK (unsigned compare) into -1.  Only mmap copies its 4th argument
   into %sys4; the other wrappers pass their argument registers unchanged. */
__NR_read= 0
__NR_open= 2
__NR_close= 3
__NR_mmap= 9
__NR_mprotect= 10
__NR_munmap= 11
__NR_brk= 12
__NR_exit= 60
mmap: .globl mmap
movq %arg4,%sys4
movb $ __NR_mmap,%al
sysgo: # NOTE: kernel demands 4th arg in %sys4, NOT %arg4
movzbl %al,%eax
syscall
cmpq $ PAGE_MASK,%rax; jc no_fail
orq $~0,%rax # failure; IGNORE errno
no_fail:
ret
read: .globl read
movb $ __NR_read,%al; jmp sysgo
open: .globl open
movb $ __NR_open,%al; jmp sysgo
close: .globl close
movb $ __NR_close,%al; jmp sysgo
mprotect: .globl mprotect
movb $ __NR_mprotect,%al; jmp sysgo
munmap: .globl munmap
movb $ __NR_munmap,%al; jmp sysgo
brk: .globl brk
movb $ __NR_brk,%al; jmp sysgo
exit: .globl exit
movb $ __NR_exit,%al; jmp sysgo
# vi:ts=8:et:nowrap

109
src/stub/l_lx_elf64amd.S Normal file
View File

@ -0,0 +1,109 @@
/* l_lx_elf64amd.S -- Linux program entry point & decompressor (Elf binary)
*
* This file is part of the UPX executable compressor.
*
* Copyright (C) 1996-2004 Markus Franz Xaver Johannes Oberhumer
* Copyright (C) 1996-2004 Laszlo Molnar
* Copyright (C) 2000-2005 John F. Reiser
* All Rights Reserved.
*
* UPX and the UCL library are free software; you can redistribute them
* and/or modify them under the terms of the GNU General Public License as
* published by the Free Software Foundation; either version 2 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; see the file COPYING.
* If not, write to the Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*
* Markus F.X.J. Oberhumer Laszlo Molnar
* <mfx@users.sourceforge.net> <ml1050@users.sourceforge.net>
*
* John F. Reiser
* <jreiser@users.sourceforge.net>
*/
/*__LEXEC000__*/
_start: .globl _start
/* The following 'call' must be at _start; fold_begin knows this,
and so does PackLinuxElf64amd::pack3() .
*/
/* The return address pushed by this call is the first byte of the included
   decompressor; 'main' pops it into %rbp. */
call main # push address of decompressor
#include "amd_d_nrv2e.S"
/* Size and field offsets of b_info, as used by 'unfold' below. */
sz_b_info= 12
sz_unc= 0
sz_cpr= 4
b_method= 8
/* mmap protection and flag bits. */
PROT_READ= 1
PROT_WRITE= 2
PROT_EXEC= 4
MAP_PRIVATE= 2
MAP_FIXED= 0x10
MAP_ANONYMOUS= 0x20
SYS_mmap= 9 # 64-bit mode only!
PAGE_SHIFT= 12
PAGE_MASK= (~0<<PAGE_SHIFT)
PAGE_SIZE= -PAGE_MASK
#include "amd_regs.h"
/* Decompress the rest of this loader, and jump to it.
Map a page to hold the decompressed bytes. Logically this could
be done by setting .p_memsz for our first PT_LOAD. But as of 2005-11-09,
linux 2.6.14 only does ".bss expansion" on the PT_LOAD that describes the
highest address. [I regard this as a bug, and it makes the kernel's
fs/binfmt_elf.c complicated, buggy, and insecure.] For us, that is the 2nd
PT_LOAD, which is the only way that linux allows to set the brk() for the
uncompressed program. [This is a significant kernel misfeature.]
*/
/* Entered by 'call unfold' from main, so the return address on the stack is
   the address of the b_info that follows the call.  The code maps PAGE_SIZE
   bytes at the next page boundary after the folded loader, halts if the
   mapping fails, loads the decompressor arguments from the b_info, and
   jumps to the decompressor in %rbp with the mapped address pushed as its
   return address. */
unfold:
pop %rdi # &{ b_info={sz_unc, sz_cpr, {4 char}}, folded_loader...}
subl %arg2l,%arg2l # %arg2= 0
push %rdi # remember &b_info
lea PROT_READ | PROT_WRITE | PROT_EXEC(%arg2),%arg3l
addl sz_cpr(%rdi),%arg1l # n.b.: %rdi===%arg1; XXX: 4GB
lea MAP_PRIVATE | MAP_FIXED | MAP_ANONYMOUS(%arg2),%sys4l
addl $-1+ sz_b_info,%arg1l # XXX: 4GB
movl $ PAGE_MASK,%ecx
subl %ecx,%arg1l # XXX: 4GB
lea SYS_mmap(%arg2),%eax
andl %ecx,%arg1l # next page boundary after fold; XXX: 4GB
subl %ecx,%arg2l # %arg2l= PAGE_SIZE
push %rcx
subl %arg5l,%arg5l; subl %arg6l,%arg6l
syscall # trashes %rcx
pop %rcx
cmpl %ecx,%eax; jb 0f; hlt; 0: # XXX: 4GB
pop %rsi # %arg2= &b_info
push %rax # ret_addr after decompression
.byte 0x92 # xchg %eax,%arg3l # %arg3= dst for unfolding XXX: 4GB
lodsl; movl %arg2l,%arg4l # &len_dst ==> &do_not_care XXX: 4GB
lodsl; .byte 0x97 # xchg %eax,%arg1l # sz_cpr XXX: 4GB
lodsl; movzbl %al,%arg5l # b_method
xchg %arg1l,%arg2l # XXX: 4GB
jmp *%rbp # goto decompress; return to unfolded loader
/* main: reached by the 'call' at _start.  It pops that return address (the
   address of the decompressor) into %rbp, then calls 'unfold'; the return
   address of this second call is the address of the b_info that follows. */
main:
# int3 # uncomment for debugging
pop %rbp # &decompress
call unfold
/* { b_info={sz_unc, sz_cpr, {4 char}}, folded_loader...} */
eof:
/*__XTHEENDX__*/
/*
vi:ts=8:et:nowrap
*/

View File

@ -0,0 +1,48 @@
/* l_lx_elf64amd.lds -- static linker script to build l_lx_elf64amd.bin
This file is part of the UPX executable compressor.
Copyright (C) 2000-2005 John F. Reiser
All Rights Reserved.
UPX and the UCL library are free software; you can redistribute them
and/or modify them under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; see the file COPYING.
If not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
Markus F.X.J. Oberhumer Laszlo Molnar
<mfx@users.sourceforge.net> <ml1050@users.sourceforge.net>
John F. Reiser
<jreiser@users.sourceforge.net>
*/
OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
OUTPUT_ARCH(i386:x86-64)
/*ENTRY(_start)*/
/* Two loadable segments: 'text' also carries the ELF file header and the
   program headers; 'data' receives only the empty .data output section. */
PHDRS
{
text PT_LOAD FILEHDR PHDRS ;
data PT_LOAD ; /* for setting brk(0) */
}
SECTIONS
{
/* Start after the headers, leaving 12 bytes for the l_info. */
. = 0x00100000 + SIZEOF_HEADERS + 12; /* 12==sizeof(l_info) */
/* Input .text and .data are both placed in the 'text' segment. */
.text : {
*(.text)
*(.data)
} : text
.data : {
} : data
}