diff --git a/src/stub/Makefile b/src/stub/Makefile index 5dc1813f..b80dff9e 100644 --- a/src/stub/Makefile +++ b/src/stub/Makefile @@ -322,11 +322,14 @@ fold_elf86.h: l_lx_elf.o fold_elf86.o l_lx_elf86.lds l_lx_exec.o: l_lx_exec.c $(CC_LINUX_I386) -c $< +upx_itoa.o: upx_itoa.asm + $(NASM) -f elf -o $@ $< + fold_exec86.o: fold_exec86.asm $(NASM) -f elf -o $@ $< -fold_exec86.h: l_lx_exec.o fold_exec86.o l_lx_exec86.lds - ld -T $(srcdir)/l_lx_exec86.lds -Map $T.map -o $T.bin $T.o l_lx_exec.o +fold_exec86.h: l_lx_exec.o upx_itoa.o fold_exec86.o l_lx_exec86.lds + ld -T $(srcdir)/l_lx_exec86.lds -Map $T.map -o $T.bin $T.o l_lx_exec.o upx_itoa.o objcopy -S -R .comment -R .note $T.bin $(STRIPELF) $T.bin $(BRANDELF) $T.bin diff --git a/src/stub/l_lx_exec.c b/src/stub/l_lx_exec.c index 22274c46..70f4de82 100644 --- a/src/stub/l_lx_exec.c +++ b/src/stub/l_lx_exec.c @@ -73,33 +73,35 @@ static __inline__ int xwrite(int fd, const void *buf, int count) // util **************************************************************************/ -// FIXME: all code in this source file must be relocatible, so -// we have to use `volatile' here. -// FIXME: rewrite upx_itoa() in assembly -static char *upx_itoa(char *buf, unsigned long v) -{ -// const unsigned TEN = 10; - volatile unsigned TEN = 10; - char *p = buf; - { - unsigned long k = v; - do { - p++; - k /= TEN; - } while (k > 0); - } - buf = p; - *p = 0; - { - unsigned long k = v; - do { - *--p = '0' + k % TEN; - k /= TEN; - } while (k > 0); - } - return buf; -} - +extern char * +__attribute__ ((regparm(2), stdcall)) // be ruthless: upx_itoa.asm expects v in eax, buf in edx +upx_itoa(unsigned long v, char *buf); +// Some versions of gcc optimize the division and/or remainder using +// a multiplication by (2**32)/10, and use a relocatable 32-bit address +// to reference the constant. We require no relocations because we move +// the code at runtime. See upx_itoa.asm for replacement [also smaller; it requires 0<=(int)v.] 
+//static char *upx_itoa(unsigned long v, char *buf) +//{ +// volatile unsigned TEN = 10; // an ugly way to achieve no relocation +// char *p = buf; +// { +// unsigned long k = v; +// do { +// p++; +// k /= TEN; +// } while (k > 0); +// } +// buf = p; +// *p = 0; +// { +// unsigned long k = v; +// do { +// *--p = '0' + k % TEN; +// k /= TEN; +// } while (k > 0); +// } +// return buf; +//} static uint32_t ascii5(char *p, uint32_t v, unsigned n) { @@ -159,9 +161,9 @@ go_self(char const *tmpname, char *argv[], char *envp[]) SET4(procself_buf + 0, '/', 'p', 'r', 'o'); SET4(procself_buf + 4, 'c', '/', 0 , 0 ); { - char *const procself = upx_itoa(procself_buf + 6, getpid()); + char *const procself = upx_itoa(getpid(), procself_buf + 6); SET4(procself, '/', 'f', 'd', '/'); - upx_itoa(procself + 4, fdi); + upx_itoa(fdi, procself + 4); } // Check for working /proc/self/fd/X by accessing the diff --git a/src/stub/upx_itoa.asm b/src/stub/upx_itoa.asm new file mode 100644 index 00000000..794cda2a --- /dev/null +++ b/src/stub/upx_itoa.asm @@ -0,0 +1,54 @@ +; upx_itoa.asm -- decimal print; smaller than gcc, and no relocations +; +; This file is part of the UPX executable compressor. +; +; Copyright (C) 2002 John F. Reiser +; All Rights Reserved. +; +; UPX and the UCL library are free software; you can redistribute them +; and/or modify them under the terms of the GNU General Public License as +; published by the Free Software Foundation; either version 2 of +; the License, or (at your option) any later version. +; +; This program is distributed in the hope that it will be useful, +; but WITHOUT ANY WARRANTY; without even the implied warranty of +; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +; GNU General Public License for more details. +; +; You should have received a copy of the GNU General Public License +; along with this program; see the file COPYING. 
+; If not, write to the Free Software Foundation, Inc., +; 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. +; +; John F. Reiser +; +; + + + BITS 32 + SECTION .text + GLOBAL upx_itoa + +upx_itoa: ; char *upx_itoa(eax= unsigned v, edx= char *buf) /* 0<=(int)v */ + push edi ; save register + mov edi,edx ; output ptr + push byte 10 + cld + pop ecx ; radix + call recur + mov [edi],ah ; NUL terminate: ah==0 because eax holds only the last digit char + xchg eax,edi ; eax= continuation point (address of the NUL), returned to caller + pop edi ; restore register + ret +recur: + cdq ; sign-extend eax into edx; equals zero-extend only while 0<=(int)eax [use "sub edx,edx" if eax < 0 ] + div ecx ; eax=quo, edx=rem; flags are undefined + push edx + test eax,eax + je quo0 + call recur +quo0: + pop eax ; remainder + add al, byte '0' + stosb + ret