From 6bb104b39f17ecbad3b480a1e803e60c0b707391 Mon Sep 17 00:00:00 2001 From: John Reiser Date: Sun, 21 May 2000 15:44:11 +0000 Subject: [PATCH] Compress the compiled C code in the stub on lx_exec86 (saves 157 bytes). Also make 'fold_begin' processing more robust. p_lx_elf.cpp p_lx_sh.cpp p_unix.cpp stub/Makefile stub/l_lx_exec86.asm committer: jreiser 958923851 +0000 --- src/p_lx_elf.cpp | 2 +- src/p_lx_sh.cpp | 2 +- src/p_unix.cpp | 36 ++++++++-- src/stub/Makefile | 30 ++++++--- src/stub/l_lx_exec86.asm | 140 ++++++++++++++++++++++++++++----------- 5 files changed, 154 insertions(+), 56 deletions(-) diff --git a/src/p_lx_elf.cpp b/src/p_lx_elf.cpp index 39310ada..cbd2e478 100644 --- a/src/p_lx_elf.cpp +++ b/src/p_lx_elf.cpp @@ -118,7 +118,7 @@ void PackLinuxI386elf::patchLoader() Elf_LE32_Phdr *const phdr = (Elf_LE32_Phdr *)(1+ehdr); // stub/scripts/setfold.pl puts address of 'fold_begin' in phdr[1].p_offset - off_t const fold_begin = phdr[1].p_offset + 0x80; + off_t const fold_begin = phdr[1].p_offset; MemBuffer cprLoader(lsize); // compress compiled C-code portion of loader diff --git a/src/p_lx_sh.cpp b/src/p_lx_sh.cpp index ffdd2a81..043e1c63 100644 --- a/src/p_lx_sh.cpp +++ b/src/p_lx_sh.cpp @@ -100,7 +100,7 @@ void PackLinuxI386sh::patchLoader() patch_le32(loader,lsize,"UPX2",o_shname); // stub/scripts/setfold.pl puts address of 'fold_begin' in phdr[1].p_offset - off_t const fold_begin = phdri[1].p_offset + 0x80; + off_t const fold_begin = phdri[1].p_offset; MemBuffer cprLoader(lsize); // compress compiled C-code portion of loader diff --git a/src/p_unix.cpp b/src/p_unix.cpp index 49fabde1..ea3413a6 100644 --- a/src/p_unix.cpp +++ b/src/p_unix.cpp @@ -426,10 +426,37 @@ void PackLinuxI386::patchLoader() //patch_le32(loader,lsize,"UPX1",lsize); no longer used patchVersion(loader,lsize); + Elf_LE32_Ehdr *const ehdr = (Elf_LE32_Ehdr *)(void *)loader; + Elf_LE32_Phdr *const phdr = (Elf_LE32_Phdr *)(1+ehdr); + + // stub/scripts/setfold.pl puts address of 
'fold_begin' in phdr[1].p_offset + off_t const fold_begin = phdr[1].p_offset; + MemBuffer cprLoader(lsize); + + // compress compiled C-code portion of loader + upx_compress_config_t conf; memset(&conf, 0xff, sizeof(conf)); + conf.c_flags = 0; + upx_uint result_buffer[16]; + size_t const uncLsize = lsize - fold_begin; + size_t cprLsize; + upx_compress( + loader + fold_begin, uncLsize, + cprLoader, &cprLsize, + 0, // progress_callback_t ?? + getCompressionMethod(), 9, + &conf, + result_buffer + ); + memcpy(fold_begin+loader, cprLoader, cprLsize); + lsize = fold_begin + cprLsize; + phdr->p_filesz = lsize; + // phdr->p_memsz is the decompressed size + // The beginning of our loader consists of a elf_hdr (52 bytes) and // one section elf_phdr (32 byte) now, // another section elf_phdr (32 byte) later, so we have 12 free bytes // from offset 116 to the program start at offset 128. + // These 12 bytes are used for l_info by ::patchLoaderChecksum(). assert(get_le32(loader + 28) == 52); // e_phoff assert(get_le32(loader + 32) == 0); // e_shoff assert(get_le16(loader + 40) == 52); // e_ehsize @@ -464,16 +491,13 @@ void PackLinuxI386::updateLoader(OutputFile *fo) ehdr->e_phnum = 2; // The first Phdr maps the stub (instructions, data, bss) rwx. - // Round up hi address to page boundary. - Elf_LE32_Phdr *phdro = (Elf_LE32_Phdr *)(sizeof(Elf_LE32_Ehdr)+loader); - unsigned const vaddr2 = PAGE_MASK & (~PAGE_MASK + phdro->p_memsz + phdro->p_vaddr); - // The second Phdr maps the overlay r--, // to defend against /usr/bin/strip removing the overlay. 
- ++phdro; + Elf_LE32_Phdr *const phdro = 1+(Elf_LE32_Phdr *)(1+ehdr); + phdro->p_type = PT_LOAD; phdro->p_offset = lsize; - phdro->p_paddr = phdro->p_vaddr = vaddr2 + (lsize &~ PAGE_MASK); + phdro->p_paddr = phdro->p_vaddr = 0x00400000 + (lsize &~ PAGE_MASK); phdro->p_memsz = phdro->p_filesz = fo->getBytesWritten() - lsize; phdro->p_flags = PF_R; phdro->p_align = -PAGE_MASK; diff --git a/src/stub/Makefile b/src/stub/Makefile index fe90be8a..221321ee 100644 --- a/src/stub/Makefile +++ b/src/stub/Makefile @@ -186,11 +186,13 @@ l_w32pe.h: l_w32pe.asx # // linux rules (exec, elf, sh, sep) # ************************************************************************/ -l_lx_n2b.h: l_lx_exec.c l_xe_n2b.o l_lx_exec86.lds +l_lx_n2b.h: l_lx_exec.c l_xe_n2b.o l_lx_exec86.lds Makefile $(CC_LINUX) -DNRV2B -o $T.o -c $< ld -T l_lx_exec86.lds -Map l_lx_n2b.map -o $T.bin \ l_xe_n2b.o $T.o - objcopy -S -R .comment -R .note $T.bin + fold=`nm $T.bin | grep fold_begin | sed 's/.....\(...\).*/0x\1/'`; \ + objcopy -S -R .comment -R .note $T.bin; \ + $(SETFOLD) $T.bin $$fold $(STRIPELF) $T.bin $(BRANDELF) $T.bin $(BIN2H) $T.bin linux_i386exec_nrv2b_loader $@ @@ -199,8 +201,9 @@ l_le_n2b.h: l_lx_elf.c l_6e_n2b.o l_lx_elf86.lds $(CC_LINUX) -DNRV2B -o $T.o -c $< ld -T l_lx_elf86.lds -Map $T.map -o $T.bin \ l_6e_n2b.o $T.o - objcopy -S -R .comment -R .note $T.bin - $(SETFOLD) $T.bin 0x`nm l_6e_n2b.o | grep fold_begin` + fold=`nm $T.bin | grep fold_begin | sed 's/.....\(...\).*/0x\1/'`; \ + objcopy -S -R .comment -R .note $T.bin; \ + $(SETFOLD) $T.bin $$fold $(STRIPELF) $T.bin $(BRANDELF) $T.bin $(BIN2H) $T.bin linux_i386elf_nrv2b_loader $@ @@ -209,8 +212,9 @@ l_sh_n2b.h: l_lx_sh.c l_6h_n2b.o l_lx_sh86.lds $(CC_LINUX) -DNRV2B -o $T.o -c $< ld -T l_lx_sh86.lds -Map $T.map -o $T.bin \ l_6h_n2b.o $T.o - objcopy -S -R .comment -R .note $T.bin - $(SETFOLD) $T.bin 0x`nm l_6h_n2b.o | grep fold_begin` + fold=`nm $T.bin | grep fold_begin | sed 's/.....\(...\).*/0x\1/'`; \ + objcopy -S -R .comment -R 
.note $T.bin; \ + $(SETFOLD) $T.bin $$fold $(STRIPELF) $T.bin $(BRANDELF) $T.bin $(BIN2H) $T.bin linux_i386sh_nrv2b_loader $@ @@ -229,7 +233,9 @@ l_lx_n2d.h: l_lx_exec.c l_xe_n2d.o l_lx_exec86.lds $(CC_LINUX) -DNRV2D -o $T.o -c $< ld -T l_lx_exec86.lds -Map $T.map -o $T.bin \ l_xe_n2d.o $T.o - objcopy -S -R .comment -R .note $T.bin + fold=`nm $T.bin | grep fold_begin | sed 's/.....\(...\).*/0x\1/'`; \ + objcopy -S -R .comment -R .note $T.bin; \ + $(SETFOLD) $T.bin $$fold $(STRIPELF) $T.bin $(BRANDELF) $T.bin $(BIN2H) $T.bin linux_i386exec_nrv2d_loader $@ @@ -238,8 +244,9 @@ l_le_n2d.h: l_lx_elf.c l_6e_n2d.o l_lx_elf86.lds $(CC_LINUX) -DNRV2D -o $T.o -c $< ld -T l_lx_elf86.lds -Map $T.map -o $T.bin \ l_6e_n2d.o $T.o - objcopy -S -R .comment -R .note $T.bin - $(SETFOLD) $T.bin 0x`nm l_6e_n2d.o | grep fold_begin` + fold=`nm $T.bin | grep fold_begin | sed 's/.....\(...\).*/0x\1/'`; \ + objcopy -S -R .comment -R .note $T.bin; \ + $(SETFOLD) $T.bin $$fold $(STRIPELF) $T.bin $(BRANDELF) $T.bin $(BIN2H) $T.bin linux_i386elf_nrv2d_loader $@ @@ -248,8 +255,9 @@ l_sh_n2d.h: l_lx_sh.c l_6h_n2d.o l_lx_sh86.lds $(CC_LINUX) -DNRV2D -o $T.o -c $< ld -T l_lx_sh86.lds -Map $T.map -o $T.bin \ l_6h_n2d.o $T.o - objcopy -S -R .comment -R .note $T.bin - $(SETFOLD) $T.bin 0x`nm l_6h_n2d.o | grep fold_begin` + fold=`nm $T.bin | grep fold_begin | sed 's/.....\(...\).*/0x\1/'`; \ + objcopy -S -R .comment -R .note $T.bin; \ + $(SETFOLD) $T.bin $$fold $(STRIPELF) $T.bin $(BRANDELF) $T.bin $(BIN2H) $T.bin linux_i386sh_nrv2d_loader $@ diff --git a/src/stub/l_lx_exec86.asm b/src/stub/l_lx_exec86.asm index f5b46a8f..fb106876 100644 --- a/src/stub/l_lx_exec86.asm +++ b/src/stub/l_lx_exec86.asm @@ -1,10 +1,13 @@ -; l_lx_exec86.asm -- Linux program entry point & decompressor (execve) +; l_lx_exec86.asm -- Linux program entry point & decompressor (kernel exec) ; ; This file is part of the UPX executable compressor. 
; ; Copyright (C) 1996-2000 Markus Franz Xaver Johannes Oberhumer ; Copyright (C) 1996-2000 Laszlo Molnar ; +; Integration of virtual exec() with decompression is +; Copyright (C) 2000 John F. Reiser. All rights reserved. +; ; UPX and the UCL library are free software; you can redistribute them ; and/or modify them under the terms of the GNU General Public License as ; published by the Free Software Foundation; either version 2 of @@ -23,6 +26,8 @@ ; Markus F.X.J. Oberhumer Laszlo Molnar ; markus.oberhumer@jk.uni-linz.ac.at ml1050@cdata.tvnet.hu ; +; John F. Reiser +; jreiser@BitWagon.com BITS 32 @@ -48,8 +53,8 @@ %endif -%include "ident.ash" +%include "ident.ash" ; /************************************************************************* ; // program entry point @@ -57,28 +62,45 @@ ; **************************************************************************/ GLOBAL _start -EXTERN upx_main _start: - call main ; push &decompress +;;;; int3 +;; How to debug this code: Uncomment the 'int3' breakpoint instruction above. +;; Build the stubs and upx. Compress a testcase, such as a copy of /bin/date. +;; Invoke gdb, and give a 'run' command. Define a single-step macro such as +;; define g +;; stepi +;; x/i $pc +;; end +;; and a step-over macro such as +;; define h +;; x/2i $pc +;; tbreak *$_ +;; continue +;; x/i $pc +;; end +;; Step through the code; remember that <Enter> repeats the previous command. 
+;; +%if 0 + ; personality(PER_LINUX) + mov eax, 136 ; syscall_personality + xor ebx, ebx ; PER_LINUX + int 0x80 +%endif + call main ; push address of decompress subroutine ; /************************************************************************* ; // C callable decompressor ; **************************************************************************/ -%define INP dword [esp+24+4] -%define INS dword [esp+24+8] -%define OUTP dword [esp+24+12] -%define OUTS dword [esp+24+16] +%define INP dword [esp+8*4+4] +%define INS dword [esp+8*4+8] +%define OUTP dword [esp+8*4+12] +%define OUTS dword [esp+8*4+16] decompress: - push ebp - push edi - push esi - push ebx - push ecx - push edx - cld + pusha + ; cld mov esi, INP mov edi, OUTP @@ -110,36 +132,80 @@ decompress: mov edx, OUTS mov [edx], edi - pop edx - pop ecx - pop ebx - pop esi - pop edi - pop ebp + mov [7*4 + esp], eax + popa ret -; /************************************************************************* -; // prepare arguments and call upx_main -; **************************************************************************/ + +%define PAGE_MASK (~0<<12) +%define PAGE_SIZE ( 1<<12) + +%define szElf32_Ehdr 0x34 +%define szElf32_Phdr 8*4 +%define p_filesz 4*4 +%define p_memsz 5*4 +%define a_val 4 + +%define MAP_FIXED 0x10 +%define MAP_PRIVATE 0x02 +%define MAP_ANONYMOUS 0x20 +%define PROT_READ 1 +%define PROT_WRITE 2 +%define PROT_EXEC 4 +%define __NR_mmap 90 +%define __NR_munmap 91 + +; Decompress the rest of this loader, and jump to it +unfold: + pop esi ; &fold_begin = src + push esi ; &dst + mov ecx, ebp ; &decompress + and ecx, dword PAGE_MASK ; &my_elfhdr + mov ebx, ecx ; save &my_elfhdr for later + +;; Compressed code now begins at fold_begin. +;; We want decompressed code to begin at fold_begin, too. +;; Move the compressed code to the high end of the page. +;; Assume non-overlapping so that forward movsb is OK. 
+ lea edi, [-PAGE_MASK + ecx] ; high end of page + add ecx, [p_filesz + szElf32_Ehdr + ecx] ; beyond src + sub ecx, esi ; srclen + push ecx ; srclen + sub edi, ecx + push edi ; &src + cld + rep movsb + + call ebp ; decompress(&src, srclen, &dst, &dstlen) + pop eax ; discard &src + pop eax ; discard srclen + pop eax ; &dst == fold_begin + +;; icache lookahead of compressed code after "call unfold" is still there. +;; Synchronize with dcache of decompressed code. + pushf + push cs + push eax + iret ; back to fold_begin! + main: - pop ebp ; &decompress -%if 0 - ; personality(PER_LINUX) - mov eax, 136 ; syscall_personality - xor ebx, ebx ; PER_LINUX - int 0x80 -%endif + pop ebp ; &decompress + push eax ; place to store dstlen + push esp ; &dstlen + call unfold +fold_begin: ;; this label is known to the Makefile + pop eax ; discard &dstlen + pop eax ; discard dstlen + pop eax ; Pop the argument count mov ecx, esp ; argv starts just at the current stack top lea edx, [ecx+eax*4+4] ; envp = &argv[argc + 1] push eax ; Restore the stack push ebp ; argument: &decompress -%define PAGE_MASK (~0<<12) - and ebp, PAGE_MASK - push ebp ; argument: &Elf32_Ehdr - push edx ; Push third argument: envp - push ecx ; Push second argument: argv -;;; push eax ; Push first argument: argc + push ebx ; argument: &my_elfhdr + push edx ; argument: envp + push ecx ; argument: argv +EXTERN upx_main call upx_main ; Call the UPX main function hlt ; Crash if somehow upx_main does return