i386 stub: prefer movzbl over movb fetch
32-bit mode on x86_64 hardware can fail a movb fetch into a register, randomly writing 0x00 instead of the fetched byte. Note that most CPUs can write only 32 or 64 bits to the register file, so writing just 8 or 16 bits incurs a 1-cycle penalty in order to form the full 32 bits by a Read-Modify-Write of the destination register.

    modified:   stub/src/arch/i386/nrv2b_d32-easy.S
    modified:   stub/src/i386-expand.S
parent 3d58035b41
commit 25e6a31004
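For illustration, a minimal sketch (not part of the commit; the label byte_fetch_demo is hypothetical) contrasting the two fetch forms the message describes:

        .text
        .globl  byte_fetch_demo
byte_fetch_demo:
        # movb writes only the low 8 bits of %edx, so the CPU must merge
        # the fetched byte with the stale upper 24 bits of %edx: the
        # Read-Modify-Write of the destination register described above.
        movb    (%esi),%dl
        # movzbl zero-extends the byte into all 32 bits of %edx, so the
        # write is full-width and carries no dependence on the old %edx.
        movzbl  (%esi),%edx
        ret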
--- a/stub/src/arch/i386/nrv2b_d32-easy.S
+++ b/stub/src/arch/i386/nrv2b_d32-easy.S
@@ -38,7 +38,7 @@ lit_n2b:
         incl %esi; movb %dl,(%edi)
         incl %edi
 top_n2b:
-        movb (%esi),%dl  # prefetch: literal, or bottom 8 bits of offset
+        movzbl (%esi),%edx  # prefetch: literal, or bottom 8 bits of offset
         jnextb1yp lit_n2b
         push $1; pop off
 offmore_n2b:
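The changed line is the speculative prefetch: the byte at (%esi) is read before the control bit that classifies it (literal vs. low offset byte) has been decoded, and jnextb1yp then branches on that bit. A rough sketch of the mechanism, assuming the bit-buffer lives in %ebx (the real source hides the register behind macros):

        movzbl  (%esi),%edx     # speculative fetch of the next input byte
        addl    %ebx,%ebx       # next control bit -> Carry flag
        jc      lit_n2b         # bit 1: %edx already holds the literal
        # bit 0: fall through; %edx holds the bottom 8 bits of an offset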
@@ -46,7 +46,7 @@ offmore_n2b:
         jnextb0np offmore_n2b
 
         subl $ 3,off; jc len_n2b  # use previous offset
-        shll $ 8,off; movzbl %dl,%edx
+        shll $ 8,off
         orl %edx,off; incl %esi
         xorl $~0,off; jz eof
         movl off,disp
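This hunk is the payoff of the movzbl prefetch: the offset is accumulated 8 bits at a time, and since the prefetch already left the byte zero-extended in %edx, the separate movzbl %dl,%edx before the merge becomes dead weight. A sketch of the accumulation step, assuming the macro off resolves to %ebp (illustrative):

        # before: shll $ 8,off; movzbl %dl,%edx; orl %edx,off
        # after:  the prefetch zero-extended into %edx, so merge directly:
        shll    $8,%ebp         # off <<= 8
        orl     %edx,%ebp       # or in the prefetched low byte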
--- a/stub/src/i386-expand.S
+++ b/stub/src/i386-expand.S
@@ -51,7 +51,7 @@ NBPW= 4
 #define GETBITp \
         addl bits,bits; jnz 0f; \
         movl (%esi),bits; sub $-4,%esi; \
-        adcl bits,bits; movb (%esi),%dl; \
+        adcl bits,bits; movzbl (%esi),%edx; \
 0:
 /* Same, but without prefetch (not useful for length of match.) */
 #define jnextb0n jnextb0y
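GETBITp is the classic guard-bit scheme: doubling the buffer exposes its MSB in Carry, and a zero result means only the guard bit was left. The refill re-seeds a 1 as the new guard LSB. The same discipline as a standalone sketch, assuming bits resolves to %ebx (an assumption; the source uses a symbolic name):

        addl    %ebx,%ebx       # next bit -> CF; result 0 => buffer empty
        jnz     0f
        movl    (%esi),%ebx     # reload 32 fresh payload bits
        sub     $-4,%esi        # %esi += 4; subtracting -4 borrows for
                                # any sane pointer, so CF is set to 1
        adcl    %ebx,%ebx       # shift in CF as the new guard LSB;
                                # CF out = first payload bit
0: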
@@ -125,7 +125,7 @@ __clear_cache: .globl __clear_cache
 refill:
         movl (%esi),bits; sub $-4,%esi  // next 32 bits; set Carry
         adcl bits,bits  // LSB= 1 (CarryIn); CarryOut= next bit
-        movb (%esi),%dl  // pre-fetch: literal, or bottom 8 bits of offset
+        movzbl (%esi),%edx  // pre-fetch: literal, or bottom 8 bits of offset
         rep; ret
 getbit:
         addl bits,bits; jz refill  // Carry= next bit
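A side note on the unchanged rep; ret in this context: on some AMD cores a one-byte ret can predict poorly when it is a branch target or closely follows a branch, so the rep prefix is used to form a two-byte return with identical semantics. Sketch:

        # two-byte return: the rep prefix has no effect on ret itself,
        # but avoids a branch-predictor penalty on some AMD cores
        rep; ret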
@@ -133,16 +133,16 @@ getbit:
 
 copy:  // In: len, %edi, dispq; Out: 0==len, %edi, dispq; trashes %eax, %edx
         lea (%edi,dispq),%eax; cmpl $5,len  // <=3 is forced
-        movb (%eax),%dl; jbe copy1  // <=5 for better branch predict
+        movzbl (%eax),%edx; jbe copy1  // <=5 for better branch predict
         cmpl $-4,displ; ja copy1  // 4-byte chunks would overlap
         subl $4,len  // adjust for termination cases
 copy4:
         movl (%eax),%edx; add $4, %eax; subl $4,len
         movl %edx,(%edi); lea 4(%edi),%edi; jnc copy4
-        addl $4,len; movb (%eax),%dl; jz copy0
+        addl $4,len; movzbl (%eax),%edx; jz copy0
 copy1:
         inc %eax; movb %dl,(%edi); dec len
-        movb (%eax),%dl
+        movzbl (%eax),%edx
         lea 1(%edi),%edi; jnz copy1
 copy0:
         rep; ret
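The guard cmpl $-4,displ; ja copy1 is what keeps the 4-byte fast path safe: the match distance is kept as a negative displacement, and any distance shorter than 4 means a dword load from %edi+dispq would read bytes this very copy has not written yet (distance 1 is the run-length case that replicates a single byte). A sketch of the check, assuming displ resolves to %ebp (illustrative):

        cmpl    $-4,%ebp        # displacements -3,-2,-1 compare above
        ja      copy1           # -4 unsigned: overlap, copy bytewise
        # displacement <= -4: safe to move 4-byte chunks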