From a7bff34784796121ca7a732640f384032ea44862 Mon Sep 17 00:00:00 2001
From: John Reiser
Date: Sat, 25 Mar 2006 15:55:20 +0000
Subject: [PATCH] Fix THUMB mode return to ARM mode on ARMv4T. Spend 18 bytes to inline most of get1b via conditional subroutine call idiom.

committer: jreiser 1143302120 +0000
---
 src/stub/arm_nrv2e_d8.S | 32 ++++++++++++--------------------
 1 file changed, 12 insertions(+), 20 deletions(-)

diff --git a/src/stub/arm_nrv2e_d8.S b/src/stub/arm_nrv2e_d8.S
index d6999453..f9dcff43 100644
--- a/src/stub/arm_nrv2e_d8.S
+++ b/src/stub/arm_nrv2e_d8.S
@@ -62,14 +62,8 @@
 #define CHECK_BYTE /*empty*/
 #endif /*}*/
 
-/* Putting get1_n2e in a register [r6:wrnk] inhibits branch prediction,
- and saves only 14 bytes (9 calls, but 2 Thumb instr to setup).
- 'bl' takes 4 bytes and 2 cycles. It is tempting to inline
- "add bits,bits; beq " instead, but branching back costs
- 9*4 bytes with 4-byte alignment (adr tmp,; b fetch8), or
- 9*6 bytes without alignment (bl fetch8; b ).
-*/
-#define GETBIT bl get1_n2e
+/* "mov lr,pc; bxx ..." implements conditional subroutine call */
+#define GETBIT add bits,bits; mov lr,pc; beq get1_n2e
 
 #define getnextb(reg) GETBIT; adc reg,reg
 #define jnextb0 GETBIT; bcc
@@ -109,7 +103,16 @@ eof_n2e:
  sub src,srclim @ 0 if actual src length equals expected length
  sub dst,r3 @ actual dst length
  str dst,[r4]
- pop {r4,r5,r6,r7, pc} @ return
+ pop {r4,r5,r6,r7 /*,pc*/}
+ pop {r1}; bx r1 @ "pop {,pc}" fails return to ARM mode on ARMv4T
+
+get1_n2e: @ In: Carry set [from adding 0x80000000 (1<<31) to itself]
+ ldrb bits,[src] @ zero-extend next byte
+ adc bits,bits @ double and insert CarryIn as low bit
+ CHECK_SRC
+ add src,#1
+ lsl bits,#24 @ move to top byte, and set CarryOut from old bit 8
+ mov pc,lr @ return, stay in current (THUMB) mode
 
 lit_n2e:
  CHECK_SRC; ldrb tmp,[src]; add src,#1
@@ -171,17 +174,6 @@ copy_n2e:
  b top_n2e
  .size ucl_nrv2e_decompress_8, .-ucl_nrv2e_decompress_8
 
-get1_n2e: .type get1_n2e, %function
- add bits,bits; bne get1r_n2e @ CarryOut has data bit
- ldrb bits,[src] @ zero-extend next byte
- adc bits,bits @ double and insert CarryIn as low bit
- CHECK_SRC
- add src,#1
- lsl bits,#24 @ move to top byte, and set CarryOut from old bit 8
-get1r_n2e:
- bx lr
- .size get1_n2e, .-get1_n2e
-
 /*
 vi:ts=8:et:nowrap
 */