JIT: reduced size of emitted code (30% smaller), slightly better performance

Dimitris Panokostas 2020-02-03 10:52:55 +01:00
parent e66e1e863f
commit 30062e91f2
4 changed files with 1115 additions and 1079 deletions
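
Most of the size reduction comes from how block exits are emitted. Previously, nearly every exit path loaded its target address from a literal placed directly in the translated code and then jumped through a register, so each block carried its own copies of pointers such as do_nothing or execute_exception. After this commit, those exits are a single PC-relative branch to a shared popall_* trampoline, patched in place with write_jmp_target. A condensed sketch of the ARM (32-bit) pattern, built only from emitter macros that appear in the hunks below and not an excerpt of any one function:

// Sketch, not part of the diff.
// Before: per-exit literal pool plus an indirect jump through the PC.
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);   // PC reads as current+8, so -4 hits the word emitted below
emit_long((uintptr)do_nothing);      // 4-byte target literal duplicated in every block

// After: one relative branch to a shared stub, fixed up after emission.
uae_u32* branchadd = (uae_u32*)get_target();              // remember the branch slot
B_i(0);                                                   // placeholder offset
write_jmp_target(branchadd, (uintptr)popall_do_nothing);  // patch in the stub's address

The popall_* stubs presumably bundle the common epilogue (restoring the preserved registers before jumping to the real handler), which is why several exit sites below can also drop their explicit pop-and-literal sequences.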


@@ -140,6 +140,7 @@ struct regstruct
/* pointer to real arrays/structs for easier access in JIT */
uae_u32 *raw_cputbl_count;
uintptr mem_banks;
uintptr cache_tags;
#endif
};
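
The new cache_tags member mirrors mem_banks: with the pointer stored inside regstruct, generated code can fetch it with a single load relative to R_REGSTRUCT (the register the JIT already keeps pointing at regs) instead of embedding the cache_tags address as a literal at every dispatch site. A sketch of the resulting AArch64 dispatch sequence, assembled from the endblock hunk further down; rr_pc names the register holding the masked 68k PC tag, and the field itself is presumably initialized elsewhere in this commit:

// Sketch, not part of the diff.
uintptr offs = (uintptr)(&regs.cache_tags) - (uintptr)&regs;  // offset of the field inside regstruct
LDR_xXi(REG_WORK1, R_REGSTRUCT, offs);          // load the cache_tags base pointer
LDR_xXxLSLi(REG_WORK1, REG_WORK1, rr_pc, 3);    // each cache line holds a pointer -> scale by 8
BR_x(REG_WORK1);                                // jump to the cached handler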


@@ -242,22 +242,6 @@ LOWFUNC(WRITE,READ,1,compemu_raw_cmp_pc,(IMPTR s))
}
LENDFUNC(WRITE,READ,1,compemu_raw_cmp_pc,(IMPTR s))
LOWFUNC(NONE,WRITE,1,compemu_raw_set_pc_m,(MEMR s))
{
uintptr idx;
if(s >= (uintptr) &regs && s < ((uintptr) &regs) + sizeof(struct regstruct)) {
idx = s - (uintptr) & regs;
LDR_rRI(REG_WORK1, R_REGSTRUCT, idx);
} else {
LOAD_U32(REG_WORK1, s);
LDR_rR(REG_WORK1, REG_WORK1);
}
idx = (uintptr) &(regs.pc_p) - (uintptr) &regs;
STR_rRI(REG_WORK1, R_REGSTRUCT, idx);
}
LENDFUNC(NONE,WRITE,1,compemu_raw_set_pc_m,(MEMR s))
LOWFUNC(NONE,WRITE,1,compemu_raw_set_pc_i,(IMPTR s))
{
LOAD_U32(REG_WORK2, s);
@@ -439,21 +423,49 @@ STATIC_INLINE void compemu_raw_handle_except(IM32 cycles)
branchadd = (uae_u32*)get_target();
BEQ_i(0); // no exception, jump to next instruction
raw_pop_preserved_regs();
LOAD_U32(REG_PAR1, cycles);
LDR_rRI(RPC_INDEX, RPC_INDEX, -4); // <execute_exception>
emit_long((uintptr)execute_exception);
uae_u32* branchadd2 = (uae_u32*)get_target();
B_i(0); // <popall_execute_exception>
write_jmp_target(branchadd2, (uintptr)popall_execute_exception);
// Write target of next instruction
write_jmp_target(branchadd, (uintptr)get_target());
}
STATIC_INLINE void compemu_raw_maybe_recompile(uintptr t)
LOWFUNC(NONE,WRITE,1,compemu_raw_execute_normal,(MEMR s))
{
BGE_i(2);
raw_pop_preserved_regs();
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
emit_long(t);
LOAD_U32(REG_WORK1, s);
LDR_rR(REG_WORK1, REG_WORK1);
uae_u32* branchadd = (uae_u32*)get_target();
B_i(0); // <popall_execute_normal_setpc>
write_jmp_target(branchadd, (uintptr)popall_execute_normal_setpc);
}
LENDFUNC(NONE,WRITE,1,compemu_raw_execute_normal,(MEMR s))
LOWFUNC(NONE,WRITE,1,compemu_raw_check_checksum,(MEMR s))
{
LOAD_U32(REG_WORK1, s);
LDR_rR(REG_WORK1, REG_WORK1);
uae_u32* branchadd = (uae_u32*)get_target();
B_i(0); // <popall_check_checksum_setpc>
write_jmp_target(branchadd, (uintptr)popall_check_checksum_setpc);
}
LENDFUNC(NONE,WRITE,1,compemu_raw_check_checksum,(MEMR s))
LOWFUNC(NONE,WRITE,1,compemu_raw_exec_nostats,(IMPTR s))
{
LOAD_U32(REG_WORK1, s);
uae_u32* branchadd = (uae_u32*)get_target();
B_i(0); // <exec_nostats>
write_jmp_target(branchadd, (uintptr)popall_exec_nostats_setpc);
}
LENDFUNC(NONE,WRITE,1,compemu_raw_exec_nostats,(IMPTR s))
STATIC_INLINE void compemu_raw_maybe_recompile(void)
{
uae_u32* branchadd = (uae_u32*)get_target();
BLT_i(0);
write_jmp_target(branchadd, (uintptr)popall_recompile_block);
}
STATIC_INLINE void compemu_raw_jmp(uintptr t)
@@ -468,24 +480,23 @@ STATIC_INLINE void compemu_raw_jmp(uintptr t)
}
}
STATIC_INLINE void compemu_raw_jmp_pc_tag(uintptr base)
STATIC_INLINE void compemu_raw_jmp_pc_tag(void)
{
uintptr idx = (uintptr)&regs.pc_p - (uintptr)&regs;
LDRH_rRI(REG_WORK1, R_REGSTRUCT, idx);
LDR_rR(REG_WORK2, RPC_INDEX);
idx = (uintptr)&regs.cache_tags - (uintptr)&regs;
LDR_rRI(REG_WORK2, R_REGSTRUCT, idx);
LDR_rRR_LSLi(RPC_INDEX, REG_WORK2, REG_WORK1, 2);
emit_long(base);
}
STATIC_INLINE void compemu_raw_maybe_cachemiss(uintptr t)
STATIC_INLINE void compemu_raw_maybe_cachemiss(void)
{
BEQ_i(2);
raw_pop_preserved_regs();
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
emit_long(t);
uae_u32* branchadd = (uae_u32*)get_target();
BNE_i(0);
write_jmp_target(branchadd, (uintptr)popall_cache_miss);
}
STATIC_INLINE void compemu_raw_maybe_do_nothing(IM32 cycles, uintptr adr)
STATIC_INLINE void compemu_raw_maybe_do_nothing(IM32 cycles)
{
clobber_flags();
@@ -505,9 +516,9 @@ STATIC_INLINE void compemu_raw_maybe_do_nothing(IM32 cycles, uintptr adr)
}
STR_rRI(REG_WORK2, R_REGSTRUCT, idx);
raw_pop_preserved_regs();
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
emit_long(adr);
uae_u32* branchadd2 = (uae_u32*)get_target();
B_i(0);
write_jmp_target(branchadd2, (uintptr)popall_do_nothing);
// <end>
write_jmp_target((uae_u32*)branchadd, (uintptr)get_target());
@@ -545,22 +556,18 @@ LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_inreg,(RR4 rr_pc, IM32 cycles))
}
STR_rRI(REG_WORK1, R_REGSTRUCT, offs);
uae_u32* branchadd = (uae_u32*)get_target();
CC_B_i(NATIVE_CC_MI, 0);
write_jmp_target(branchadd, (uintptr)popall_do_nothing);
#ifdef ARMV6T2
CC_B_i(NATIVE_CC_MI, 2);
BFC_rii(rr_pc, 16, 31); // apply TAGMASK
#else
CC_B_i(NATIVE_CC_MI, 3);
BIC_rri(rr_pc, rr_pc, 0x00ff0000);
BIC_rri(rr_pc, rr_pc, 0xff000000);
#endif
LDR_rRI(REG_WORK1, RPC_INDEX, 8); // <cache_tags>
offs = (uintptr)(&regs.cache_tags) - (uintptr)&regs;
LDR_rRI(REG_WORK1, R_REGSTRUCT, offs);
LDR_rRR_LSLi(RPC_INDEX, REG_WORK1, rr_pc, 2);
raw_pop_preserved_regs();
LDR_rRI(RPC_INDEX, RPC_INDEX, 0); // <do_nothing>
emit_long((uintptr)cache_tags);
emit_long((uintptr)do_nothing);
}
LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_inreg,(RR4 rr_pc, IM32 cycles))
@@ -584,14 +591,14 @@ STATIC_INLINE uae_u32* compemu_raw_endblock_pc_isconst(IM32 cycles, IMPTR v)
tba = (uae_u32*)get_target();
CC_B_i(NATIVE_CC_MI^1, 0); // <target set by caller>
LDR_rRI(REG_WORK1, RPC_INDEX, 8); // <v>
LDR_rRI(REG_WORK1, RPC_INDEX, 4); // <v>
offs = (uintptr)&regs.pc_p - (uintptr)&regs;
STR_rRI(REG_WORK1, R_REGSTRUCT, offs);
raw_pop_preserved_regs();
LDR_rRI(RPC_INDEX, RPC_INDEX, 0); // <do_nothing>
uae_u32* branchadd = (uae_u32*)get_target();
B_i(0);
write_jmp_target(branchadd, (uintptr)popall_do_nothing);
emit_long(v);
emit_long((uintptr)do_nothing);
return tba;
}

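The AArch64 backend below gets the same treatment, and the saving per exit is even larger: the old sequence was a PC-relative load of a 64-bit literal, an indirect BR, and an 8-byte emit_longlong, all duplicated per block. The replacement is again a single B to a shared popall_* stub. An illustrative before/after, using macros from the hunks that follow and not an excerpt of any one function:

// Sketch, not part of the diff.
// Before: two instructions plus an 8-byte literal at every exit.
LDR_xPCi(REG_WORK1, 8);               // load the 64-bit literal placed right after the BR
BR_x(REG_WORK1);                      // indirect jump
emit_longlong((uintptr)do_nothing);   // 8-byte target literal duplicated per block

// After: one relative branch, patched with the shared stub's address.
uae_u32* branchadd = (uae_u32*)get_target();
B_i(0);                               // placeholder offset
write_jmp_target(branchadd, (uintptr)popall_do_nothing);
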

@@ -232,25 +232,6 @@ LOWFUNC(WRITE,READ,1,compemu_raw_cmp_pc,(IMPTR s))
}
LENDFUNC(WRITE,READ,1,compemu_raw_cmp_pc,(IMPTR s))
LOWFUNC(NONE,WRITE,1,compemu_raw_set_pc_m,(MEMR s))
{
uintptr idx;
if(s >= (uintptr) &regs && s < ((uintptr) &regs) + sizeof(struct regstruct)) {
idx = s - (uintptr) &regs;
if(s == (uintptr) &(regs.pc_p))
LDR_xXi(REG_WORK1, R_REGSTRUCT, idx);
else
LDR_wXi(REG_WORK1, R_REGSTRUCT, idx);
} else {
LOAD_U64(REG_WORK1, s);
LDR_xXi(REG_WORK1, REG_WORK1, 0);
}
idx = (uintptr) &(regs.pc_p) - (uintptr) &regs;
STR_xXi(REG_WORK1, R_REGSTRUCT, idx);
}
LENDFUNC(NONE,WRITE,1,compemu_raw_set_pc_m,(MEMR s))
LOWFUNC(NONE,WRITE,1,compemu_raw_set_pc_i,(IMPTR s))
{
LOAD_U64(REG_WORK1, s);
@@ -440,52 +421,84 @@ STATIC_INLINE void compemu_raw_handle_except(IM32 cycles)
branchadd = (uae_u32*)get_target();
CBZ_wi(REG_WORK1, 0); // no exception, jump to next instruction
raw_pop_preserved_regs();
LOAD_U32(REG_PAR1, cycles);
LDR_xPCi(REG_WORK1, 8); // <execute_exception>
BR_x(REG_WORK1);
emit_longlong((uintptr)execute_exception);
uae_u32* branchadd2 = (uae_u32*)get_target();
B_i(0); // <popall_execute_exception>
write_jmp_target(branchadd2, (uintptr)popall_execute_exception);
// Write target of next instruction
write_jmp_target(branchadd, (uintptr)get_target());
}
STATIC_INLINE void compemu_raw_maybe_recompile(uintptr t)
LOWFUNC(NONE,WRITE,1,compemu_raw_execute_normal,(MEMR s))
{
BGE_i(NUM_POP_CMDS + 5);
raw_pop_preserved_regs();
LDR_xPCi(REG_WORK1, 8);
BR_x(REG_WORK1);
emit_longlong(t);
LOAD_U64(REG_WORK1, s);
LDR_xXi(REG_WORK1, REG_WORK1, 0);
uae_u32* branchadd = (uae_u32*)get_target();
B_i(0); // <popall_execute_normal_setpc>
write_jmp_target(branchadd, (uintptr)popall_execute_normal_setpc);
}
LENDFUNC(NONE,WRITE,1,compemu_raw_execute_normal,(MEMR s))
LOWFUNC(NONE,WRITE,1,compemu_raw_check_checksum,(MEMR s))
{
LOAD_U64(REG_WORK1, s);
LDR_xXi(REG_WORK1, REG_WORK1, 0);
uae_u32* branchadd = (uae_u32*)get_target();
B_i(0); // <popall_check_checksum_setpc>
write_jmp_target(branchadd, (uintptr)popall_check_checksum_setpc);
}
LENDFUNC(NONE,WRITE,1,compemu_raw_check_checksum,(MEMR s))
LOWFUNC(NONE,WRITE,1,compemu_raw_exec_nostats,(IMPTR s))
{
LOAD_U64(REG_WORK1, s);
uae_u32* branchadd = (uae_u32*)get_target();
B_i(0); // <exec_nostats>
write_jmp_target(branchadd, (uintptr)popall_exec_nostats_setpc);
}
LENDFUNC(NONE,WRITE,1,compemu_raw_exec_nostats,(IMPTR s))
STATIC_INLINE void compemu_raw_maybe_recompile(void)
{
BGE_i(2);
uae_u32* branchadd = (uae_u32*)get_target();
B_i(0);
write_jmp_target(branchadd, (uintptr)popall_recompile_block);
}
STATIC_INLINE void compemu_raw_jmp(uintptr t)
{
LDR_xPCi(REG_WORK1, 8);
BR_x(REG_WORK1);
emit_longlong(t);
uintptr loc = (uintptr)get_target();
if(t > loc - 127 * 1024 * 1024 && t < loc + 127 * 1024 * 1024) {
B_i(0);
write_jmp_target((uae_u32*)loc, t);
} else {
LDR_xPCi(REG_WORK1, 8);
BR_x(REG_WORK1);
emit_longlong(t);
}
}
STATIC_INLINE void compemu_raw_jmp_pc_tag(uintptr base)
STATIC_INLINE void compemu_raw_jmp_pc_tag(void)
{
uintptr idx = (uintptr)&regs.pc_p - (uintptr)&regs;
LDRH_wXi(REG_WORK1, R_REGSTRUCT, idx);
LDR_xPCi(REG_WORK2, 12);
idx = (uintptr)&regs.cache_tags - (uintptr)&regs;
LDR_xXi(REG_WORK2, R_REGSTRUCT, idx);
LDR_xXxLSLi(REG_WORK1, REG_WORK2, REG_WORK1, 1);
BR_x(REG_WORK1);
emit_longlong(base);
}
STATIC_INLINE void compemu_raw_maybe_cachemiss(uintptr t)
STATIC_INLINE void compemu_raw_maybe_cachemiss(void)
{
BEQ_i(NUM_POP_CMDS + 5);
raw_pop_preserved_regs();
LDR_xPCi(REG_WORK1, 8);
BR_x(REG_WORK1);
emit_longlong(t);
BEQ_i(2);
uae_u32* branchadd = (uae_u32*)get_target();
B_i(0);
write_jmp_target(branchadd, (uintptr)popall_cache_miss);
}
STATIC_INLINE void compemu_raw_maybe_do_nothing(IM32 cycles, uintptr adr)
STATIC_INLINE void compemu_raw_maybe_do_nothing(IM32 cycles)
{
uintptr idx = (uintptr)&regs.spcflags - (uintptr) &regs;
LDR_wXi(REG_WORK1, R_REGSTRUCT, idx);
@@ -502,10 +515,9 @@ STATIC_INLINE void compemu_raw_maybe_do_nothing(IM32 cycles, uintptr adr)
}
STR_wXi(REG_WORK2, R_REGSTRUCT, idx);
raw_pop_preserved_regs();
LDR_xPCi(REG_WORK1, 8);
BR_x(REG_WORK1);
emit_longlong(adr);
uae_u32* branchadd2 = (uae_u32*)get_target();
B_i(0);
write_jmp_target(branchadd2, (uintptr)popall_do_nothing);
// <end>
write_jmp_target((uae_u32 *)branchadd, (uintptr)get_target());
@@ -535,18 +547,16 @@ LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_inreg,(RR4 rr_pc, IM32 cycles))
}
STR_wXi(REG_WORK1, R_REGSTRUCT, offs);
TBNZ_xii(REG_WORK1, 31, 7); // test sign and branch if set (negative)
TBNZ_xii(REG_WORK1, 31, 5); // test sign and branch if set (negative)
UBFIZ_xxii(rr_pc, rr_pc, 0, 16); // apply TAGMASK
LDR_xPCi(REG_WORK1, 12); // <cache_tags>
offs = (uintptr)(&regs.cache_tags) - (uintptr)&regs;
LDR_xXi(REG_WORK1, R_REGSTRUCT, offs);
LDR_xXxLSLi(REG_WORK1, REG_WORK1, rr_pc, 3); // cacheline holds pointer -> multiply with 8
BR_x(REG_WORK1);
emit_longlong((uintptr)cache_tags);
raw_pop_preserved_regs();
LDR_xPCi(REG_WORK1, 8); // <do_nothing>
BR_x(REG_WORK1);
emit_longlong((uintptr)do_nothing);
uae_u32* branchadd = (uae_u32*)get_target();
B_i(0);
write_jmp_target(branchadd, (uintptr)popall_do_nothing);
}
LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_inreg,(RR4 rr_pc, IM32 cycles))
@@ -569,15 +579,14 @@ STATIC_INLINE uae_u32* compemu_raw_endblock_pc_isconst(IM32 cycles, IMPTR v)
tba = (uae_u32*)get_target();
B_i(0); // <target set by caller>
LDR_xPCi(REG_WORK1, 16 + 4 * NUM_POP_CMDS); // <v>
LDR_xPCi(REG_WORK1, 12); // <v>
offs = (uintptr)&regs.pc_p - (uintptr)&regs;
STR_xXi(REG_WORK1, R_REGSTRUCT, offs);
raw_pop_preserved_regs();
LDR_xPCi(REG_WORK1, 16); // <do_nothing>
BR_x(REG_WORK1);
uae_u32* branchadd = (uae_u32*)get_target();
B_i(0);
write_jmp_target(branchadd, (uintptr)popall_do_nothing);
emit_longlong(v);
emit_longlong((uintptr)do_nothing);
return tba;
}

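One detail worth noting in the AArch64 compemu_raw_jmp above: a direct B only reaches targets within the range of its signed 26-bit word offset, so the new code measures the distance first and keeps the old literal-pool/BR form as a fallback. The 127 MB test is a conservative bound on that reach; a standalone check of the arithmetic (illustrative only, not part of the emulator):

// Sketch, not part of the diff.
#include <stdint.h>
#include <stdio.h>

int main(void) {
    // AArch64 B/BL encode a signed 26-bit word offset:
    // +/- 2^25 instructions of 4 bytes = +/- 2^27 bytes = +/- 128 MB of reach.
    int64_t reach = (int64_t)1 << 27;      // 134217728 bytes
    int64_t bound = 127LL * 1024 * 1024;   // 133169152 bytes, the bound used in the diff
    printf("reach = %lld, bound = %lld, margin = %lld bytes\n",
           (long long)reach, (long long)bound, (long long)(reach - bound));
    return 0;
}
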
File diff suppressed because it is too large.