diff --git a/src/jit/codegen_arm.cpp b/src/jit/codegen_arm.cpp index defbb5d8..f291cd44 100644 --- a/src/jit/codegen_arm.cpp +++ b/src/jit/codegen_arm.cpp @@ -85,12 +85,6 @@ extern void __clear_cache (char*, char*); #define REG_PC_PRE R0_INDEX /* The register we use for preloading regs.pc_p */ #define REG_PC_TMP R1_INDEX /* Another register that is not the above */ -#define MUL_NREG1 R0_INDEX /* %r4 will hold the low 32 bits after a 32x32 mul */ -#define MUL_NREG2 R1_INDEX /* %r5 will hold the high 32 bits */ - -#define STACK_ALIGN 4 -#define STACK_OFFSET sizeof(void *) - #define R_REGSTRUCT 11 uae_s8 always_used[]={2,3,R_REGSTRUCT,12,-1}; // r2, r3 and r12 are work register in emitted code @@ -164,8 +158,6 @@ STATIC_INLINE void SIGNED16_REG_2_REG(W4 d, RR4 s) { #define SIGN_EXTEND_16_REG_2_REG(d,s) SIGNED16_REG_2_REG(d,s) -#define jit_unimplemented(fmt, ...) do{ jit_log("**** Unimplemented ****\n"); jit_log(fmt, ## __VA_ARGS__); abort(); }while (0) - LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, RR4 s)) { ADD_rrr(d, d, s); @@ -220,8 +212,8 @@ LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, I } SIGNED8_IMM_2_REG(REG_WORK1, offset); - - ADD_rrr(REG_WORK1, s, REG_WORK1); + ADD_rrr(REG_WORK1, s, REG_WORK1); + ADD_rrrLSLi(d, REG_WORK1, index, shft); } LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, IMM offset)) @@ -398,36 +390,6 @@ LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i)) -STATIC_INLINE void raw_dec_sp(int off) -{ - if (off) { - if(CHECK32(off)) { - SUB_rri(RSP_INDEX, RSP_INDEX, off); - } else { - LDR_rRI(REG_WORK1, RPC_INDEX, 4); - SUB_rrr(RSP_INDEX, RSP_INDEX, REG_WORK1); - B_i(0); - //: - emit_long(off); - } - } -} - -STATIC_INLINE void raw_inc_sp(int off) -{ - if (off) { - if(CHECK32(off)) { - ADD_rri(RSP_INDEX, RSP_INDEX, off); - } else { - LDR_rRI(REG_WORK1, RPC_INDEX, 4); - ADD_rrr(RSP_INDEX, RSP_INDEX, REG_WORK1); - B_i(0); - //: - emit_long(off); 
- } - } -} - STATIC_INLINE void raw_push_regs_to_preserve(void) { PUSH_REGS(PRESERVE_MASK); } @@ -436,11 +398,6 @@ STATIC_INLINE void raw_pop_preserved_regs(void) { POP_REGS(PRESERVE_MASK); } -STATIC_INLINE void raw_load_flagx(uae_u32 t, uae_u32 r) -{ - LDR_rRI(t, R_REGSTRUCT, 17 * 4); // X flag are next to 8 Dregs, 8 Aregs and CPSR in struct regstruct -} - STATIC_INLINE void raw_flags_evicted(int r) { live.state[FLAGTMP].status = INMEM; @@ -465,24 +422,6 @@ STATIC_INLINE void raw_reg_to_flags(int r) MSR_CPSRf_r(r); } -STATIC_INLINE void raw_load_flagreg(uae_u32 t, uae_u32 r) -{ - LDR_rRI(t, R_REGSTRUCT, 16 * 4); // Flags are next to 8 Dregs and 8 Aregs in struct regstruct -} - -/* %eax register is clobbered if target processor doesn't support fucomi */ -#define FFLAG_NREG_CLOBBER_CONDITION 0 -#define FFLAG_NREG R0_INDEX -#define FLAG_NREG2 -1 -#define FLAG_NREG1 -1 -#define FLAG_NREG3 -1 - -STATIC_INLINE void raw_emit_nop_filler(int nbytes) -{ - nbytes >>= 2; - while(nbytes--) { NOP(); } -} - // // Arm instructions // @@ -526,19 +465,46 @@ LOWFUNC(WRITE,NONE,2,compemu_raw_MERGE_rr,(RW4 d, RR4 s)) } LENDFUNC(WRITE,NONE,2,compemu_raw_MERGE_rr,(RW4 d, RR4 s)) +LOWFUNC(WRITE,NONE,2,compemu_raw_MERGE8_rr,(RW4 d, RR4 s)) +{ +#ifdef ARMV6T2 + UBFX_rrii(REG_WORK1, s, 8, 24); + BFI_rrii(d, REG_WORK1, 8, 31); +#else + AND_rri(REG_WORK1, s, 0xffffff00); + BIC_rri(d, d, 0xffffff00); + ORR_rrr(d, d, REG_WORK1); +#endif +} +LENDFUNC(WRITE,NONE,2,compemu_raw_MERGE8_rr,(RW4 d, RR4 s)) + LOWFUNC(WRITE,RMW,2,compemu_raw_add_l_mi,(IMM d, IMM s)) { #ifdef ARMV6T2 - MOVW_ri16(REG_WORK1, d); - MOVT_ri16(REG_WORK1, d >> 16); - LDR_rR(REG_WORK2, REG_WORK1); + if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) { + uae_s32 idx = d - (uae_u32) & regs; + LDR_rRI(REG_WORK2, R_REGSTRUCT, idx); + } else { + MOVW_ri16(REG_WORK1, d); + MOVT_ri16(REG_WORK1, d >> 16); + LDR_rR(REG_WORK2, REG_WORK1); + } - MOVW_ri16(REG_WORK3, s); - if(s >> 16) - MOVT_ri16(REG_WORK3, s >> 16); - 
ADD_rrr(REG_WORK2, REG_WORK2, REG_WORK3); + if(CHECK32(s)) { + ADD_rri(REG_WORK2, REG_WORK2, s); + } else { + MOVW_ri16(REG_WORK3, s); + if(s >> 16) + MOVT_ri16(REG_WORK3, s >> 16); + ADD_rrr(REG_WORK2, REG_WORK2, REG_WORK3); + } - STR_rR(REG_WORK2, REG_WORK1); + if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) { + uae_s32 idx = d - (uae_u32) & regs; + STR_rRI(REG_WORK2, R_REGSTRUCT, idx); + } else { + STR_rR(REG_WORK2, REG_WORK1); + } #else uae_s32 offs = data_long_offs(d); LDR_rRI(REG_WORK1, RPC_INDEX, offs); @@ -554,18 +520,6 @@ LOWFUNC(WRITE,RMW,2,compemu_raw_add_l_mi,(IMM d, IMM s)) } LENDFUNC(WRITE,RMW,2,compemu_raw_add_l_mi,(IMM d, IMM s)) -LOWFUNC(WRITE,NONE,2,compemu_raw_and_TAGMASK,(RW4 d)) -{ - // TAGMASK is 0x0000ffff -#ifdef ARMV6T2 - BFC_rii(d, 16, 31); -#else - BIC_rri(d, d, 0x00ff0000); - BIC_rri(d, d, 0xff000000); -#endif -} -LENDFUNC(WRITE,NONE,2,compemu_raw_and_TAGMASK,(RW4 d)) - LOWFUNC(WRITE,READ,2,compemu_raw_cmp_l_mi,(MEMR d, IMM s)) { clobber_flags(); @@ -845,157 +799,140 @@ STATIC_INLINE void compemu_raw_jcc_l_oponly(int cc) { switch (cc) { case NATIVE_CC_HI: // HI - BEQ_i(2); // beq no jump - BCS_i(1); // bcs no jump - // jump - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); - // no jump + BEQ_i(0); // beq no jump + BCC_i(0); // bcc jump break; case NATIVE_CC_LS: // LS BEQ_i(0); // beq jump - BCC_i(1); // bcc no jump + BCC_i(0); // bcc no jump // jump - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + B_i(0); // no jump break; case NATIVE_CC_F_OGT: // Jump if valid and greater than - BVS_i(2); // do not jump if NaN - BLE_i(1); // do not jump if less or equal - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + BVS_i(0); // do not jump if NaN + BGT_i(0); // jump if greater than break; case NATIVE_CC_F_OGE: // Jump if valid and greater or equal - BVS_i(2); // do not jump if NaN - BCC_i(1); // do not jump if carry cleared - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + BVS_i(0); // do not jump if NaN + BCS_i(0); // jump if carry set break; case NATIVE_CC_F_OLT: // 
Jump if vaild and less than - BVS_i(2); // do not jump if NaN - BCS_i(1); // do not jump if carry set - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + BVS_i(0); // do not jump if NaN + BCC_i(0); // jump if carry cleared break; case NATIVE_CC_F_OLE: // Jump if valid and less or equal - BVS_i(2); // do not jump if NaN - BGT_i(1); // do not jump if greater than - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + BVS_i(0); // do not jump if NaN + BLE_i(0); // jump if less or equal break; case NATIVE_CC_F_OGL: // Jump if valid and greator or less - BVS_i(2); // do not jump if NaN - BEQ_i(1); // do not jump if equal - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + BVS_i(0); // do not jump if NaN + BNE_i(0); // jump if not equal break; case NATIVE_CC_F_OR: // Jump if valid - BVS_i(1); // do not jump if NaN - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + BVC_i(0); break; case NATIVE_CC_F_UN: // Jump if NAN - BVC_i(1); // do not jump if valid - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + BVS_i(0); break; case NATIVE_CC_F_UEQ: // Jump if NAN or equal BVS_i(0); // jump if NaN - BNE_i(1); // do not jump if greater or less + BNE_i(0); // do not jump if greater or less // jump - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + B_i(0); break; case NATIVE_CC_F_UGT: // Jump if NAN or greater than BVS_i(0); // jump if NaN - BLS_i(1); // do not jump if lower or same + BLS_i(0); // do not jump if lower or same // jump - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + B_i(0); break; case NATIVE_CC_F_UGE: // Jump if NAN or greater or equal BVS_i(0); // jump if NaN - BMI_i(1); // do not jump if lower + BMI_i(0); // do not jump if lower // jump - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + B_i(0); break; case NATIVE_CC_F_ULT: // Jump if NAN or less than BVS_i(0); // jump if NaN - BGE_i(1); // do not jump if greater or equal + BGE_i(0); // do not jump if greater or equal // jump - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + B_i(0); break; case NATIVE_CC_F_ULE: // Jump if NAN or less or equal BVS_i(0); // jump if NaN - BGT_i(1); // do not jump if greater + BGT_i(0); // do 
not jump if greater // jump - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + B_i(0); break; default: - CC_B_i(cc^1, 1); - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + CC_B_i(cc, 0); break; } - // emit of target will be done by caller + // emit of target into last branch will be done by caller } STATIC_INLINE void compemu_raw_handle_except(IMM cycles) { - uae_u32* branchadd; + uae_u32* branchadd; - clobber_flags(); -#ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception)); - MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16); -#else - auto offset = data_long_offs((uae_u32)(&jit_exception)); - LDR_rRI(REG_WORK2, RPC_INDEX, offset); -#endif - LDR_rR(REG_WORK1, REG_WORK2); + clobber_flags(); + + MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception)); + MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16); + LDR_rR(REG_WORK1, REG_WORK2); TST_rr(REG_WORK1, REG_WORK1); - BNE_i(1); // exception, skip LDR and target - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); // jump to next opcode branchadd = (uae_u32*)get_target(); - skip_long(); // emit of target (next opcode handler) will be done later - - // countdown -= scaled_cycles(totcycles); - uae_s32 offs = (uae_u32)&countdown - (uae_u32)&regs; + BEQ_i(0); // no exception, jump to next instruction + + // countdown -= scaled_cycles(totcycles); + uae_s32 offs = (uae_u32)&countdown - (uae_u32)&regs; LDR_rRI(REG_WORK1, R_REGSTRUCT, offs); - if (CHECK32(cycles)) { - SUBS_rri(REG_WORK1, REG_WORK1, cycles); - } - else { + if(CHECK32(cycles)) { + SUBS_rri(REG_WORK1, REG_WORK1, cycles); + } else { #ifdef ARMV6T2 - MOVW_ri16(REG_WORK2, cycles); - if (cycles >> 16) - MOVT_ri16(REG_WORK2, cycles >> 16); + MOVW_ri16(REG_WORK2, cycles); + if(cycles >> 16) + MOVT_ri16(REG_WORK2, cycles >> 16); #else - int offs2 = data_long_offs(cycles); - LDR_rRI(REG_WORK2, RPC_INDEX, offs2); + int offs2 = data_long_offs(cycles); + LDR_rRI(REG_WORK2, RPC_INDEX, offs2); #endif - SUBS_rrr(REG_WORK1, REG_WORK1, REG_WORK2); - } + SUBS_rrr(REG_WORK1, REG_WORK1, REG_WORK2); + } 
STR_rRI(REG_WORK1, R_REGSTRUCT, offs); - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); // - emit_long((uintptr)popall_execute_exception); - + raw_pop_preserved_regs(); + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); // + emit_long((uintptr)execute_exception); + // Write target of next instruction - write_jmp_target(branchadd, (cpuop_func*)get_target()); - + write_jmp_target(branchadd, (uintptr)get_target()); } -STATIC_INLINE void compemu_raw_jl(uae_u32 t) +STATIC_INLINE void compemu_raw_maybe_recompile(uae_u32 t) { #ifdef ARMV6T2 - MOVW_ri16(REG_WORK1, t); - MOVT_ri16(REG_WORK1, t >> 16); - CC_BX_r(NATIVE_CC_LT, REG_WORK1); + BGE_i(2); + raw_pop_preserved_regs(); + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + emit_long(t); #else uae_s32 offs = data_long_offs(t); CC_LDR_rRI(NATIVE_CC_LT, RPC_INDEX, RPC_INDEX, offs); @@ -1004,8 +941,14 @@ STATIC_INLINE void compemu_raw_jl(uae_u32 t) STATIC_INLINE void compemu_raw_jmp(uae_u32 t) { - LDR_rRI(RPC_INDEX, RPC_INDEX, -4); - emit_long(t); + if(t >= (uae_u32)popallspace && t < (uae_u32)(popallspace + POPALLSPACE_SIZE + MAX_JIT_CACHE * 1024)) { + uae_u32* loc = (uae_u32*)get_target(); + B_i(0); + write_jmp_target(loc, t); + } else { + LDR_rRI(RPC_INDEX, RPC_INDEX, -4); + emit_long(t); + } } STATIC_INLINE void compemu_raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) @@ -1024,10 +967,11 @@ STATIC_INLINE void compemu_raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m) emit_long(base); } -STATIC_INLINE void compemu_raw_jnz(uae_u32 t) +STATIC_INLINE void compemu_raw_maybe_cachemiss(uae_u32 t) { #ifdef ARMV6T2 - BEQ_i(1); + BEQ_i(2); + raw_pop_preserved_regs(); LDR_rRI(RPC_INDEX, RPC_INDEX, -4); emit_long(t); #else @@ -1093,19 +1037,22 @@ LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_inreg,(RR4 rr_pc, IMM cycles)) BIC_rri(rr_pc, rr_pc, 0x00ff0000); BIC_rri(rr_pc, rr_pc, 0xff000000); #endif - LDR_rRI(REG_WORK1, RPC_INDEX, 4); // + LDR_rRI(REG_WORK1, RPC_INDEX, 8); // LDR_rRR_LSLi(RPC_INDEX, REG_WORK1, rr_pc, 2); - LDR_rRI(RPC_INDEX, RPC_INDEX, 0); // + 
raw_pop_preserved_regs(); + LDR_rRI(RPC_INDEX, RPC_INDEX, 0); // emit_long((uintptr)cache_tags); - emit_long((uintptr)popall_do_nothing); + emit_long((uintptr)do_nothing); } LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_inreg,(RR4 rr_pc, IMM cycles)) -LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v)) +//LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v)) +STATIC_INLINE uae_u32* compemu_raw_endblock_pc_isconst(IMM cycles, IMM v) { + uae_u32* tba; clobber_flags(); // countdown -= scaled_cycles(totcycles); @@ -1126,20 +1073,29 @@ LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v)) } STR_rRI(REG_WORK1, R_REGSTRUCT, offs); - CC_LDR_rRI(NATIVE_CC_MI^1, RPC_INDEX, RPC_INDEX, 16); // + tba = (uae_u32*)get_target(); + CC_B_i(NATIVE_CC_MI^1, 0); // - LDR_rRI(REG_WORK1, RPC_INDEX, 4); // + LDR_rRI(REG_WORK1, RPC_INDEX, 8); // offs = (uae_u32)&regs.pc_p - (uae_u32)&regs; STR_rRI(REG_WORK1, R_REGSTRUCT, offs); - LDR_rRI(RPC_INDEX, RPC_INDEX, 0); // + raw_pop_preserved_regs(); + LDR_rRI(RPC_INDEX, RPC_INDEX, 0); // emit_long(v); - emit_long((uintptr)popall_do_nothing); - - // -} -LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v)) + emit_long((uintptr)do_nothing); + return tba; +} +//LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v)) + + +LOWFUNC(NONE,READ,2,compemu_raw_tag_pc,(W4 d, MEMR s)) +{ + uae_s32 idx = (uae_u32)(s) - (uae_u32)&regs; + LDRH_rRI(d, R_REGSTRUCT, idx); +} +LENDFUNC(NONE,READ,2,compemu_raw_tag_pc,(W4 d, MEMR s)) /************************************************************************* * FPU stuff * @@ -1477,52 +1433,49 @@ LOWFUNC(NONE,WRITE,2,raw_fp_from_exten_mr,(RR4 adr, FR s)) { uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; - VMOVi_to_ARM_rd(REG_WORK2, s, 1); // get high part of double + VMOVi_to_ARM_rd(REG_WORK1, s, 1); // get high part of double VCMP64_d0(s); VMRS_CPSR(); - BEQ_i(22); // iszero + BEQ_i(20); // iszero - UBFX_rrii(REG_WORK3, 
REG_WORK2, 20, 11); // get exponent - MOVW_ri16(REG_WORK1, 2047); - CMP_rr(REG_WORK3, REG_WORK1); - BEQ_i(15); // isnan + UBFX_rrii(REG_WORK2, REG_WORK1, 20, 11); // get exponent + MOVW_ri16(REG_WORK3, 2047); + CMP_rr(REG_WORK2, REG_WORK3); + BEQ_i(13); // isnan - MOVW_ri16(REG_WORK1, 15360); // diff of bias between double and long double - ADD_rrr(REG_WORK3, REG_WORK3, REG_WORK1); // exponent done - AND_rri(REG_WORK2, REG_WORK2, 0x80000000); // extract sign - ORR_rrrLSLi(REG_WORK3, REG_WORK2, REG_WORK3, 16); // merge sign and exponent + MOVW_ri16(REG_WORK3, 15360); // diff of bias between double and long double + ADD_rrr(REG_WORK2, REG_WORK2, REG_WORK3); // exponent done + AND_rri(REG_WORK1, REG_WORK1, 0x80000000); // extract sign + ORR_rrrLSLi(REG_WORK2, REG_WORK1, REG_WORK2, 16); // merge sign and exponent - LDR_rRI(REG_WORK1, R_REGSTRUCT, offs); - ADD_rrr(REG_WORK1, adr, REG_WORK1); + LDR_rRI(REG_WORK3, R_REGSTRUCT, offs); + ADD_rrr(REG_WORK3, adr, REG_WORK3); - REV_rr(REG_WORK3, REG_WORK3); - STRH_rR(REG_WORK3, REG_WORK1); // write exponent + REV_rr(REG_WORK2, REG_WORK2); + STRH_rR(REG_WORK2, REG_WORK3); // write exponent VSHL64_ddi(SCRATCH_F64_1, s, 11); // shift mantissa to correct position - VMOV64_rrd(REG_WORK3, REG_WORK2, SCRATCH_F64_1); - ORR_rri(REG_WORK2, REG_WORK2, 0x80000000); // insert explicit 1 - REV_rr(REG_WORK2, REG_WORK2); - REV_rr(REG_WORK3, REG_WORK3); - STR_rRI(REG_WORK2, REG_WORK1, 4); - STR_rRI(REG_WORK3, REG_WORK1, 8); - B_i(10); // end_of_op + VREV64_8_dd(SCRATCH_F64_1, SCRATCH_F64_1); + VMOV64_rrd(REG_WORK1, REG_WORK2, SCRATCH_F64_1); + ORR_rri(REG_WORK1, REG_WORK1, 0x80); // insert explicit 1 + STRD_rRI(REG_WORK1, REG_WORK3, 4); + B_i(9); // end_of_op // isnan - MOVW_ri16(REG_WORK2, 0x7fff); - LSL_rri(REG_WORK2, REG_WORK2, 16); - MVN_ri(REG_WORK3, 0); + MOVW_ri16(REG_WORK1, 0x7fff); + LSL_rri(REG_WORK1, REG_WORK1, 16); + MVN_ri(REG_WORK2, 0); // iszero - CC_AND_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, 0x80000000); // extract sign - 
CC_MOV_ri(NATIVE_CC_EQ, REG_WORK3, 0); + CC_AND_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, 0x80000000); // extract sign + CC_MOV_ri(NATIVE_CC_EQ, REG_WORK2, 0); - LDR_rRI(REG_WORK1, R_REGSTRUCT, offs); - ADD_rrr(REG_WORK1, adr, REG_WORK1); + LDR_rRI(REG_WORK3, R_REGSTRUCT, offs); + ADD_rrr(REG_WORK3, adr, REG_WORK3); - REV_rr(REG_WORK2, REG_WORK2); - STR_rR(REG_WORK2, REG_WORK1); - STR_rRI(REG_WORK3, REG_WORK1, 4); - STR_rRI(REG_WORK3, REG_WORK1, 8); + REV_rr(REG_WORK1, REG_WORK1); + STRD_rR(REG_WORK1, REG_WORK3); + STR_rRI(REG_WORK2, REG_WORK3, 8); // end_of_op @@ -1533,44 +1486,42 @@ LOWFUNC(NONE,READ,2,raw_fp_to_exten_rm,(FW d, RR4 adr)) { uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs; - LDR_rRI(REG_WORK1, R_REGSTRUCT, offs); - ADD_rrr(REG_WORK1, adr, REG_WORK1); + LDR_rRI(REG_WORK3, R_REGSTRUCT, offs); + ADD_rrr(REG_WORK3, adr, REG_WORK3); - LDR_rRI(REG_WORK2, REG_WORK1, 4); - LDR_rRI(REG_WORK3, REG_WORK1, 8); - REV_rr(REG_WORK2, REG_WORK2); - REV_rr(REG_WORK3, REG_WORK3); - BIC_rri(REG_WORK2, REG_WORK2, 0x80000000); // clear explicit 1 - VMOV64_drr(d, REG_WORK3, REG_WORK2); + LDRD_rRI(REG_WORK1, REG_WORK3, 4); + BIC_rri(REG_WORK1, REG_WORK1, 0x80); // clear explicit 1 + VMOV64_drr(d, REG_WORK1, REG_WORK2); + VREV64_8_dd(d, d); - LDR_rR(REG_WORK2, REG_WORK1); - REV_rr(REG_WORK2, REG_WORK2); - LSR_rri(REG_WORK2, REG_WORK2, 16); // exponent now in lower half - MOVW_ri16(REG_WORK3, 0x7fff); - ANDS_rrr(REG_WORK3, REG_WORK3, REG_WORK2); + LDRH_rR(REG_WORK1, REG_WORK3); + REV16_rr(REG_WORK1, REG_WORK1); // exponent now in lower half + + MOVW_ri16(REG_WORK2, 0x7fff); + ANDS_rrr(REG_WORK2, REG_WORK2, REG_WORK1); BNE_i(9); // not_zero VCMP64_d0(d); VMRS_CPSR(); BNE_i(6); // not zero // zero VMOV_I64_dimmI(d, 0x00); - TST_ri(REG_WORK2, 0x8000); // check sign + TST_ri(REG_WORK1, 0x8000); // check sign BEQ_i(12); // end_of_op - MOV_ri(REG_WORK2, 0x80000000); - MOV_ri(REG_WORK3, 0); - VMOV64_drr(d, REG_WORK3, REG_WORK2); + MOV_ri(REG_WORK1, 0x80000000); + 
MOV_ri(REG_WORK2, 0); + VMOV64_drr(d, REG_WORK2, REG_WORK1); B_i(8); // end_of_op // not_zero - MOVW_ri16(REG_WORK1, 15360); // diff of bias between double and long double - SUB_rrr(REG_WORK3, REG_WORK3, REG_WORK1); // exponent done, ToDo: check for carry -> result gets Inf in double - UBFX_rrii(REG_WORK2, REG_WORK2, 15, 1); // extract sign - BFI_rrii(REG_WORK3, REG_WORK2, 11, 11); // insert sign + MOVW_ri16(REG_WORK3, 15360); // diff of bias between double and long double + SUB_rrr(REG_WORK2, REG_WORK2, REG_WORK3); // exponent done, ToDo: check for carry -> result gets Inf in double + UBFX_rrii(REG_WORK1, REG_WORK1, 15, 1); // extract sign + BFI_rrii(REG_WORK2, REG_WORK1, 11, 11); // insert sign VSHR64_ddi(d, d, 11); // shift mantissa to correct position - LSL_rri(REG_WORK3, REG_WORK3, 20); - VMOV_I64_dimmI(0, 0x00); - VMOVi_from_ARM_dr(0, REG_WORK3, 1); - VORR_ddd(d, d, 0); + LSL_rri(REG_WORK2, REG_WORK2, 20); + VMOV_I64_dimmI(SCRATCH_F64_1, 0x00); + VMOVi_from_ARM_dr(SCRATCH_F64_1, REG_WORK2, 1); + VORR_ddd(d, d, SCRATCH_F64_1); // end_of_op } diff --git a/src/jit/codegen_arm.h b/src/jit/codegen_arm.h index 7a54d4f3..2a4cdb00 100644 --- a/src/jit/codegen_arm.h +++ b/src/jit/codegen_arm.h @@ -645,8 +645,8 @@ enum { #define CC_ADD_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs)) #define CC_ADD_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_RRX(Rm)) -#define ADD_rri8(cc,Rd,Rn,i) CC_ADD_rri8(NATIVE_CC_AL,Rd,Rn,i) -#define ADD_rri8RORi(cc,Rd,Rn,Rm,i) CC_ADD_rri8RORi(NATIVE_CC_AL,Rd,Rn,Rm,i) +#define ADD_rri8(Rd,Rn,i) CC_ADD_rri8(NATIVE_CC_AL,Rd,Rn,i) +#define ADD_rri8RORi(Rd,Rn,Rm,i) CC_ADD_rri8RORi(NATIVE_CC_AL,Rd,Rn,Rm,i) #define ADD_rri(Rd,Rn,i) CC_ADD_rri(NATIVE_CC_AL,Rd,Rn,i) #define ADD_rrr(Rd,Rn,Rm) CC_ADD_rrr(NATIVE_CC_AL,Rd,Rn,Rm) @@ -1320,6 +1320,12 @@ enum { #define CC_STM_Ri(cc,Rn,i) _W(((cc) << 28) | (0x8 << 24) | (0x8 << 20) | ((Rn) << 16) | i) #define STM_Ri(Rn,i) CC_STM_Ri(NATIVE_CC_AL,Rn,i) +#define CC_MLS_rrrr(cc,Rd,Rn,Rm,Ra) 
_W(((cc) << 28) | (0x0 << 24) | (0x6 << 20) | (Rd << 16) | (Ra << 12) | (Rm << 8) | (0x9 << 4) | Rn) +#define MLS_rrrr(Rd,Rn,Rm,Ra) CC_MLS_rrrr(NATIVE_CC_AL,Rd,Rn,Rm,Ra) + +#define CC_SMULxy_rrr(cc,Rd,Rn,Rm,x,y) _W(((cc) << 28) | (0x1 << 24) | (0x6 << 20) | (Rd << 16) | (0x0 << 12) | (Rm << 8) | (0x8 << 4) | (Rn) | (x << 5) | (y << 6)) +#define SMULxy_rrr(Rd,Rn,Rm,x,y) CC_SMULxy_rrr(NATIVE_CC_AL,Rd,Rn,Rm,x,y) + // ARMv6T2 //#ifdef ARMV6T2 @@ -1492,7 +1498,10 @@ enum { #define FIMM6(imm) ((imm) << 16) #define VSHL64_ddi(Dd,Dm,imm) _W((0xf << 28) | (0x2 << 24) | (0x8 << 20) | (0x5 << 8) | (0x9 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm) | FIMM6(imm)) #define VSHR64_ddi(Dd,Dm,imm) _W((0xf << 28) | (0x3 << 24) | (0x8 << 20) | (0x0 << 8) | (0x9 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm) | FIMM6(64-imm)) +#define VSLI64_ddi(Dd,Dm,i) _W((0xf << 28) | (0x3 << 24) | (0x8 << 20) | (0x5 << 8) | (0x9 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm) | FIMM6(i)) #define VORR_ddd(Dd,Dn,Dm) _W((0xf << 28) | (0x2 << 24) | (0x2 << 20) | (0x1 << 8) | (0x1 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm)) +#define VREV64_8_dd(Dd,Dm) _W((0xf << 28) | (0x3 << 24) | (0xb << 20) | (0x0 << 16) | (0x0 << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm)) + #endif /* ARM_RTASM_H */ diff --git a/src/jit/compemu.cpp b/src/jit/compemu.cpp index 28ba27e0..1e4590bf 100644 --- a/src/jit/compemu.cpp +++ b/src/jit/compemu.cpp @@ -33301,14 +33301,12 @@ if (special_mem) { FAIL(1); return 0; } - int src=scratchie++; - int dst=scratchie++; { int srca=scratchie++; mov_l_rr(srca,srcreg+8); { int dsta = scratchie++; mov_l_ri(dsta,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */ jnf_ADD_im8(srcreg + 8, srcreg + 8, 16); - jnf_MOVE16(dst, src); + jnf_MOVE16(dsta, srca); }}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; return 0; @@ -33325,14 +33323,12 @@ if (special_mem) { FAIL(1); return 0; } - int src=scratchie++; - int dst=scratchie++; { int srca = scratchie++; 
mov_l_ri(srca,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */ { int dsta=scratchie++; mov_l_rr(dsta,dstreg+8); jnf_ADD_im8(dstreg + 8, dstreg + 8, 16); - jnf_MOVE16(dst, src); + jnf_MOVE16(dsta, srca); }}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; return 0; @@ -33349,14 +33345,12 @@ if (special_mem) { FAIL(1); return 0; } - int src=scratchie++; - int dst=scratchie++; { int srca=dodgy?scratchie++:srcreg+8; if (dodgy) mov_l_rr(srca,srcreg+8); { int dsta = scratchie++; mov_l_ri(dsta,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */ - jnf_MOVE16(dst, src); + jnf_MOVE16(dsta, srca); }}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; return 0; @@ -33373,14 +33367,12 @@ if (special_mem) { FAIL(1); return 0; } - int src=scratchie++; - int dst=scratchie++; { int srca = scratchie++; mov_l_ri(srca,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */ { int dsta=dodgy?scratchie++:dstreg+8; if (dodgy) mov_l_rr(dsta,dstreg+8); - jnf_MOVE16(dst, src); + jnf_MOVE16(dsta, srca); }}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; return 0; @@ -33398,15 +33390,15 @@ if (special_mem) { FAIL(1); return 0; } - int src=scratchie++; - int dst=scratchie++; + int srca=scratchie++; + int dsta=scratchie++; uae_u16 dstreg = ((comp_get_iword((m68k_pc_offset+=2)-2))>>12) & 0x07; - jnf_MOVE(src, srcreg + 8); - jnf_MOVE(dst, dstreg + 8); + jnf_MOVE(srca, srcreg + 8); + jnf_MOVE(dsta, dstreg + 8); if (srcreg != dstreg) jnf_ADD_im8(srcreg + 8, srcreg + 8, 16); jnf_ADD_im8(dstreg + 8, dstreg + 8, 16); - jnf_MOVE16(dst, src); + jnf_MOVE16(dsta, srca); } if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; return 0; @@ -64819,14 +64811,12 @@ if (special_mem) { FAIL(1); return 0; } - int src=scratchie++; - int dst=scratchie++; { int srca=scratchie++; mov_l_rr(srca,srcreg+8); { 
int dsta = scratchie++; mov_l_ri(dsta,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */ jnf_ADD_im8(srcreg + 8, srcreg + 8, 16); - jnf_MOVE16(dst, src); + jnf_MOVE16(dsta, srca); }}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; return 0; @@ -64843,14 +64833,12 @@ if (special_mem) { FAIL(1); return 0; } - int src=scratchie++; - int dst=scratchie++; { int srca = scratchie++; mov_l_ri(srca,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */ { int dsta=scratchie++; mov_l_rr(dsta,dstreg+8); jnf_ADD_im8(dstreg + 8, dstreg + 8, 16); - jnf_MOVE16(dst, src); + jnf_MOVE16(dsta, srca); }}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; return 0; @@ -64867,14 +64855,12 @@ if (special_mem) { FAIL(1); return 0; } - int src=scratchie++; - int dst=scratchie++; { int srca=dodgy?scratchie++:srcreg+8; if (dodgy) mov_l_rr(srca,srcreg+8); { int dsta = scratchie++; mov_l_ri(dsta,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */ - jnf_MOVE16(dst, src); + jnf_MOVE16(dsta, srca); }}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; return 0; @@ -64891,14 +64877,12 @@ if (special_mem) { FAIL(1); return 0; } - int src=scratchie++; - int dst=scratchie++; { int srca = scratchie++; mov_l_ri(srca,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */ { int dsta=dodgy?scratchie++:dstreg+8; if (dodgy) mov_l_rr(dsta,dstreg+8); - jnf_MOVE16(dst, src); + jnf_MOVE16(dsta, srca); }}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; return 0; @@ -64916,15 +64900,15 @@ if (special_mem) { FAIL(1); return 0; } - int src=scratchie++; - int dst=scratchie++; + int srca=scratchie++; + int dsta=scratchie++; uae_u16 dstreg = ((comp_get_iword((m68k_pc_offset+=2)-2))>>12) & 0x07; - jnf_MOVE(src, srcreg + 8); - jnf_MOVE(dst, dstreg + 8); + jnf_MOVE(srca, srcreg + 8); + jnf_MOVE(dsta, dstreg + 8); 
if (srcreg != dstreg) jnf_ADD_im8(srcreg + 8, srcreg + 8, 16); jnf_ADD_im8(dstreg + 8, dstreg + 8, 16); - jnf_MOVE16(dst, src); + jnf_MOVE16(dsta, srca); } if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc(); if (failure) m68k_pc_offset=m68k_pc_offset_thisinst; return 0; diff --git a/src/jit/compemu.h b/src/jit/compemu.h index 957f24e8..9e8b5aeb 100644 --- a/src/jit/compemu.h +++ b/src/jit/compemu.h @@ -162,7 +162,7 @@ extern uae_u32 needed_flags; extern uae_u8* comp_pc_p; extern void* pushall_call_handler; -#define VREGS 32 +#define VREGS 24 #define VFREGS 10 #define INMEM 1 @@ -205,19 +205,11 @@ STATIC_INLINE int end_block(uae_u16 opcode) #define PC_P 16 #define FLAGX 17 #define FLAGTMP 18 -#define NEXT_HANDLER 19 -#define S1 20 -#define S2 21 -#define S3 22 -#define S4 23 -#define S5 24 -#define S6 25 -#define S7 26 -#define S8 27 -#define S9 28 -#define S10 29 -#define S11 30 -#define S12 31 +#define S1 19 +#define S2 20 +#define S3 21 +#define S4 22 +#define S5 23 #define FP_RESULT 8 #define FS1 9 @@ -263,14 +255,6 @@ typedef struct { fn_status fat[N_FREGS]; } bigstate; -typedef struct { - /* Integer part */ - uae_s8 virt[VREGS]; - uae_s8 nat[N_REGS]; -} smallstate; - -extern int touchcnt; - #define IMM uae_s32 #define RR1 uae_u32 #define RR2 uae_u32 @@ -329,7 +313,6 @@ extern void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp); #define SYNC_PC_OFFSET 124 extern void sync_m68k_pc(void); extern uae_u32 get_const(int r); -extern int is_const(int r); extern void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond); extern void register_possible_exception(void); @@ -385,11 +368,9 @@ typedef struct blockinfo_t { uae_u8 optlevel; uae_u8 needed_flags; uae_u8 status; - uae_u8 havestate; dependency dep[2]; /* Holds things we depend on */ dependency* deplist; /* List of things that depend on this */ - smallstate env; } blockinfo; #define BI_INVALID 0 diff --git a/src/jit/compemu_midfunc_arm.cpp b/src/jit/compemu_midfunc_arm.cpp index 
9fbb8c4f..628ba598 100644 --- a/src/jit/compemu_midfunc_arm.cpp +++ b/src/jit/compemu_midfunc_arm.cpp @@ -477,13 +477,10 @@ STATIC_INLINE void flush_cpu_icache(void *start, void *stop) #endif } -STATIC_INLINE void write_jmp_target(uae_u32* jmpaddr, cpuop_func* a) { - *(jmpaddr) = (uae_u32)a; - flush_cpu_icache((void *)jmpaddr, (void *)&jmpaddr[1]); -} - -STATIC_INLINE void emit_jmp_target(uae_u32 a) { - emit_long((uae_u32)a); +STATIC_INLINE void write_jmp_target(uae_u32* jmpaddr, uintptr a) { + uae_s32 off = ((uae_u32)a - (uae_u32)jmpaddr - 8) >> 2; + *(jmpaddr) = (*(jmpaddr) & 0xff000000) | (off & 0x00ffffff); + flush_cpu_icache((void *)jmpaddr, (void *)&jmpaddr[1]); } diff --git a/src/jit/compemu_midfunc_arm2.cpp b/src/jit/compemu_midfunc_arm2.cpp index 98309e3c..cf2b287d 100644 --- a/src/jit/compemu_midfunc_arm2.cpp +++ b/src/jit/compemu_midfunc_arm2.cpp @@ -278,18 +278,6 @@ MENDFUNC(3,jff_ADD_l,(W4 d, RR4 s, RR4 v)) * Flags: Not affected. * */ -MIDFUNC(2,jnf_ADDA_b,(W4 d, RR1 s)) -{ - s = readreg(s, 4); - d = rmw(d, 4, 4); - - SXTAB_rrr(d, d, s); - - unlock2(d); - unlock2(s); -} -MENDFUNC(2,jnf_ADDA_b,(W4 d, RR1 s)) - MIDFUNC(2,jnf_ADDA_w,(W4 d, RR2 s)) { s = readreg(s, 4); @@ -337,13 +325,8 @@ MIDFUNC(3,jnf_ADDX,(W4 d, RR4 s, RR4 v)) v = readreg(v, 4); d = writereg(d, 4); - clobber_flags(); - - // Restore X to carry (don't care about other flags) - LSL_rri(REG_WORK1, x, 29); - MSR_CPSRf_r(REG_WORK1); - - ADC_rrr(d, s, v); + ADD_rrr(d, s, v); + ADD_rrr(d, d, x); unlock2(d); unlock2(s); @@ -1457,7 +1440,7 @@ MIDFUNC(2,jnf_BSET_b,(RW4 d, RR4 s)) return; } s = readreg(s, 4); - d = rmw(d, 4 ,4); + d = rmw(d, 4, 4); AND_rri(REG_WORK1, s, 7); MOV_ri(REG_WORK2, 1); @@ -1852,7 +1835,7 @@ MENDFUNC(2,jff_DBCC,(RR2 d, IMM cc)) * C Always cleared. 
* */ - MIDFUNC(3,jnf_DIVU,(W4 d, RR4 s1, RR4 s2)) +MIDFUNC(3,jnf_DIVU,(W4 d, RR4 s1, RR4 s2)) { s1 = readreg(s1, 4); s2 = readreg(s2, 4); @@ -1873,14 +1856,14 @@ MENDFUNC(2,jff_DBCC,(RR2 d, IMM cc)) #endif STR_rR(REG_WORK1, REG_WORK2); #ifdef ARM_HAS_DIV - B_i(5); // end_of_op - + B_i(4); // end_of_op + // src is not 0 UDIV_rrr(REG_WORK1, s1, REG_WORK3); #else - B_i(11); // end_of_op - -// src is not 0 + B_i(10); // end_of_op + + // src is not 0 VMOVi_from_ARM_dr(SCRATCH_F64_1, s1, 0); VMOVi_from_ARM_dr(SCRATCH_F64_2, REG_WORK3, 0); VCVTIuto64_ds(SCRATCH_F64_1, SCRATCH_F32_1); @@ -1889,16 +1872,15 @@ MENDFUNC(2,jff_DBCC,(RR2 d, IMM cc)) VCVT64toIu_sd(SCRATCH_F32_1, SCRATCH_F64_3); VMOVi_to_ARM_rd(REG_WORK1, SCRATCH_F64_1, 0); #endif - - LSRS_rri(REG_WORK2, REG_WORK1, 16); // if result of this is not 0, DIVU overflows -> no result - BNE_i(2); + + LSRS_rri(REG_WORK2, REG_WORK1, 16); // if result of this is not 0, DIVU overflows -> no result + BNE_i(1); // Here we have to calc remainder - MUL_rrr(REG_WORK2, REG_WORK1, REG_WORK3); - SUB_rrr(REG_WORK2, s1, REG_WORK2); - PKHBT_rrrLSLi(d, REG_WORK1, REG_WORK2, 16); + MLS_rrrr(REG_WORK2, REG_WORK1, REG_WORK3, s1); + PKHBT_rrrLSLi(d, REG_WORK1, REG_WORK2, 16); // end_of_op - + unlock2(d); unlock2(s1); unlock2(s2); @@ -1910,7 +1892,7 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2)) s1 = readreg(s1, 4); s2 = readreg(s2, 4); d = writereg(d, 4); - + UNSIGNED16_REG_2_REG(REG_WORK3, s2); TST_rr(REG_WORK3, REG_WORK3); BNE_i(6); // src is not 0 @@ -1930,12 +1912,12 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2)) MOV_ri(REG_WORK1, ARM_Z_FLAG | ARM_V_FLAG); MSR_CPSRf_r(REG_WORK1); #ifdef ARM_HAS_DIV - B_i(12); // end_of_op + B_i(11); // end_of_op // src is not 0 UDIV_rrr(REG_WORK1, s1, REG_WORK3); #else - B_i(18); // end_of_op + B_i(17); // end_of_op // src is not 0 VMOVi_from_ARM_dr(SCRATCH_F64_1, s1, 0); @@ -1952,7 +1934,7 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2)) // Here we handle overflow MOV_ri(REG_WORK1, ARM_V_FLAG | 
ARM_N_FLAG); MSR_CPSRf_r(REG_WORK1); - B_i(6); + B_i(5); // Here we have to calc flags and remainder LSLS_rri(REG_WORK2, REG_WORK1, 16); // N and Z ok @@ -1960,8 +1942,7 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2)) BIC_rri(REG_WORK2, REG_WORK2, ARM_C_FLAG | ARM_V_FLAG); MSR_CPSRf_r(REG_WORK2); - MUL_rrr(REG_WORK2, REG_WORK1, REG_WORK3); - SUB_rrr(REG_WORK2, s1, REG_WORK2); + MLS_rrrr(REG_WORK2, REG_WORK1, REG_WORK3, s1); PKHBT_rrrLSLi(d, REG_WORK1, REG_WORK2, 16); // end_of_op @@ -2600,11 +2581,11 @@ MIDFUNC(3,jff_LSL_b_imm,(W4 d, RR4 s, IMM i)) MSR_CPSRf_i(0); - LSL_rri(d, s, 24); if (i) { - LSLS_rri(d, d, i); + LSLS_rri(d, s, i + 24); DUPLICACTE_CARRY } else { + LSL_rri(d, s, 24); TST_rr(d, d); } LSR_rri(d, d, 24); @@ -2621,11 +2602,11 @@ MIDFUNC(3,jff_LSL_w_imm,(W4 d, RR4 s, IMM i)) MSR_CPSRf_i(0); - LSL_rri(d, s, 16); if (i) { - LSLS_rri(d, d, i); + LSLS_rri(d, s, i + 16); DUPLICACTE_CARRY } else { + LSL_rri(d, s, 16); TST_rr(d, d); } LSR_rri(d, d, 16); @@ -2938,15 +2919,15 @@ MIDFUNC(3,jff_LSR_b_reg,(W4 d, RR4 s, RR4 i)) d = writereg(d, 4); int x = writereg(FLAGX, 4); - SIGNED8_REG_2_REG(d, s); // Make sure, sign is in MSB if shift count is 0 (to get correct N flag) MSR_CPSRf_i(0); ANDS_rri(REG_WORK1, i, 63); BEQ_i(4); // No shift -> X flag unchanged - AND_rri(d, d, 0xff); // Shift count is not 0 -> unsigned required + AND_rri(d, s, 0xff); // Shift count is not 0 -> unsigned required LSRS_rrr(d, d, REG_WORK1); MOV_ri(x, 1); CC_MOV_ri(NATIVE_CC_CC, x, 0); - B_i(0); + B_i(1); + SIGNED8_REG_2_REG(d, s); // Make sure, sign is in MSB if shift count is 0 (to get correct N flag) TST_rr(d, d); unlock2(x); @@ -2963,15 +2944,15 @@ MIDFUNC(3,jff_LSR_w_reg,(W4 d, RR4 s, RR4 i)) d = writereg(d, 4); int x = writereg(FLAGX, 4); - SIGNED16_REG_2_REG(d, s); // Make sure, sign is in MSB if shift count is 0 (to get correct N flag) MSR_CPSRf_i(0); ANDS_rri(REG_WORK1, i, 63); BEQ_i(4); // No shift -> X flag unchanged - UXTH_rr(d, d); // Shift count is not 0 -> unsigned required + 
UXTH_rr(d, s); // Shift count is not 0 -> unsigned required LSRS_rrr(d, d, REG_WORK1); MOV_ri(x, 1); CC_MOV_ri(NATIVE_CC_CC, x, 0); - B_i(0); + B_i(1); + SIGNED16_REG_2_REG(d, s); // Make sure, sign is in MSB if shift count is 0 (to get correct N flag) TST_rr(d, d); unlock2(x); @@ -3261,15 +3242,11 @@ MIDFUNC(2,jnf_MOVE16,(RR4 d, RR4 s)) ADD_rrr(s, s, REG_WORK1); ADD_rrr(d, d, REG_WORK1); - LDR_rR(REG_WORK1, s); - LDR_rRI(REG_WORK2, s, 4); - STR_rR(REG_WORK1, d); - STR_rRI(REG_WORK2, d, 4); + LDRD_rR(REG_WORK1, s); + STRD_rR(REG_WORK1, d); - LDR_rRI(REG_WORK1, s, 8); - LDR_rRI(REG_WORK2, s, 12); - STR_rRI(REG_WORK1, d, 8); - STR_rRI(REG_WORK2, d, 12); + LDRD_rRI(REG_WORK1, s, 8); + STRD_rRI(REG_WORK1, d, 8); POP_REGS((1 << s) | (1 << d)); @@ -3329,9 +3306,7 @@ MIDFUNC(2,jnf_MULS,(RW4 d, RR4 s)) s = readreg(s, 4); d = rmw(d, 4, 4); - SIGN_EXTEND_16_REG_2_REG(d, d); - SIGN_EXTEND_16_REG_2_REG(REG_WORK1, s); - MUL_rrr(d, d, REG_WORK1); + SMULxy_rrr(d, d, s, 0, 0); unlock2(s); unlock2(d); @@ -3647,8 +3622,7 @@ MIDFUNC(2,jnf_NEGX_b,(W4 d, RR4 s)) clobber_flags(); // Restore inverted X to carry (don't care about other flags) - LSL_rri(REG_WORK1, x, 29); - EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MVN_rrLSLi(REG_WORK1, x, 29); MSR_CPSRf_r(REG_WORK1); SIGNED8_REG_2_REG(REG_WORK1, s); @@ -3669,8 +3643,7 @@ MIDFUNC(2,jnf_NEGX_w,(W4 d, RR4 s)) clobber_flags(); // Restore inverted X to carry (don't care about other flags) - LSL_rri(REG_WORK1, x, 29); - EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MVN_rrLSLi(REG_WORK1, x, 29); MSR_CPSRf_r(REG_WORK1); SIGNED16_REG_2_REG(REG_WORK1, s); @@ -3691,8 +3664,7 @@ MIDFUNC(2,jnf_NEGX_l,(W4 d, RR4 s)) clobber_flags(); // Restore inverted X to carry (don't care about other flags) - LSL_rri(REG_WORK1, x, 29); - EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MVN_rrLSLi(REG_WORK1, x, 29); MSR_CPSRf_r(REG_WORK1); RSC_rri(d, s, 0); @@ -3713,8 +3685,7 @@ MIDFUNC(2,jff_NEGX_b,(W4 d, RR1 s)) CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); // 
Restore inverted X to carry (don't care about other flags) - LSL_rri(REG_WORK1, x, 29); - EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MVN_rrLSLi(REG_WORK1, x, 29); MSR_CPSRf_r(REG_WORK1); SIGNED8_REG_2_REG(REG_WORK1, s); @@ -3747,8 +3718,7 @@ MIDFUNC(2,jff_NEGX_w,(W4 d, RR2 s)) CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); // Restore inverted X to carry (don't care about other flags) - LSL_rri(REG_WORK1, x, 29); - EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MVN_rrLSLi(REG_WORK1, x, 29); MSR_CPSRf_r(REG_WORK1); SIGNED16_REG_2_REG(REG_WORK1, s); @@ -3781,8 +3751,7 @@ MIDFUNC(2,jff_NEGX_l,(W4 d, RR4 s)) CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); // Restore inverted X to carry (don't care about other flags) - LSL_rri(REG_WORK1, x, 29); - EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MVN_rrLSLi(REG_WORK1, x, 29); MSR_CPSRf_r(REG_WORK1); RSCS_rri(d, s, 0); @@ -5487,8 +5456,7 @@ MIDFUNC(3,jnf_SUBX,(W4 d, RR4 s, RR4 v)) clobber_flags(); // Restore inverted X to carry (don't care about other flags) - LSL_rri(REG_WORK1, x, 29); - EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MVN_rrLSLi(REG_WORK1, x, 29); MSR_CPSRf_r(REG_WORK1); SBC_rrr(d, s, v); @@ -5511,8 +5479,7 @@ MIDFUNC(3,jff_SUBX_b,(W4 d, RR1 s, RR1 v)) CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); // Restore inverted X to carry (don't care about other flags) - LSL_rri(REG_WORK1, x, 29); - EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MVN_rrLSLi(REG_WORK1, x, 29); MSR_CPSRf_r(REG_WORK1); LSL_rri(REG_WORK1, s, 24); @@ -5548,8 +5515,7 @@ MIDFUNC(3,jff_SUBX_w,(W4 d, RR2 s, RR2 v)) CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); // Restore inverted X to carry (don't care about other flags) - LSL_rri(REG_WORK1, x, 29); - EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MVN_rrLSLi(REG_WORK1, x, 29); MSR_CPSRf_r(REG_WORK1); LSL_rri(REG_WORK1, s, 16); @@ -5585,8 +5551,7 @@ MIDFUNC(3,jff_SUBX_l,(W4 d, RR4 s, RR4 v)) CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG); // Restore inverted X to carry (don't care about other flags) 
- LSL_rri(REG_WORK1, x, 29); - EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); + MVN_rrLSLi(REG_WORK1, x, 29); MSR_CPSRf_r(REG_WORK1); SBCS_rrr(d, s, v); diff --git a/src/jit/compemu_midfunc_arm2.h b/src/jit/compemu_midfunc_arm2.h index 45abf724..fca84036 100644 --- a/src/jit/compemu_midfunc_arm2.h +++ b/src/jit/compemu_midfunc_arm2.h @@ -46,7 +46,6 @@ DECLARE_MIDFUNC(jff_ADD_w_imm(W4 d, RR2 s, IMM v)); DECLARE_MIDFUNC(jff_ADD_l_imm(W4 d, RR4 s, IMM v)); // ADDA -DECLARE_MIDFUNC(jnf_ADDA_b(W4 d, RR1 s)); DECLARE_MIDFUNC(jnf_ADDA_w(W4 d, RR2 s)); DECLARE_MIDFUNC(jnf_ADDA_l(W4 d, RR4 s)); diff --git a/src/jit/compemu_support.cpp b/src/jit/compemu_support.cpp index 31859be5..34975e9e 100644 --- a/src/jit/compemu_support.cpp +++ b/src/jit/compemu_support.cpp @@ -8,7 +8,7 @@ * This file is part of the ARAnyM project which builds a new and powerful * TOS/FreeMiNT compatible virtual machine running on almost any hardware. * - * JIT compiler m68k -> IA-32 and AMD64 / ARM + * JIT compiler m68k -> ARM * * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne @@ -29,9 +29,6 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ -#define writemem_special writemem -#define readmem_special readmem - #include #include "sysconfig.h" @@ -96,9 +93,6 @@ static int untranslated_compfn(const void *e1, const void *e2) static compop_func *compfunctbl[65536]; static compop_func *nfcompfunctbl[65536]; -#ifdef NOFLAGS_SUPPORT -static cpuop_func *nfcpufunctbl[65536]; -#endif uae_u8* comp_pc_p; // gb-- Extra data for Basilisk II/JIT @@ -115,14 +109,7 @@ static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has bee #else #define avoid_fpu (true) #endif -static const int align_loops = 0; // Align the start of loops -static const int align_jumps = 0; // Align the start of jumps -static int optcount[10] = { - 4, // How often a block has to be executed before 
it is translated - 0, // How often to use naive translation - 0, 0, 0, 0, - -1, -1, -1, -1 -}; +static int optcount = 4; // How often a block has to be executed before it is translated op_properties prop[65536]; @@ -135,14 +122,8 @@ STATIC_INLINE bool is_const_jump(uae_u32 opcode) return (prop[opcode].cflow == fl_const_jump); } -STATIC_INLINE unsigned int cft_map (unsigned int f) -{ - return f; -} - uae_u8* start_pc_p; uae_u32 start_pc; -uae_u32 current_block_pc_p; static uintptr current_block_start_target; uae_u32 needed_flags; static uintptr next_pc_p; @@ -150,21 +131,15 @@ static uintptr taken_pc_p; static int branch_cc; static int redo_current_block; -int segvcount = 0; uae_u8* current_compile_p = NULL; static uae_u8* max_compile_start; uae_u8* compiled_code = NULL; -const int POPALLSPACE_SIZE = 2048; /* That should be enough space */ +const int POPALLSPACE_SIZE = 512; /* That should be enough space */ uae_u8 *popallspace = NULL; void* pushall_call_handler = NULL; -static void* popall_do_nothing = NULL; -static void* popall_exec_nostats = NULL; static void* popall_execute_normal = NULL; -static void* popall_cache_miss = NULL; -static void* popall_recompile_block = NULL; static void* popall_check_checksum = NULL; -static void* popall_execute_exception = NULL; /* The 68k only ever executes from even addresses. So right now, we * waste half the entries in this array @@ -200,52 +175,30 @@ uae_u8 *cache_alloc (int size) #endif -#ifdef NOFLAGS_SUPPORT -/* 68040 */ -extern const struct cputbl op_smalltbl_0_nf[]; -#endif extern const struct comptbl op_smalltbl_0_comp_nf[]; extern const struct comptbl op_smalltbl_0_comp_ff[]; -#ifdef NOFLAGS_SUPPORT -/* 68020 + 68881 */ -extern const struct cputbl op_smalltbl_1_nf[]; -/* 68020 */ -extern const struct cputbl op_smalltbl_2_nf[]; -/* 68010 */ -extern const struct cputbl op_smalltbl_3_nf[]; -/* 68000 */ -extern const struct cputbl op_smalltbl_4_nf[]; -/* 68000 slow but compatible. 
*/ -extern const struct cputbl op_smalltbl_5_nf[]; -#endif - static bigstate live; -static smallstate empty_ss; -static smallstate default_ss; -static int optlev; static int writereg(int r, int size); static void unlock2(int r); static void setlock(int r); static int readreg_specific(int r, int size, int spec); -static int writereg_specific(int r, int size, int spec); static void prepare_for_call_1(void); static void prepare_for_call_2(void); -STATIC_INLINE void align_target(uae_u32 a); STATIC_INLINE void flush_cpu_icache(void *from, void *to); -STATIC_INLINE void write_jmp_target(uae_u32 *jmpaddr, cpuop_func* a); -STATIC_INLINE void emit_jmp_target(uae_u32 a); +STATIC_INLINE void write_jmp_target(uae_u32 *jmpaddr, uintptr a); uae_u32 m68k_pc_offset; /* Flag handling is complicated. * - * x86 instructions create flags, which quite often are exactly what we - * want. So at times, the "68k" flags are actually in the x86 flags. + * ARM instructions create flags, which quite often are exactly what we + * want. So at times, the "68k" flags are actually in the ARM flags. + * Exception: carry is often inverted. * - * Then again, sometimes we do x86 instructions that clobber the x86 + * Then again, sometimes we do ARM instructions that clobber the ARM * flags, but don't represent a corresponding m68k instruction. In that * case, we have to save them. 
* @@ -309,12 +262,6 @@ STATIC_INLINE void remove_from_list(blockinfo* bi) bi->next->prev_p = bi->prev_p; } -STATIC_INLINE void remove_from_lists(blockinfo* bi) -{ - remove_from_list(bi); - remove_from_cl_list(bi); -} - STATIC_INLINE void add_to_cl_list(blockinfo* bi) { uae_u32 cl = cacheline(bi->pc_p); @@ -375,7 +322,7 @@ STATIC_INLINE void remove_deps(blockinfo* bi) STATIC_INLINE void adjust_jmpdep(dependency* d, cpuop_func* a) { - write_jmp_target(d->jmp_off, a); + write_jmp_target(d->jmp_off, (uintptr)a); } /******************************************************************** @@ -407,7 +354,7 @@ void invalidate_block(blockinfo* bi) int i; bi->optlevel = 0; - bi->count = optcount[0]-1; + bi->count = optcount-1; bi->handler = NULL; bi->handler_to_use = (cpuop_func *)popall_execute_normal; bi->direct_handler = NULL; @@ -478,7 +425,7 @@ STATIC_INLINE void mark_callers_recompile(blockinfo * bi) } } -STATIC_INLINE blockinfo* get_blockinfo_addr_new(void* addr, int setstate) +STATIC_INLINE blockinfo* get_blockinfo_addr_new(void* addr) { blockinfo* bi = get_blockinfo_addr(addr); int i; @@ -653,30 +600,6 @@ STATIC_INLINE void alloc_blockinfos(void) } } -bool check_prefs_changed_comp(bool checkonly) -{ - bool changed = 0; - - if (currprefs.compfpu != changed_prefs.compfpu || - currprefs.fpu_strict != changed_prefs.fpu_strict || - currprefs.cachesize != changed_prefs.cachesize) - changed = 1; - - if (checkonly) - return changed; - - currprefs.compfpu = changed_prefs.compfpu; - currprefs.fpu_strict = changed_prefs.fpu_strict; - - if (currprefs.cachesize != changed_prefs.cachesize) { - currprefs.cachesize = changed_prefs.cachesize; - alloc_cache(); - changed = 1; - } - - return changed; -} - /******************************************************************** * Functions to emit data into memory, and other general support * ********************************************************************/ @@ -701,29 +624,14 @@ STATIC_INLINE void skip_long() #define MAX_COMPILE_PTR 
max_compile_start -STATIC_INLINE uae_u32 reverse32(uae_u32 v) -{ -#if 1 - // gb-- We have specialized byteswapping functions, just use them - return do_byteswap_32(v); -#else - return ((v>>24)&0xff) | ((v>>8)&0xff00) | ((v<<8)&0xff0000) | ((v<<24)&0xff000000); -#endif -} - void set_target(uae_u8* t) { target = t; } -STATIC_INLINE uae_u8* get_target_noopt(void) -{ - return target; -} - STATIC_INLINE uae_u8* get_target(void) { - return get_target_noopt(); + return target; } /******************************************************************** @@ -744,7 +652,7 @@ STATIC_INLINE void compemu_raw_branch(IMM d); STATIC_INLINE void data_check_end(uae_s32 n, uae_s32 codesize) { - if(data_writepos + n > data_endpos || get_target_noopt() + codesize - data_writepos > DATA_BUFFER_MAXOFFSET) + if(data_writepos + n > data_endpos || get_target + codesize - data_writepos > DATA_BUFFER_MAXOFFSET) { // Start new buffer #ifdef DEBUG_DATA_BUFFER @@ -753,9 +661,9 @@ STATIC_INLINE void data_check_end(uae_s32 n, uae_s32 codesize) data_buffers_used++; #endif compemu_raw_branch(DATA_BUFFER_SIZE); - data_writepos = get_target_noopt(); + data_writepos = get_target(); data_endpos = data_writepos + DATA_BUFFER_SIZE; - set_target(get_target_noopt() + DATA_BUFFER_SIZE); + set_target(get_target() + DATA_BUFFER_SIZE); } } @@ -766,7 +674,7 @@ STATIC_INLINE uae_s32 data_word_offs(uae_u16 x) data_writepos += 2; *((uae_u16*)data_writepos) = 0; data_writepos += 2; - return (uae_s32)data_writepos - (uae_s32)get_target_noopt() - 12; + return (uae_s32)data_writepos - (uae_s32)get_target() - 12; } STATIC_INLINE uae_s32 data_long(uae_u32 x, uae_s32 codesize) @@ -782,12 +690,12 @@ STATIC_INLINE uae_s32 data_long_offs(uae_u32 x) data_check_end(4, 4); *((uae_u32*)data_writepos) = x; data_writepos += 4; - return (uae_s32)data_writepos - (uae_s32)get_target_noopt() - 12; + return (uae_s32)data_writepos - (uae_s32)get_target() - 12; } STATIC_INLINE uae_s32 get_data_offset(uae_s32 t) { - return t - 
(uae_s32)get_target_noopt() - 8; + return t - (uae_s32)get_target() - 8; } STATIC_INLINE void reset_data_buffer(void) @@ -821,7 +729,7 @@ static void make_flags_live_internal(void) return; if (live.flags_on_stack == VALID) { int tmp; - tmp = readreg_specific(FLAGTMP, 4, FLAG_NREG2); + tmp = readreg_specific(FLAGTMP, 4, -1); raw_reg_to_flags(tmp); unlock2(tmp); @@ -842,7 +750,7 @@ static void flags_to_stack(void) } { int tmp; - tmp = writereg_specific(FLAGTMP, 4, FLAG_NREG1); + tmp = writereg(FLAGTMP, 4); raw_flags_to_reg(tmp); unlock2(tmp); } @@ -862,25 +770,15 @@ STATIC_INLINE void flush_flags(void) flags_to_stack(); } -int touchcnt; +static int touchcnt; /******************************************************************** * register allocation per block logging * ********************************************************************/ -#define L_UNKNOWN -127 -#define L_UNAVAIL -1 -#define L_NEEDED -2 -#define L_UNNEEDED -3 - STATIC_INLINE void do_load_reg(int n, int r) { - if (r == FLAGTMP) - raw_load_flagreg(n, r); - else if (r == FLAGX) - raw_load_flagx(n, r); - else - compemu_raw_mov_l_rm(n, (uintptr) live.state[r].mem); + compemu_raw_mov_l_rm(n, (uintptr) live.state[r].mem); } /******************************************************************** @@ -918,11 +816,6 @@ STATIC_INLINE int isconst(int r) return live.state[r].status == ISCONST; } -int is_const(int r) -{ - return isconst(r); -} - STATIC_INLINE void writeback_const(int r) { if (!isconst(r)) @@ -1009,7 +902,6 @@ static int alloc_reg_hinted(int r, int size, int willclobber, int hint) bestreg = -1; when = 2000000000; - /* XXX use a regalloc_order table? */ for (i=0; i live.state[r].validsize ? size : live.state[r].validsize; int ndsize = size > live.state[r].dirtysize ? 
size : live.state[r].dirtysize; @@ -1274,11 +1170,7 @@ STATIC_INLINE int writereg_general(int r, int size, int spec) /* either the value was in memory to start with, or it was evicted and is in memory now */ if (answer < 0) { - answer = alloc_reg_hinted(r, size, 1, spec); - } - if (spec >= 0 && spec != answer) { - mov_nregs(spec, answer); - answer = spec; + answer = alloc_reg_hinted(r, size, 1, -1); } if (live.state[r].status == UNDEF) live.state[r].validsize = 4; @@ -1294,17 +1186,7 @@ STATIC_INLINE int writereg_general(int r, int size, int spec) return answer; } -static int writereg(int r, int size) -{ - return writereg_general(r, size, -1); -} - -static int writereg_specific(int r, int size, int spec) -{ - return writereg_general(r, size, spec); -} - -STATIC_INLINE int rmw_general(int r, int wsize, int rsize) +static int rmw(int r, int wsize, int rsize) { int n; int answer = -1; @@ -1312,7 +1194,7 @@ STATIC_INLINE int rmw_general(int r, int wsize, int rsize) if (live.state[r].status == UNDEF) { jit_log("WARNING: Unexpected read of undefined register %d", r); } - make_exclusive(r, 0, -1); + make_exclusive(r, 0); if (isinreg(r) && live.state[r].validsize >= rsize) { n = live.state[r].realreg; @@ -1339,11 +1221,6 @@ STATIC_INLINE int rmw_general(int r, int wsize, int rsize) return answer; } -static int rmw(int r, int wsize, int rsize) -{ - return rmw_general(r, wsize, rsize); -} - /******************************************************************** * FPU register status handling. EMIT TIME! 
* ********************************************************************/ @@ -1538,14 +1415,11 @@ void compiler_exit(void) #endif // Deallocate translation cache - if (compiled_code) { - cache_free(compiled_code, cache_size * 1024); - compiled_code = 0; - } + compiled_code = 0; // Deallocate popallspace if (popallspace) { - cache_free(popallspace, POPALLSPACE_SIZE); + cache_free(popallspace, POPALLSPACE_SIZE + MAX_JIT_CACHE * 1024); popallspace = 0; } @@ -1618,8 +1492,6 @@ void init_comp(void) #endif set_status(FLAGTMP, INMEM); - set_status(NEXT_HANDLER, UNDEF); - for (i = 0; i < VFREGS; i++) { if (i < 8) { /* First 8 registers map to 68k FPU registers */ live.fate[i].mem = (uae_u32*)(®s.fp[i].fp); @@ -1635,7 +1507,6 @@ void init_comp(void) live.fate[i].mem = (uae_u32*)(&scratch.fregs[i]); } - for (i=0; i= uae_p32(kickmem_bank.baseaddr) && @@ -1788,8 +1648,7 @@ static void prepare_for_call_2(void) f_free_nreg(i); #endif live.flags_in_flags = TRASH; /* Note: We assume we already rescued the - flags at the very start of the call_r - functions! */ + flags at the very start of the call_r functions! 
*/ } /******************************************************************** @@ -1813,7 +1672,7 @@ void register_possible_exception(void) */ static uintptr get_handler(uintptr addr) { - blockinfo* bi = get_blockinfo_addr_new((void*)(uintptr) addr, 0); + blockinfo* bi = get_blockinfo_addr_new((void*)(uintptr) addr); return (uintptr)bi->direct_handler_to_use; } @@ -1841,7 +1700,7 @@ static void writemem_real(int address, int source, int size) } } -STATIC_INLINE void writemem(int address, int source, int offset, int size, int tmp) +STATIC_INLINE void writemem_special(int address, int source, int offset, int size, int tmp) { jnf_MEM_GETBANKFUNC(tmp, address, offset); /* Now tmp holds the address of the b/w/lput function */ @@ -1936,7 +1795,7 @@ static void readmem_real(int address, int dest, int size) } } -STATIC_INLINE void readmem(int address, int dest, int offset, int size, int tmp) +STATIC_INLINE void readmem_special(int address, int dest, int offset, int size, int tmp) { jnf_MEM_GETBANKFUNC(tmp, address, offset); /* Now tmp holds the address of the b/w/lget function */ @@ -1971,7 +1830,7 @@ void readlong(int address, int dest, int tmp) /* This one might appear a bit odd... */ STATIC_INLINE void get_n_addr_old(int address, int dest, int tmp) { - readmem(address, dest, 24, 4, tmp); + readmem_special(address, dest, 24, 4, tmp); } STATIC_INLINE void get_n_addr_real(int address, int dest) @@ -1994,7 +1853,10 @@ void get_n_addr_jmp(int address, int dest, int tmp) { /* For this, we need to get the same address as the rest of UAE would --- otherwise we end up translating everything twice */ - get_n_addr(address, dest, tmp); + if (special_mem) + get_n_addr_old(address,dest,tmp); + else + get_n_addr_real(address,dest); } /* base is a register, but dp is an actual value. 
@@ -2076,7 +1938,6 @@ void alloc_cache(void) { if (compiled_code) { flush_icache_hard(3); - cache_free(compiled_code, cache_size * 1024); compiled_code = 0; } @@ -2084,12 +1945,8 @@ void alloc_cache(void) if (cache_size == 0) return; - while (!compiled_code && cache_size) { - compiled_code = cache_alloc(cache_size * 1024); - if (compiled_code == NULL) { - cache_size /= 2; - } - } + if(popallspace) + compiled_code = popallspace + POPALLSPACE_SIZE; if (compiled_code) { jit_log("Actual translation cache size : %d KB at %p-%p", cache_size, compiled_code, compiled_code + cache_size*1024); @@ -2266,41 +2123,10 @@ static void check_checksum(void) STATIC_INLINE void match_states(blockinfo* bi) { - int i; - smallstate* s = &(bi->env); - if (bi->status == BI_NEED_CHECK) { block_check_checksum(bi); } - if (bi->status == BI_ACTIVE || - bi->status == BI_FINALIZING) { /* Deal with the *promises* the - block makes (about not using - certain vregs) */ - for (i = 0; i < 16; i++) { - if (s->virt[i] == L_UNNEEDED) { - jit_log2("unneeded reg %d at %p", i, target); - COMPCALL(forget_about)(i); // FIXME - } - } - } flush(1); - - /* And now deal with the *demands* the block makes */ - for (i = 0; i < N_REGS; i++) { - int v = s->nat[i]; - if (v >= 0) { - // printf("Loading reg %d into %d at %p\n",v,i,target); - readreg_specific(v, 4, i); - // do_load_reg(i,v); - // setlock(i); - } - } - for (i = 0; i < N_REGS; i++) { - int v = s->nat[i]; - if (v >= 0) { - unlock2(i); - } - } } STATIC_INLINE void create_popalls(void) @@ -2308,7 +2134,7 @@ STATIC_INLINE void create_popalls(void) int i, r; if (popallspace == NULL) { - if ((popallspace = cache_alloc (POPALLSPACE_SIZE)) == NULL) { + if ((popallspace = cache_alloc (POPALLSPACE_SIZE + MAX_JIT_CACHE * 1024)) == NULL) { jit_log("WARNING: Could not allocate popallspace!"); /* This is not fatal if JIT is not used. 
If JIT is * turned on, it will crash, but it would have crashed @@ -2317,15 +2143,6 @@ STATIC_INLINE void create_popalls(void) } } - int stack_space = STACK_OFFSET; - for (i = 0; i< N_REGS; i++) { - if (need_to_preserve[i]) - stack_space += sizeof(void *); - } - stack_space %= STACK_ALIGN; - if (stack_space) - stack_space = STACK_ALIGN - stack_space; - current_compile_p = popallspace; set_target(current_compile_p); @@ -2342,66 +2159,26 @@ STATIC_INLINE void create_popalls(void) In summary, JIT generated code is not leaf so we have to deal with it here to maintain correct stack alignment. */ - align_target(align_jumps); current_compile_p = get_target(); pushall_call_handler = get_target(); raw_push_regs_to_preserve(); - raw_dec_sp(stack_space); compemu_raw_init_r_regstruct((uintptr)®s); r = REG_PC_TMP; - compemu_raw_mov_l_rm(r, uae_p32(®s.pc_p)); - compemu_raw_and_TAGMASK(r); + compemu_raw_tag_pc(r, uae_p32(®s.pc_p)); compemu_raw_jmp_m_indexed(uae_p32(cache_tags), r, SIZEOF_VOID_P); /* now the exit points */ - align_target(align_jumps); - popall_do_nothing = get_target(); - raw_inc_sp(stack_space); - raw_pop_preserved_regs(); - compemu_raw_jmp(uae_p32(do_nothing)); - - align_target(align_jumps); popall_execute_normal = get_target(); - raw_inc_sp(stack_space); raw_pop_preserved_regs(); compemu_raw_jmp(uae_p32(execute_normal)); - align_target(align_jumps); - popall_cache_miss = get_target(); - raw_inc_sp(stack_space); - raw_pop_preserved_regs(); - compemu_raw_jmp(uae_p32(cache_miss)); - - align_target(align_jumps); - popall_recompile_block = get_target(); - raw_inc_sp(stack_space); - raw_pop_preserved_regs(); - compemu_raw_jmp(uae_p32(recompile_block)); - - align_target(align_jumps); - popall_exec_nostats = get_target(); - raw_inc_sp(stack_space); - raw_pop_preserved_regs(); - compemu_raw_jmp(uae_p32(exec_nostats)); - - align_target(align_jumps); popall_check_checksum = get_target(); - raw_inc_sp(stack_space); raw_pop_preserved_regs(); 
compemu_raw_jmp(uae_p32(check_checksum)); - align_target(align_jumps); - popall_execute_exception = get_target(); - raw_inc_sp(stack_space); - raw_pop_preserved_regs(); - compemu_raw_jmp(uae_p32(execute_exception)); - #if defined(CPU_arm) && !defined(ARMV6T2) reset_data_buffer(); #endif - - // No need to flush. Initialized and not modified - // flush_cpu_icache((void *)popallspace, (void *)target); } STATIC_INLINE void reset_lists(void) @@ -2419,17 +2196,17 @@ static void prepare_block(blockinfo* bi) int i; set_target(current_compile_p); - align_target(align_jumps); bi->direct_pen = (cpuop_func *)get_target(); compemu_raw_mov_l_rm(0, (uintptr)&(bi->pc_p)); compemu_raw_mov_l_mr((uintptr)®s.pc_p, 0); - compemu_raw_jmp((uintptr)popall_execute_normal); + raw_pop_preserved_regs(); + compemu_raw_jmp((uintptr)execute_normal); - align_target(align_jumps); bi->direct_pcc = (cpuop_func *)get_target(); compemu_raw_mov_l_rm(0, (uintptr)&(bi->pc_p)); compemu_raw_mov_l_mr((uintptr)®s.pc_p, 0); - compemu_raw_jmp((uintptr)popall_check_checksum); + raw_pop_preserved_regs(); + compemu_raw_jmp((uintptr)check_checksum); flush_cpu_icache((void *)current_compile_p, (void *)target); current_compile_p = get_target(); @@ -2438,10 +2215,7 @@ static void prepare_block(blockinfo* bi) bi->dep[i].prev_p = NULL; bi->dep[i].next = NULL; } - bi->env = default_ss; bi->status = BI_INVALID; - bi->havestate = 0; - //bi->env=empty_ss; } void compemu_reset(void) @@ -2449,7 +2223,7 @@ void compemu_reset(void) set_cache_state(0); } -// OPCODE is in big endian format, use cft_map() beforehand, if needed. +// OPCODE is in big endian format STATIC_INLINE void reset_compop(int opcode) { compfunctbl[opcode] = NULL; @@ -2463,20 +2237,9 @@ void build_comp(void) const struct comptbl* tbl = op_smalltbl_0_comp_ff; const struct comptbl* nftbl = op_smalltbl_0_comp_nf; int count; -#ifdef NOFLAGS_SUPPORT - struct comptbl *nfctbl = (currprefs.cpu_level >= 5 ? op_smalltbl_0_nf - : currprefs.cpu_level == 4 ? 
op_smalltbl_1_nf - : (currprefs.cpu_level == 2 || currprefs.cpu_level == 3) ? op_smalltbl_2_nf - : currprefs.cpu_level == 1 ? op_smalltbl_3_nf - : ! currprefs.cpu_compatible ? op_smalltbl_4_nf - : op_smalltbl_5_nf); -#endif for (opcode = 0; opcode < 65536; opcode++) { reset_compop(opcode); -#ifdef NOFLAGS_SUPPORT - nfcpufunctbl[opcode] = _op_illg; -#endif prop[opcode].use_flags = 0x1f; prop[opcode].set_flags = 0x1f; prop[opcode].cflow = fl_jump | fl_trap; // ILLEGAL instructions do trap @@ -2488,38 +2251,26 @@ void build_comp(void) cflow = fl_const_jump; else cflow &= ~fl_const_jump; - prop[cft_map(tbl[i].opcode)].cflow = cflow; + prop[tbl[i].opcode].cflow = cflow; bool uses_fpu = (tbl[i].specific & COMP_OPCODE_USES_FPU) != 0; if (uses_fpu && avoid_fpu) - compfunctbl[cft_map(tbl[i].opcode)] = NULL; + compfunctbl[tbl[i].opcode] = NULL; else - compfunctbl[cft_map(tbl[i].opcode)] = tbl[i].handler; + compfunctbl[tbl[i].opcode] = tbl[i].handler; } for (i = 0; nftbl[i].opcode < 65536; i++) { int uses_fpu = tbl[i].specific & COMP_OPCODE_USES_FPU; if (uses_fpu && avoid_fpu) - nfcompfunctbl[cft_map(nftbl[i].opcode)] = NULL; + nfcompfunctbl[nftbl[i].opcode] = NULL; else - nfcompfunctbl[cft_map(nftbl[i].opcode)] = nftbl[i].handler; -#ifdef NOFLAGS_SUPPORT - nfcpufunctbl[cft_map(nftbl[i].opcode)] = nfctbl[i].handler; -#endif + nfcompfunctbl[nftbl[i].opcode] = nftbl[i].handler; } -#ifdef NOFLAGS_SUPPORT - for (i = 0; nfctbl[i].handler; i++) { - nfcpufunctbl[cft_map(nfctbl[i].opcode)] = nfctbl[i].handler; - } -#endif - for (opcode = 0; opcode < 65536; opcode++) { compop_func *f; compop_func *nff; -#ifdef NOFLAGS_SUPPORT - cpuop_func *nfcf; -#endif int isaddx, cflow; int cpu_level = (currprefs.cpu_model - 68000) / 10; @@ -2529,38 +2280,26 @@ void build_comp(void) continue; if (table68k[opcode].handler != -1) { - f = compfunctbl[cft_map(table68k[opcode].handler)]; - nff = nfcompfunctbl[cft_map(table68k[opcode].handler)]; -#ifdef NOFLAGS_SUPPORT - nfcf = 
nfcpufunctbl[cft_map(table68k[opcode].handler)]; -#endif - cflow = prop[cft_map(table68k[opcode].handler)].cflow; - isaddx = prop[cft_map(table68k[opcode].handler)].is_addx; - prop[cft_map(opcode)].cflow = cflow; - prop[cft_map(opcode)].is_addx = isaddx; - compfunctbl[cft_map(opcode)] = f; - nfcompfunctbl[cft_map(opcode)] = nff; -#ifdef NOFLAGS_SUPPORT - nfcpufunctbl[cft_map(opcode)] = nfcf; -#endif + f = compfunctbl[table68k[opcode].handler]; + nff = nfcompfunctbl[table68k[opcode].handler]; + cflow = prop[table68k[opcode].handler].cflow; + isaddx = prop[table68k[opcode].handler].is_addx; + prop[opcode].cflow = cflow; + prop[opcode].is_addx = isaddx; + compfunctbl[opcode] = f; + nfcompfunctbl[opcode] = nff; } - prop[cft_map(opcode)].set_flags = table68k[opcode].flagdead; - prop[cft_map(opcode)].use_flags = table68k[opcode].flaglive; + prop[opcode].set_flags = table68k[opcode].flagdead; + prop[opcode].use_flags = table68k[opcode].flaglive; /* Unconditional jumps don't evaluate condition codes, so they * don't actually use any flags themselves */ - if (prop[cft_map(opcode)].cflow & fl_const_jump) - prop[cft_map(opcode)].use_flags = 0; + if (prop[opcode].cflow & fl_const_jump) + prop[opcode].use_flags = 0; } -#ifdef NOFLAGS_SUPPORT - for (i = 0; nfctbl[i].handler != NULL; i++) { - if (nfctbl[i].specific) - nfcpufunctbl[cft_map(tbl[i].opcode)] = nfctbl[i].handler; - } -#endif count = 0; for (opcode = 0; opcode < 65536; opcode++) { - if (compfunctbl[cft_map(opcode)]) + if (compfunctbl[opcode]) count++; } jit_log("Supposedly %d compileable opcodes!",count); @@ -2575,14 +2314,6 @@ void build_comp(void) cache_tags[i+1].bi = NULL; } compemu_reset(); - - for (i = 0; i < VREGS; i++) { - empty_ss.virt[i] = L_NEEDED; - } - for (i = 0; i < N_REGS; i++) { - empty_ss.nat[i] = L_UNKNOWN; - } - default_ss = empty_ss; } void flush_icache_hard(int n) @@ -2634,13 +2365,12 @@ void flush_icache(int n) bi = active; while (bi) { uae_u32 cl = cacheline(bi->pc_p); - if (bi->status == 
BI_INVALID || - bi->status == BI_NEED_RECOMP) { - if (bi == cache_tags[cl+1].bi) - cache_tags[cl].handler = (cpuop_func *)popall_execute_normal; - bi->handler_to_use = (cpuop_func *)popall_execute_normal; - set_dhtu(bi,bi->direct_pen); - bi->status = BI_INVALID; + if (bi->status == BI_INVALID || bi->status == BI_NEED_RECOMP) { + if (bi == cache_tags[cl+1].bi) + cache_tags[cl].handler = (cpuop_func *)popall_execute_normal; + bi->handler_to_use = (cpuop_func *)popall_execute_normal; + set_dhtu(bi,bi->direct_pen); + bi->status = BI_INVALID; } else { if (bi == cache_tags[cl+1].bi) @@ -2695,7 +2425,6 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) void* specflags = (void*)®s.spcflags; blockinfo* bi = NULL; blockinfo* bi2; - int extra_len = 0; redo_current_block = 0; if (current_compile_p >= MAX_COMPILE_PTR) @@ -2703,17 +2432,14 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) alloc_blockinfos(); - bi = get_blockinfo_addr_new(pc_hist[0].location, 0); + bi = get_blockinfo_addr_new(pc_hist[0].location); bi2 = get_blockinfo(cl); - optlev = bi->optlevel; + int optlev = bi->optlevel; if (bi->count == -1) { - optlev++; - while (!optcount[optlev]) - optlev++; - bi->count = optcount[optlev] - 1; + optlev = 2; + bi->count = -2; } - current_block_pc_p = (uintptr)pc_hist[0].location; remove_deps(bi); /* We are about to create new code */ bi->optlevel = optlev; @@ -2747,9 +2473,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) max_pcp=(uintptr)currpcp; #endif - liveflags[i] = ((liveflags[i+1] & - (~prop[op].set_flags)) | - prop[op].use_flags); + liveflags[i] = ((liveflags[i + 1] & (~prop[op].set_flags)) | prop[op].use_flags); if (prop[op].is_addx && (liveflags[i+1] & FLAG_Z) == 0) liveflags[i] &= ~FLAG_Z; } @@ -2765,7 +2489,6 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) bi->needed_flags = liveflags[0]; /* This is the non-direct handler */ - align_target(align_loops); was_comp = 0; 
bi->direct_handler = (cpuop_func *)get_target(); @@ -2776,12 +2499,13 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) if (bi->count >= 0) { /* Need to generate countdown code */ compemu_raw_mov_l_mi((uintptr)®s.pc_p, (uintptr)pc_hist[0].location); compemu_raw_sub_l_mi((uintptr)&(bi->count), 1); - compemu_raw_jl((uintptr)popall_recompile_block); + compemu_raw_maybe_recompile((uintptr)recompile_block); } if (optlev == 0) { /* No need to actually translate */ /* Execute normally without keeping stats */ compemu_raw_mov_l_mi((uintptr)®s.pc_p, (uintptr)pc_hist[0].location); - compemu_raw_jmp((uintptr)popall_exec_nostats); + raw_pop_preserved_regs(); + compemu_raw_jmp(uae_p32(exec_nostats)); } else { next_pc_p = 0; @@ -2792,7 +2516,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) init_comp(); was_comp = 1; - for (i = 0; i < blocklen && get_target_noopt() < MAX_COMPILE_PTR; i++) { + for (i = 0; i < blocklen && get_target() < MAX_COMPILE_PTR; i++) { may_raise_exception = false; cpuop_func **cputbl; compop_func **comptbl; @@ -2800,11 +2524,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) needed_flags = (liveflags[i+1] & prop[opcode].set_flags); special_mem = pc_hist[i].specmem; if (!needed_flags) { -#ifdef NOFLAGS_SUPPORT - cputbl=nfcpufunctbl; -#else cputbl=cpufunctbl; -#endif comptbl=nfcompfunctbl; } else { @@ -2846,7 +2566,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) compemu_raw_call((uintptr)cputbl[opcode]); #ifdef PROFILE_UNTRANSLATED_INSNS // raw_cputbl_count[] is indexed with plain opcode (in m68k order) - compemu_raw_add_l_mi((uintptr)&raw_cputbl_count[cft_map(opcode)], 1); + compemu_raw_add_l_mi((uintptr)&raw_cputbl_count[opcode], 1); #endif if (i < blocklen - 1) { @@ -2860,7 +2580,8 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) compemu_raw_jz_b_oponly(); branchadd = (uae_s8 *)get_target(); compemu_raw_sub_l_mi(uae_p32(&countdown), 
scaled_cycles(totcycles)); - compemu_raw_jmp((uintptr)popall_do_nothing); + raw_pop_preserved_regs(); + compemu_raw_jmp((uintptr)do_nothing); *(branchadd - 4) = (((uintptr)get_target() - (uintptr)branchadd) - 4) >> 2; } } else if(may_raise_exception) { @@ -2892,36 +2613,32 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) } tmp = live; /* ouch! This is big... */ - compemu_raw_jcc_l_oponly(cc); - branchadd = (uae_u32*)get_target(); - skip_long(); + compemu_raw_jcc_l_oponly(cc); // Last emitted opcode is branch to target + branchadd = (uae_u32*)get_target() - 1; /* predicted outcome */ - tbi = get_blockinfo_addr_new((void*)t1, 1); + tbi = get_blockinfo_addr_new((void*)t1); match_states(tbi); #if defined(CPU_arm) && !defined(ARMV6T2) data_check_end(4, 56); #endif - compemu_raw_endblock_pc_isconst(scaled_cycles(totcycles), t1); - tba = (uae_u32*)get_target(); - emit_jmp_target(get_handler(t1)); + tba = compemu_raw_endblock_pc_isconst(scaled_cycles(totcycles), t1); + write_jmp_target(tba, get_handler(t1)); create_jmpdep(bi, 0, tba, t1); - align_target(align_jumps); /* not-predicted outcome */ - write_jmp_target(branchadd, (cpuop_func*)get_target()); + write_jmp_target(branchadd, (uintptr)get_target()); live = tmp; /* Ouch again */ - tbi = get_blockinfo_addr_new((void*)t2, 1); + tbi = get_blockinfo_addr_new((void*)t2); match_states(tbi); //flush(1); /* Can only get here if was_comp==1 */ #if defined(CPU_arm) && !defined(ARMV6T2) data_check_end(4, 56); #endif - compemu_raw_endblock_pc_isconst(scaled_cycles(totcycles), t2); - tba = (uae_u32*)get_target(); - emit_jmp_target(get_handler(t2)); + tba = compemu_raw_endblock_pc_isconst(scaled_cycles(totcycles), t2); + write_jmp_target(tba, get_handler(t2)); create_jmpdep(bi, 1, tba, t2); } else @@ -2943,15 +2660,14 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) uae_u32* tba; blockinfo* tbi; - tbi = get_blockinfo_addr_new((void*)v, 1); + tbi = get_blockinfo_addr_new((void*)v); 
match_states(tbi); #if defined(CPU_arm) && !defined(ARMV6T2) data_check_end(4, 56); #endif - compemu_raw_endblock_pc_isconst(scaled_cycles(totcycles), v); - tba = (uae_u32*)get_target(); - emit_jmp_target(get_handler(v)); + tba = compemu_raw_endblock_pc_isconst(scaled_cycles(totcycles), v); + write_jmp_target(tba, get_handler(v)); create_jmpdep(bi, 0, tba, v); } else { @@ -2978,8 +2694,8 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) add_to_active(bi); } #else - if (next_pc_p + extra_len >= max_pcp && next_pc_p + extra_len < max_pcp + LONGEST_68K_INST) - max_pcp = next_pc_p + extra_len; /* extra_len covers flags magic */ + if (next_pc_p >= max_pcp && next_pc_p < max_pcp + LONGEST_68K_INST) + max_pcp = next_pc_p; else max_pcp += LONGEST_68K_INST; @@ -2989,8 +2705,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) remove_from_list(bi); if (isinrom(min_pcp) && isinrom(max_pcp)) { add_to_dormant(bi); /* No need to checksum it on cache flush. - Please don't start changing ROMs in - flight! */ + Please don't start changing ROMs in flight! 
*/ } else { calc_checksum(bi, &(bi->c1), &(bi->c2)); @@ -3000,12 +2715,11 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles) current_cache_size += get_target() - (uae_u8 *)current_compile_p; - align_target(align_jumps); /* This is the non-direct handler */ bi->handler = bi->handler_to_use = (cpuop_func *)get_target(); compemu_raw_cmp_l_mi((uintptr)®s.pc_p, (uintptr)pc_hist[0].location); - compemu_raw_jnz((uintptr)popall_cache_miss); + compemu_raw_maybe_cachemiss((uintptr)cache_miss); comp_pc_p = (uae_u8*)pc_hist[0].location; bi->status=BI_FINALIZING; diff --git a/src/jit/comptbl.h b/src/jit/comptbl.h index ab40977f..636491c7 100644 --- a/src/jit/comptbl.h +++ b/src/jit/comptbl.h @@ -1,7 +1,3 @@ -#ifdef NOFLAGS_SUPPORT -/* 68040 */ -extern const struct comptbl op_smalltbl_0_nf[]; -#endif extern const struct comptbl op_smalltbl_0_comp_nf[]; extern const struct comptbl op_smalltbl_0_comp_ff[]; extern compop_func op_0_0_comp_ff; diff --git a/src/jit/gencomp_arm.cpp b/src/jit/gencomp_arm.cpp index 5e074309..b3b77cd0 100644 --- a/src/jit/gencomp_arm.cpp +++ b/src/jit/gencomp_arm.cpp @@ -693,15 +693,14 @@ static void gen_move16(uae_u32 opcode, struct instr *curi) comprintf(" " RETURN "\n"); comprintf("} \n"); - comprintf("\tint src=scratchie++;\n"); - comprintf("\tint dst=scratchie++;\n"); - uae_u32 masked_op = (opcode & 0xfff8); if (masked_op == 0xf620) { // POSTINCREMENT SOURCE AND DESTINATION version: MOVE16 (Ax)+,(Ay)+ + comprintf("\tint srca=scratchie++;\n"); + comprintf("\tint dsta=scratchie++;\n"); comprintf("\t uae_u16 dstreg = ((%s)>>12) & 0x07;\n", gen_nextiword()); - comprintf("\t jnf_MOVE(src, srcreg + 8);\n"); - comprintf("\t jnf_MOVE(dst, dstreg + 8);\n"); + comprintf("\t jnf_MOVE(srca, srcreg + 8);\n"); + comprintf("\t jnf_MOVE(dsta, dstreg + 8);\n"); comprintf("\t if (srcreg != dstreg)\n"); comprintf("\t jnf_ADD_im8(srcreg + 8, srcreg + 8, 16);\n"); comprintf("\t jnf_ADD_im8(dstreg + 8, dstreg + 8, 16);\n"); @@ -718,7 +717,7 @@ static 
void gen_move16(uae_u32 opcode, struct instr *curi) break; } } - comprintf("\tjnf_MOVE16(dst, src);\n"); + comprintf("\tjnf_MOVE16(dsta, srca);\n"); } static void @@ -3285,10 +3284,6 @@ int main(int argc, char *argv[]) headerfile = fopen("jit/comptbl.h", "wb"); fprintf (headerfile, "" \ - "#ifdef NOFLAGS_SUPPORT\n" \ - "/* 68040 */\n" \ - "extern const struct comptbl op_smalltbl_0_nf[];\n" \ - "#endif\n" \ "extern const struct comptbl op_smalltbl_0_comp_nf[];\n" \ "extern const struct comptbl op_smalltbl_0_comp_ff[];\n" \ "");