JIT Performance enhancements, fix for MOVE16 in JIT

Dimitris Panokostas 2018-02-16 17:16:18 +01:00
parent 2ed5859e5b
commit 034f310eaf
10 changed files with 386 additions and 795 deletions


@@ -85,12 +85,6 @@ extern void __clear_cache (char*, char*);
#define REG_PC_PRE R0_INDEX /* The register we use for preloading regs.pc_p */
#define REG_PC_TMP R1_INDEX /* Another register that is not the above */
#define MUL_NREG1 R0_INDEX /* %r4 will hold the low 32 bits after a 32x32 mul */
#define MUL_NREG2 R1_INDEX /* %r5 will hold the high 32 bits */
#define STACK_ALIGN 4
#define STACK_OFFSET sizeof(void *)
#define R_REGSTRUCT 11
uae_s8 always_used[]={2,3,R_REGSTRUCT,12,-1}; // r2, r3 and r12 are work registers in emitted code
@@ -164,8 +158,6 @@ STATIC_INLINE void SIGNED16_REG_2_REG(W4 d, RR4 s) {
#define SIGN_EXTEND_16_REG_2_REG(d,s) SIGNED16_REG_2_REG(d,s)
#define jit_unimplemented(fmt, ...) do{ jit_log("**** Unimplemented ****\n"); jit_log(fmt, ## __VA_ARGS__); abort(); }while (0)
LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, RR4 s))
{
ADD_rrr(d, d, s);
@@ -220,8 +212,8 @@ LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, IMM offset))
}
SIGNED8_IMM_2_REG(REG_WORK1, offset);
ADD_rrr(REG_WORK1, s, REG_WORK1);
ADD_rrr(REG_WORK1, s, REG_WORK1);
ADD_rrrLSLi(d, REG_WORK1, index, shft);
}
LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, IMM offset))
@@ -398,36 +390,6 @@ LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
STATIC_INLINE void raw_dec_sp(int off)
{
if (off) {
if(CHECK32(off)) {
SUB_rri(RSP_INDEX, RSP_INDEX, off);
} else {
LDR_rRI(REG_WORK1, RPC_INDEX, 4);
SUB_rrr(RSP_INDEX, RSP_INDEX, REG_WORK1);
B_i(0);
//<value>:
emit_long(off);
}
}
}
STATIC_INLINE void raw_inc_sp(int off)
{
if (off) {
if(CHECK32(off)) {
ADD_rri(RSP_INDEX, RSP_INDEX, off);
} else {
LDR_rRI(REG_WORK1, RPC_INDEX, 4);
ADD_rrr(RSP_INDEX, RSP_INDEX, REG_WORK1);
B_i(0);
//<value>:
emit_long(off);
}
}
}
STATIC_INLINE void raw_push_regs_to_preserve(void) {
PUSH_REGS(PRESERVE_MASK);
}
@@ -436,11 +398,6 @@ STATIC_INLINE void raw_pop_preserved_regs(void) {
POP_REGS(PRESERVE_MASK);
}
STATIC_INLINE void raw_load_flagx(uae_u32 t, uae_u32 r)
{
LDR_rRI(t, R_REGSTRUCT, 17 * 4); // X flag is next to 8 Dregs, 8 Aregs and CPSR in struct regstruct
}
STATIC_INLINE void raw_flags_evicted(int r)
{
live.state[FLAGTMP].status = INMEM;
@@ -465,24 +422,6 @@ STATIC_INLINE void raw_reg_to_flags(int r)
MSR_CPSRf_r(r);
}
STATIC_INLINE void raw_load_flagreg(uae_u32 t, uae_u32 r)
{
LDR_rRI(t, R_REGSTRUCT, 16 * 4); // Flags are next to 8 Dregs and 8 Aregs in struct regstruct
}
/* %eax register is clobbered if target processor doesn't support fucomi */
#define FFLAG_NREG_CLOBBER_CONDITION 0
#define FFLAG_NREG R0_INDEX
#define FLAG_NREG2 -1
#define FLAG_NREG1 -1
#define FLAG_NREG3 -1
STATIC_INLINE void raw_emit_nop_filler(int nbytes)
{
nbytes >>= 2;
while(nbytes--) { NOP(); }
}
//
// Arm instructions
//
@@ -526,19 +465,46 @@ LOWFUNC(WRITE,NONE,2,compemu_raw_MERGE_rr,(RW4 d, RR4 s))
}
LENDFUNC(WRITE,NONE,2,compemu_raw_MERGE_rr,(RW4 d, RR4 s))
LOWFUNC(WRITE,NONE,2,compemu_raw_MERGE8_rr,(RW4 d, RR4 s))
{
#ifdef ARMV6T2
UBFX_rrii(REG_WORK1, s, 8, 24);
BFI_rrii(d, REG_WORK1, 8, 31);
#else
AND_rri(REG_WORK1, s, 0xffffff00);
BIC_rri(d, d, 0xffffff00);
ORR_rrr(d, d, REG_WORK1);
#endif
}
LENDFUNC(WRITE,NONE,2,compemu_raw_MERGE8_rr,(RW4 d, RR4 s))
LOWFUNC(WRITE,RMW,2,compemu_raw_add_l_mi,(IMM d, IMM s))
{
#ifdef ARMV6T2
MOVW_ri16(REG_WORK1, d);
MOVT_ri16(REG_WORK1, d >> 16);
LDR_rR(REG_WORK2, REG_WORK1);
if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
uae_s32 idx = d - (uae_u32) & regs;
LDR_rRI(REG_WORK2, R_REGSTRUCT, idx);
} else {
MOVW_ri16(REG_WORK1, d);
MOVT_ri16(REG_WORK1, d >> 16);
LDR_rR(REG_WORK2, REG_WORK1);
}
MOVW_ri16(REG_WORK3, s);
if(s >> 16)
MOVT_ri16(REG_WORK3, s >> 16);
ADD_rrr(REG_WORK2, REG_WORK2, REG_WORK3);
if(CHECK32(s)) {
ADD_rri(REG_WORK2, REG_WORK2, s);
} else {
MOVW_ri16(REG_WORK3, s);
if(s >> 16)
MOVT_ri16(REG_WORK3, s >> 16);
ADD_rrr(REG_WORK2, REG_WORK2, REG_WORK3);
}
STR_rR(REG_WORK2, REG_WORK1);
if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
uae_s32 idx = d - (uae_u32) & regs;
STR_rRI(REG_WORK2, R_REGSTRUCT, idx);
} else {
STR_rR(REG_WORK2, REG_WORK1);
}
#else
uae_s32 offs = data_long_offs(d);
LDR_rRI(REG_WORK1, RPC_INDEX, offs);
@@ -554,18 +520,6 @@ LOWFUNC(WRITE,RMW,2,compemu_raw_add_l_mi,(IMM d, IMM s))
}
LENDFUNC(WRITE,RMW,2,compemu_raw_add_l_mi,(IMM d, IMM s))
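The rewritten compemu_raw_add_l_mi above avoids building a full 32-bit address with MOVW/MOVT when the operand lies inside struct regstruct: R_REGSTRUCT (r11) already holds &regs, so a single immediate-offset LDR/STR suffices. A minimal C sketch of that range test (regstruct_offset and the stand-in struct layout are illustrative, not part of the source):

    #include <stdint.h>

    struct regstruct { uint32_t r[32]; };   /* stand-in; real layout lives in the emulator */
    extern struct regstruct regs;

    /* Return the LDR/STR immediate offset relative to R_REGSTRUCT, or -1
       if addr is outside the struct and a full address must be built. */
    static int32_t regstruct_offset(uintptr_t addr)
    {
        uintptr_t base = (uintptr_t)&regs;
        if (addr >= base && addr < base + sizeof(struct regstruct))
            return (int32_t)(addr - base);
        return -1;
    }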
LOWFUNC(WRITE,NONE,2,compemu_raw_and_TAGMASK,(RW4 d))
{
// TAGMASK is 0x0000ffff
#ifdef ARMV6T2
BFC_rii(d, 16, 31);
#else
BIC_rri(d, d, 0x00ff0000);
BIC_rri(d, d, 0xff000000);
#endif
}
LENDFUNC(WRITE,NONE,2,compemu_raw_and_TAGMASK,(RW4 d))
LOWFUNC(WRITE,READ,2,compemu_raw_cmp_l_mi,(MEMR d, IMM s))
{
clobber_flags();
@@ -845,157 +799,140 @@ STATIC_INLINE void compemu_raw_jcc_l_oponly(int cc)
{
switch (cc) {
case NATIVE_CC_HI: // HI
BEQ_i(2); // beq no jump
BCS_i(1); // bcs no jump
// jump
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
// no jump
BEQ_i(0); // beq no jump
BCC_i(0); // bcc jump
break;
case NATIVE_CC_LS: // LS
BEQ_i(0); // beq jump
BCC_i(1); // bcc no jump
BCC_i(0); // bcc no jump
// jump
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
B_i(0);
// no jump
break;
case NATIVE_CC_F_OGT: // Jump if valid and greater than
BVS_i(2); // do not jump if NaN
BLE_i(1); // do not jump if less or equal
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
BVS_i(0); // do not jump if NaN
BGT_i(0); // jump if greater than
break;
case NATIVE_CC_F_OGE: // Jump if valid and greater or equal
BVS_i(2); // do not jump if NaN
BCC_i(1); // do not jump if carry cleared
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
BVS_i(0); // do not jump if NaN
BCS_i(0); // jump if carry set
break;
case NATIVE_CC_F_OLT: // Jump if valid and less than
BVS_i(2); // do not jump if NaN
BCS_i(1); // do not jump if carry set
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
BVS_i(0); // do not jump if NaN
BCC_i(0); // jump if carry cleared
break;
case NATIVE_CC_F_OLE: // Jump if valid and less or equal
BVS_i(2); // do not jump if NaN
BGT_i(1); // do not jump if greater than
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
BVS_i(0); // do not jump if NaN
BLE_i(0); // jump if less or equal
break;
case NATIVE_CC_F_OGL: // Jump if valid and greater or less
BVS_i(2); // do not jump if NaN
BEQ_i(1); // do not jump if equal
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
BVS_i(0); // do not jump if NaN
BNE_i(0); // jump if not equal
break;
case NATIVE_CC_F_OR: // Jump if valid
BVS_i(1); // do not jump if NaN
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
BVC_i(0);
break;
case NATIVE_CC_F_UN: // Jump if NAN
BVC_i(1); // do not jump if valid
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
BVS_i(0);
break;
case NATIVE_CC_F_UEQ: // Jump if NAN or equal
BVS_i(0); // jump if NaN
BNE_i(1); // do not jump if greater or less
BNE_i(0); // do not jump if greater or less
// jump
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
B_i(0);
break;
case NATIVE_CC_F_UGT: // Jump if NAN or greater than
BVS_i(0); // jump if NaN
BLS_i(1); // do not jump if lower or same
BLS_i(0); // do not jump if lower or same
// jump
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
B_i(0);
break;
case NATIVE_CC_F_UGE: // Jump if NAN or greater or equal
BVS_i(0); // jump if NaN
BMI_i(1); // do not jump if lower
BMI_i(0); // do not jump if lower
// jump
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
B_i(0);
break;
case NATIVE_CC_F_ULT: // Jump if NAN or less than
BVS_i(0); // jump if NaN
BGE_i(1); // do not jump if greater or equal
BGE_i(0); // do not jump if greater or equal
// jump
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
B_i(0);
break;
case NATIVE_CC_F_ULE: // Jump if NAN or less or equal
BVS_i(0); // jump if NaN
BGT_i(1); // do not jump if greater
BGT_i(0); // do not jump if greater
// jump
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
B_i(0);
break;
default:
CC_B_i(cc^1, 1);
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
CC_B_i(cc, 0);
break;
}
// emit of target will be done by caller
// emit of target into last branch will be done by caller
}
STATIC_INLINE void compemu_raw_handle_except(IMM cycles)
{
uae_u32* branchadd;
uae_u32* branchadd;
clobber_flags();
#ifdef ARMV6T2
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
#else
auto offset = data_long_offs((uae_u32)(&jit_exception));
LDR_rRI(REG_WORK2, RPC_INDEX, offset);
#endif
LDR_rR(REG_WORK1, REG_WORK2);
clobber_flags();
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
LDR_rR(REG_WORK1, REG_WORK2);
TST_rr(REG_WORK1, REG_WORK1);
BNE_i(1); // exception, skip LDR and target
LDR_rRI(RPC_INDEX, RPC_INDEX, -4); // jump to next opcode
branchadd = (uae_u32*)get_target();
skip_long(); // emit of target (next opcode handler) will be done later
// countdown -= scaled_cycles(totcycles);
uae_s32 offs = (uae_u32)&countdown - (uae_u32)&regs;
BEQ_i(0); // no exception, jump to next instruction
// countdown -= scaled_cycles(totcycles);
uae_s32 offs = (uae_u32)&countdown - (uae_u32)&regs;
LDR_rRI(REG_WORK1, R_REGSTRUCT, offs);
if (CHECK32(cycles)) {
SUBS_rri(REG_WORK1, REG_WORK1, cycles);
}
else {
if(CHECK32(cycles)) {
SUBS_rri(REG_WORK1, REG_WORK1, cycles);
} else {
#ifdef ARMV6T2
MOVW_ri16(REG_WORK2, cycles);
if (cycles >> 16)
MOVT_ri16(REG_WORK2, cycles >> 16);
MOVW_ri16(REG_WORK2, cycles);
if(cycles >> 16)
MOVT_ri16(REG_WORK2, cycles >> 16);
#else
int offs2 = data_long_offs(cycles);
LDR_rRI(REG_WORK2, RPC_INDEX, offs2);
int offs2 = data_long_offs(cycles);
LDR_rRI(REG_WORK2, RPC_INDEX, offs2);
#endif
SUBS_rrr(REG_WORK1, REG_WORK1, REG_WORK2);
}
SUBS_rrr(REG_WORK1, REG_WORK1, REG_WORK2);
}
STR_rRI(REG_WORK1, R_REGSTRUCT, offs);
LDR_rRI(RPC_INDEX, RPC_INDEX, -4); // <popall_execute_exception>
emit_long((uintptr)popall_execute_exception);
raw_pop_preserved_regs();
LDR_rRI(RPC_INDEX, RPC_INDEX, -4); // <execute_exception>
emit_long((uintptr)execute_exception);
// Write target of next instruction
write_jmp_target(branchadd, (cpuop_func*)get_target());
write_jmp_target(branchadd, (uintptr)get_target());
}
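In the reworked compemu_raw_handle_except, the generated code tests jit_exception and, when it is zero, takes a BEQ (patched afterwards via write_jmp_target) straight to the next opcode handler; only the exception path pays for the countdown update and the jump to execute_exception. A hedged C rendering of the emitted control flow, reconstructed from the hunk above (the real function emits ARM instructions, not C):

    extern int jit_exception;
    extern int countdown;

    static void handle_except_flow(int cycles, void (*next_opcode)(void),
                                   void (*exec_exception)(void))
    {
        if (jit_exception == 0) {   /* BEQ, target patched by write_jmp_target */
            next_opcode();
            return;
        }
        countdown -= cycles;        /* SUBS on the countdown slot via R_REGSTRUCT */
        exec_exception();           /* raw_pop_preserved_regs(), then jump */
    }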
STATIC_INLINE void compemu_raw_jl(uae_u32 t)
STATIC_INLINE void compemu_raw_maybe_recompile(uae_u32 t)
{
#ifdef ARMV6T2
MOVW_ri16(REG_WORK1, t);
MOVT_ri16(REG_WORK1, t >> 16);
CC_BX_r(NATIVE_CC_LT, REG_WORK1);
BGE_i(2);
raw_pop_preserved_regs();
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
emit_long(t);
#else
uae_s32 offs = data_long_offs(t);
CC_LDR_rRI(NATIVE_CC_LT, RPC_INDEX, RPC_INDEX, offs);
@@ -1004,8 +941,14 @@ STATIC_INLINE void compemu_raw_jl(uae_u32 t)
STATIC_INLINE void compemu_raw_jmp(uae_u32 t)
{
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
emit_long(t);
if(t >= (uae_u32)popallspace && t < (uae_u32)(popallspace + POPALLSPACE_SIZE + MAX_JIT_CACHE * 1024)) {
uae_u32* loc = (uae_u32*)get_target();
B_i(0);
write_jmp_target(loc, t);
} else {
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
emit_long(t);
}
}
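compemu_raw_jmp now emits a single relative B when the target lies inside popallspace or the translation cache, falling back to the literal-pool LDR pc only for far targets. The range test works because an ARM B instruction carries a signed 24-bit word offset, roughly +/-32 MB of reach; a sketch of the underlying reachability condition, assuming the offset conventions used by write_jmp_target:

    #include <stdint.h>

    /* ARM B/Bcc: signed 24-bit word offset relative to the instruction
       address + 8. Targets inside the JIT cache always satisfy this. */
    static int b_reachable(uintptr_t insn, uintptr_t target)
    {
        intptr_t off = ((intptr_t)(target - insn) - 8) >> 2;
        return off >= -(1 << 23) && off < (1 << 23);
    }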
STATIC_INLINE void compemu_raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
@@ -1024,10 +967,11 @@ STATIC_INLINE void compemu_raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
emit_long(base);
}
STATIC_INLINE void compemu_raw_jnz(uae_u32 t)
STATIC_INLINE void compemu_raw_maybe_cachemiss(uae_u32 t)
{
#ifdef ARMV6T2
BEQ_i(1);
BEQ_i(2);
raw_pop_preserved_regs();
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
emit_long(t);
#else
@@ -1093,19 +1037,22 @@ LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_inreg,(RR4 rr_pc, IMM cycles))
BIC_rri(rr_pc, rr_pc, 0x00ff0000);
BIC_rri(rr_pc, rr_pc, 0xff000000);
#endif
LDR_rRI(REG_WORK1, RPC_INDEX, 4); // <cache_tags>
LDR_rRI(REG_WORK1, RPC_INDEX, 8); // <cache_tags>
LDR_rRR_LSLi(RPC_INDEX, REG_WORK1, rr_pc, 2);
LDR_rRI(RPC_INDEX, RPC_INDEX, 0); // <popall_do_nothing>
raw_pop_preserved_regs();
LDR_rRI(RPC_INDEX, RPC_INDEX, 0); // <do_nothing>
emit_long((uintptr)cache_tags);
emit_long((uintptr)popall_do_nothing);
emit_long((uintptr)do_nothing);
}
LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_inreg,(RR4 rr_pc, IMM cycles))
LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v))
//LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v))
STATIC_INLINE uae_u32* compemu_raw_endblock_pc_isconst(IMM cycles, IMM v)
{
uae_u32* tba;
clobber_flags();
// countdown -= scaled_cycles(totcycles);
@@ -1126,20 +1073,29 @@ LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v))
}
STR_rRI(REG_WORK1, R_REGSTRUCT, offs);
CC_LDR_rRI(NATIVE_CC_MI^1, RPC_INDEX, RPC_INDEX, 16); // <target>
tba = (uae_u32*)get_target();
CC_B_i(NATIVE_CC_MI^1, 0); // <target set by caller>
LDR_rRI(REG_WORK1, RPC_INDEX, 4); // <v>
LDR_rRI(REG_WORK1, RPC_INDEX, 8); // <v>
offs = (uae_u32)&regs.pc_p - (uae_u32)&regs;
STR_rRI(REG_WORK1, R_REGSTRUCT, offs);
LDR_rRI(RPC_INDEX, RPC_INDEX, 0); // <popall_do_nothing>
raw_pop_preserved_regs();
LDR_rRI(RPC_INDEX, RPC_INDEX, 0); // <do_nothing>
emit_long(v);
emit_long((uintptr)popall_do_nothing);
// <target emitted by caller>
}
LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v))
emit_long((uintptr)do_nothing);
return tba;
}
//LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v))
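compemu_raw_endblock_pc_isconst is changed from a LOWFUNC macro into a plain inline that returns tba, the address of the still-unresolved CC_B placeholder, so the caller can patch in the real target once it is known. A hypothetical caller-side sketch (names follow the diff):

    uae_u32* tba = compemu_raw_endblock_pc_isconst(cycles, v);
    /* ... once the branch target has been emitted: */
    write_jmp_target(tba, (uintptr)get_target());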
LOWFUNC(NONE,READ,2,compemu_raw_tag_pc,(W4 d, MEMR s))
{
uae_s32 idx = (uae_u32)(s) - (uae_u32)&regs;
LDRH_rRI(d, R_REGSTRUCT, idx);
}
LENDFUNC(NONE,READ,2,compemu_raw_tag_pc,(W4 d, MEMR s))
/*************************************************************************
* FPU stuff *
@@ -1477,52 +1433,49 @@ LOWFUNC(NONE,WRITE,2,raw_fp_from_exten_mr,(RR4 adr, FR s))
{
uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs;
VMOVi_to_ARM_rd(REG_WORK2, s, 1); // get high part of double
VMOVi_to_ARM_rd(REG_WORK1, s, 1); // get high part of double
VCMP64_d0(s);
VMRS_CPSR();
BEQ_i(22); // iszero
BEQ_i(20); // iszero
UBFX_rrii(REG_WORK3, REG_WORK2, 20, 11); // get exponent
MOVW_ri16(REG_WORK1, 2047);
CMP_rr(REG_WORK3, REG_WORK1);
BEQ_i(15); // isnan
UBFX_rrii(REG_WORK2, REG_WORK1, 20, 11); // get exponent
MOVW_ri16(REG_WORK3, 2047);
CMP_rr(REG_WORK2, REG_WORK3);
BEQ_i(13); // isnan
MOVW_ri16(REG_WORK1, 15360); // diff of bias between double and long double
ADD_rrr(REG_WORK3, REG_WORK3, REG_WORK1); // exponent done
AND_rri(REG_WORK2, REG_WORK2, 0x80000000); // extract sign
ORR_rrrLSLi(REG_WORK3, REG_WORK2, REG_WORK3, 16); // merge sign and exponent
MOVW_ri16(REG_WORK3, 15360); // diff of bias between double and long double
ADD_rrr(REG_WORK2, REG_WORK2, REG_WORK3); // exponent done
AND_rri(REG_WORK1, REG_WORK1, 0x80000000); // extract sign
ORR_rrrLSLi(REG_WORK2, REG_WORK1, REG_WORK2, 16); // merge sign and exponent
LDR_rRI(REG_WORK1, R_REGSTRUCT, offs);
ADD_rrr(REG_WORK1, adr, REG_WORK1);
LDR_rRI(REG_WORK3, R_REGSTRUCT, offs);
ADD_rrr(REG_WORK3, adr, REG_WORK3);
REV_rr(REG_WORK3, REG_WORK3);
STRH_rR(REG_WORK3, REG_WORK1); // write exponent
REV_rr(REG_WORK2, REG_WORK2);
STRH_rR(REG_WORK2, REG_WORK3); // write exponent
VSHL64_ddi(SCRATCH_F64_1, s, 11); // shift mantissa to correct position
VMOV64_rrd(REG_WORK3, REG_WORK2, SCRATCH_F64_1);
ORR_rri(REG_WORK2, REG_WORK2, 0x80000000); // insert explicit 1
REV_rr(REG_WORK2, REG_WORK2);
REV_rr(REG_WORK3, REG_WORK3);
STR_rRI(REG_WORK2, REG_WORK1, 4);
STR_rRI(REG_WORK3, REG_WORK1, 8);
B_i(10); // end_of_op
VREV64_8_dd(SCRATCH_F64_1, SCRATCH_F64_1);
VMOV64_rrd(REG_WORK1, REG_WORK2, SCRATCH_F64_1);
ORR_rri(REG_WORK1, REG_WORK1, 0x80); // insert explicit 1
STRD_rRI(REG_WORK1, REG_WORK3, 4);
B_i(9); // end_of_op
// isnan
MOVW_ri16(REG_WORK2, 0x7fff);
LSL_rri(REG_WORK2, REG_WORK2, 16);
MVN_ri(REG_WORK3, 0);
MOVW_ri16(REG_WORK1, 0x7fff);
LSL_rri(REG_WORK1, REG_WORK1, 16);
MVN_ri(REG_WORK2, 0);
// iszero
CC_AND_rri(NATIVE_CC_EQ, REG_WORK2, REG_WORK2, 0x80000000); // extract sign
CC_MOV_ri(NATIVE_CC_EQ, REG_WORK3, 0);
CC_AND_rri(NATIVE_CC_EQ, REG_WORK1, REG_WORK1, 0x80000000); // extract sign
CC_MOV_ri(NATIVE_CC_EQ, REG_WORK2, 0);
LDR_rRI(REG_WORK1, R_REGSTRUCT, offs);
ADD_rrr(REG_WORK1, adr, REG_WORK1);
LDR_rRI(REG_WORK3, R_REGSTRUCT, offs);
ADD_rrr(REG_WORK3, adr, REG_WORK3);
REV_rr(REG_WORK2, REG_WORK2);
STR_rR(REG_WORK2, REG_WORK1);
STR_rRI(REG_WORK3, REG_WORK1, 4);
STR_rRI(REG_WORK3, REG_WORK1, 8);
REV_rr(REG_WORK1, REG_WORK1);
STRD_rR(REG_WORK1, REG_WORK3);
STR_rRI(REG_WORK2, REG_WORK3, 8);
// end_of_op
@@ -1533,44 +1486,42 @@ LOWFUNC(NONE,READ,2,raw_fp_to_exten_rm,(FW d, RR4 adr))
{
uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs;
LDR_rRI(REG_WORK1, R_REGSTRUCT, offs);
ADD_rrr(REG_WORK1, adr, REG_WORK1);
LDR_rRI(REG_WORK3, R_REGSTRUCT, offs);
ADD_rrr(REG_WORK3, adr, REG_WORK3);
LDR_rRI(REG_WORK2, REG_WORK1, 4);
LDR_rRI(REG_WORK3, REG_WORK1, 8);
REV_rr(REG_WORK2, REG_WORK2);
REV_rr(REG_WORK3, REG_WORK3);
BIC_rri(REG_WORK2, REG_WORK2, 0x80000000); // clear explicit 1
VMOV64_drr(d, REG_WORK3, REG_WORK2);
LDRD_rRI(REG_WORK1, REG_WORK3, 4);
BIC_rri(REG_WORK1, REG_WORK1, 0x80); // clear explicit 1
VMOV64_drr(d, REG_WORK1, REG_WORK2);
VREV64_8_dd(d, d);
LDR_rR(REG_WORK2, REG_WORK1);
REV_rr(REG_WORK2, REG_WORK2);
LSR_rri(REG_WORK2, REG_WORK2, 16); // exponent now in lower half
MOVW_ri16(REG_WORK3, 0x7fff);
ANDS_rrr(REG_WORK3, REG_WORK3, REG_WORK2);
LDRH_rR(REG_WORK1, REG_WORK3);
REV16_rr(REG_WORK1, REG_WORK1); // exponent now in lower half
MOVW_ri16(REG_WORK2, 0x7fff);
ANDS_rrr(REG_WORK2, REG_WORK2, REG_WORK1);
BNE_i(9); // not_zero
VCMP64_d0(d);
VMRS_CPSR();
BNE_i(6); // not zero
// zero
VMOV_I64_dimmI(d, 0x00);
TST_ri(REG_WORK2, 0x8000); // check sign
TST_ri(REG_WORK1, 0x8000); // check sign
BEQ_i(12); // end_of_op
MOV_ri(REG_WORK2, 0x80000000);
MOV_ri(REG_WORK3, 0);
VMOV64_drr(d, REG_WORK3, REG_WORK2);
MOV_ri(REG_WORK1, 0x80000000);
MOV_ri(REG_WORK2, 0);
VMOV64_drr(d, REG_WORK2, REG_WORK1);
B_i(8); // end_of_op
// not_zero
MOVW_ri16(REG_WORK1, 15360); // diff of bias between double and long double
SUB_rrr(REG_WORK3, REG_WORK3, REG_WORK1); // exponent done, ToDo: check for carry -> result gets Inf in double
UBFX_rrii(REG_WORK2, REG_WORK2, 15, 1); // extract sign
BFI_rrii(REG_WORK3, REG_WORK2, 11, 11); // insert sign
MOVW_ri16(REG_WORK3, 15360); // diff of bias between double and long double
SUB_rrr(REG_WORK2, REG_WORK2, REG_WORK3); // exponent done, ToDo: check for carry -> result gets Inf in double
UBFX_rrii(REG_WORK1, REG_WORK1, 15, 1); // extract sign
BFI_rrii(REG_WORK2, REG_WORK1, 11, 11); // insert sign
VSHR64_ddi(d, d, 11); // shift mantissa to correct position
LSL_rri(REG_WORK3, REG_WORK3, 20);
VMOV_I64_dimmI(0, 0x00);
VMOVi_from_ARM_dr(0, REG_WORK3, 1);
VORR_ddd(d, d, 0);
LSL_rri(REG_WORK2, REG_WORK2, 20);
VMOV_I64_dimmI(SCRATCH_F64_1, 0x00);
VMOVi_from_ARM_dr(SCRATCH_F64_1, REG_WORK2, 1);
VORR_ddd(d, d, SCRATCH_F64_1);
// end_of_op
}


@@ -645,8 +645,8 @@ enum {
#define CC_ADD_rrrRORr(cc,Rd,Rn,Rm,Rs) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_ROR_r(Rm,Rs))
#define CC_ADD_rrrRRX(cc,Rd,Rn,Rm) _OP3(cc,_ADD,0,Rd,Rn,SHIFT_RRX(Rm))
#define ADD_rri8(cc,Rd,Rn,i) CC_ADD_rri8(NATIVE_CC_AL,Rd,Rn,i)
#define ADD_rri8RORi(cc,Rd,Rn,Rm,i) CC_ADD_rri8RORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADD_rri8(Rd,Rn,i) CC_ADD_rri8(NATIVE_CC_AL,Rd,Rn,i)
#define ADD_rri8RORi(Rd,Rn,Rm,i) CC_ADD_rri8RORi(NATIVE_CC_AL,Rd,Rn,Rm,i)
#define ADD_rri(Rd,Rn,i) CC_ADD_rri(NATIVE_CC_AL,Rd,Rn,i)
#define ADD_rrr(Rd,Rn,Rm) CC_ADD_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
@@ -1320,6 +1320,12 @@ enum {
#define CC_STM_Ri(cc,Rn,i) _W(((cc) << 28) | (0x8 << 24) | (0x8 << 20) | ((Rn) << 16) | i)
#define STM_Ri(Rn,i) CC_STM_Ri(NATIVE_CC_AL,Rn,i)
#define CC_MLS_rrrr(cc,Rd,Rn,Rm,Ra) _W(((cc) << 28) | (0x0 << 24) | (0x6 << 20) | (Rd << 16) | (Ra << 12) | (Rm << 8) | (0x9 << 4) | Rn)
#define MLS_rrrr(Rd,Rn,Rm,Ra) CC_MLS_rrrr(NATIVE_CC_AL,Rd,Rn,Rm,Ra)
#define CC_SMULxy_rrr(cc,Rd,Rn,Rm,x,y) _W(((cc) << 28) | (0x1 << 24) | (0x6 << 20) | (Rd << 16) | (0x0 << 12) | (Rm << 8) | (0x8 << 4) | (Rn) | (x << 5) | (y << 6))
#define SMULxy_rrr(Rd,Rn,Rm,x,y) CC_SMULxy_rrr(NATIVE_CC_AL,Rd,Rn,Rm,x,y)
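CC_MLS_rrrr adds the ARMv6T2 multiply-and-subtract instruction, Rd = Ra - Rn*Rm, which later in this commit replaces a MUL/SUB pair in the DIVU remainder calculation; SMULxy is the signed 16x16 halfword multiply. MLS's semantics, modeled in C:

    #include <stdint.h>

    /* MLS Rd,Rn,Rm,Ra: one instruction instead of MUL + SUB. */
    static uint32_t mls(uint32_t rn, uint32_t rm, uint32_t ra)
    {
        return ra - rn * rm;
    }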
// ARMv6T2
//#ifdef ARMV6T2
@@ -1492,7 +1498,10 @@ enum {
#define FIMM6(imm) ((imm) << 16)
#define VSHL64_ddi(Dd,Dm,imm) _W((0xf << 28) | (0x2 << 24) | (0x8 << 20) | (0x5 << 8) | (0x9 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm) | FIMM6(imm))
#define VSHR64_ddi(Dd,Dm,imm) _W((0xf << 28) | (0x3 << 24) | (0x8 << 20) | (0x0 << 8) | (0x9 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm) | FIMM6(64-imm))
#define VSLI64_ddi(Dd,Dm,i) _W((0xf << 28) | (0x3 << 24) | (0x8 << 20) | (0x5 << 8) | (0x9 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm) | FIMM6(i))
#define VORR_ddd(Dd,Dn,Dm) _W((0xf << 28) | (0x2 << 24) | (0x2 << 20) | (0x1 << 8) | (0x1 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm))
#define VREV64_8_dd(Dd,Dm) _W((0xf << 28) | (0x3 << 24) | (0xb << 20) | (0x0 << 16) | (0x0 << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
#endif /* ARM_RTASM_H */


@@ -33301,14 +33301,12 @@ if (special_mem) {
FAIL(1);
return 0;
}
int src=scratchie++;
int dst=scratchie++;
{ int srca=scratchie++;
mov_l_rr(srca,srcreg+8);
{ int dsta = scratchie++;
mov_l_ri(dsta,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */
jnf_ADD_im8(srcreg + 8, srcreg + 8, 16);
jnf_MOVE16(dst, src);
jnf_MOVE16(dsta, srca);
}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc();
if (failure) m68k_pc_offset=m68k_pc_offset_thisinst;
return 0;
@@ -33325,14 +33323,12 @@ if (special_mem) {
FAIL(1);
return 0;
}
int src=scratchie++;
int dst=scratchie++;
{ int srca = scratchie++;
mov_l_ri(srca,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */
{ int dsta=scratchie++;
mov_l_rr(dsta,dstreg+8);
jnf_ADD_im8(dstreg + 8, dstreg + 8, 16);
jnf_MOVE16(dst, src);
jnf_MOVE16(dsta, srca);
}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc();
if (failure) m68k_pc_offset=m68k_pc_offset_thisinst;
return 0;
@@ -33349,14 +33345,12 @@ if (special_mem) {
FAIL(1);
return 0;
}
int src=scratchie++;
int dst=scratchie++;
{ int srca=dodgy?scratchie++:srcreg+8;
if (dodgy)
mov_l_rr(srca,srcreg+8);
{ int dsta = scratchie++;
mov_l_ri(dsta,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */
jnf_MOVE16(dst, src);
jnf_MOVE16(dsta, srca);
}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc();
if (failure) m68k_pc_offset=m68k_pc_offset_thisinst;
return 0;
@@ -33373,14 +33367,12 @@ if (special_mem) {
FAIL(1);
return 0;
}
int src=scratchie++;
int dst=scratchie++;
{ int srca = scratchie++;
mov_l_ri(srca,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */
{ int dsta=dodgy?scratchie++:dstreg+8;
if (dodgy)
mov_l_rr(dsta,dstreg+8);
jnf_MOVE16(dst, src);
jnf_MOVE16(dsta, srca);
}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc();
if (failure) m68k_pc_offset=m68k_pc_offset_thisinst;
return 0;
@@ -33398,15 +33390,15 @@ if (special_mem) {
FAIL(1);
return 0;
}
int src=scratchie++;
int dst=scratchie++;
int srca=scratchie++;
int dsta=scratchie++;
uae_u16 dstreg = ((comp_get_iword((m68k_pc_offset+=2)-2))>>12) & 0x07;
jnf_MOVE(src, srcreg + 8);
jnf_MOVE(dst, dstreg + 8);
jnf_MOVE(srca, srcreg + 8);
jnf_MOVE(dsta, dstreg + 8);
if (srcreg != dstreg)
jnf_ADD_im8(srcreg + 8, srcreg + 8, 16);
jnf_ADD_im8(dstreg + 8, dstreg + 8, 16);
jnf_MOVE16(dst, src);
jnf_MOVE16(dsta, srca);
} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc();
if (failure) m68k_pc_offset=m68k_pc_offset_thisinst;
return 0;
@@ -64819,14 +64811,12 @@ if (special_mem) {
FAIL(1);
return 0;
}
int src=scratchie++;
int dst=scratchie++;
{ int srca=scratchie++;
mov_l_rr(srca,srcreg+8);
{ int dsta = scratchie++;
mov_l_ri(dsta,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */
jnf_ADD_im8(srcreg + 8, srcreg + 8, 16);
jnf_MOVE16(dst, src);
jnf_MOVE16(dsta, srca);
}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc();
if (failure) m68k_pc_offset=m68k_pc_offset_thisinst;
return 0;
@@ -64843,14 +64833,12 @@ if (special_mem) {
FAIL(1);
return 0;
}
int src=scratchie++;
int dst=scratchie++;
{ int srca = scratchie++;
mov_l_ri(srca,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */
{ int dsta=scratchie++;
mov_l_rr(dsta,dstreg+8);
jnf_ADD_im8(dstreg + 8, dstreg + 8, 16);
jnf_MOVE16(dst, src);
jnf_MOVE16(dsta, srca);
}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc();
if (failure) m68k_pc_offset=m68k_pc_offset_thisinst;
return 0;
@@ -64867,14 +64855,12 @@ if (special_mem) {
FAIL(1);
return 0;
}
int src=scratchie++;
int dst=scratchie++;
{ int srca=dodgy?scratchie++:srcreg+8;
if (dodgy)
mov_l_rr(srca,srcreg+8);
{ int dsta = scratchie++;
mov_l_ri(dsta,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */
jnf_MOVE16(dst, src);
jnf_MOVE16(dsta, srca);
}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc();
if (failure) m68k_pc_offset=m68k_pc_offset_thisinst;
return 0;
@@ -64891,14 +64877,12 @@ if (special_mem) {
FAIL(1);
return 0;
}
int src=scratchie++;
int dst=scratchie++;
{ int srca = scratchie++;
mov_l_ri(srca,comp_get_ilong((m68k_pc_offset+=4)-4)); /* absl */
{ int dsta=dodgy?scratchie++:dstreg+8;
if (dodgy)
mov_l_rr(dsta,dstreg+8);
jnf_MOVE16(dst, src);
jnf_MOVE16(dsta, srca);
}}} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc();
if (failure) m68k_pc_offset=m68k_pc_offset_thisinst;
return 0;
@@ -64916,15 +64900,15 @@ if (special_mem) {
FAIL(1);
return 0;
}
int src=scratchie++;
int dst=scratchie++;
int srca=scratchie++;
int dsta=scratchie++;
uae_u16 dstreg = ((comp_get_iword((m68k_pc_offset+=2)-2))>>12) & 0x07;
jnf_MOVE(src, srcreg + 8);
jnf_MOVE(dst, dstreg + 8);
jnf_MOVE(srca, srcreg + 8);
jnf_MOVE(dsta, dstreg + 8);
if (srcreg != dstreg)
jnf_ADD_im8(srcreg + 8, srcreg + 8, 16);
jnf_ADD_im8(dstreg + 8, dstreg + 8, 16);
jnf_MOVE16(dst, src);
jnf_MOVE16(dsta, srca);
} if (m68k_pc_offset>SYNC_PC_OFFSET) sync_m68k_pc();
if (failure) m68k_pc_offset=m68k_pc_offset_thisinst;
return 0;


@@ -162,7 +162,7 @@ extern uae_u32 needed_flags;
extern uae_u8* comp_pc_p;
extern void* pushall_call_handler;
#define VREGS 32
#define VREGS 24
#define VFREGS 10
#define INMEM 1
@@ -205,19 +205,11 @@ STATIC_INLINE int end_block(uae_u16 opcode)
#define PC_P 16
#define FLAGX 17
#define FLAGTMP 18
#define NEXT_HANDLER 19
#define S1 20
#define S2 21
#define S3 22
#define S4 23
#define S5 24
#define S6 25
#define S7 26
#define S8 27
#define S9 28
#define S10 29
#define S11 30
#define S12 31
#define S1 19
#define S2 20
#define S3 21
#define S4 22
#define S5 23
#define FP_RESULT 8
#define FS1 9
@@ -263,14 +255,6 @@ typedef struct {
fn_status fat[N_FREGS];
} bigstate;
typedef struct {
/* Integer part */
uae_s8 virt[VREGS];
uae_s8 nat[N_REGS];
} smallstate;
extern int touchcnt;
#define IMM uae_s32
#define RR1 uae_u32
#define RR2 uae_u32
@@ -329,7 +313,6 @@ extern void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp);
#define SYNC_PC_OFFSET 124
extern void sync_m68k_pc(void);
extern uae_u32 get_const(int r);
extern int is_const(int r);
extern void register_branch(uae_u32 not_taken, uae_u32 taken, uae_u8 cond);
extern void register_possible_exception(void);
@@ -385,11 +368,9 @@ typedef struct blockinfo_t {
uae_u8 optlevel;
uae_u8 needed_flags;
uae_u8 status;
uae_u8 havestate;
dependency dep[2]; /* Holds things we depend on */
dependency* deplist; /* List of things that depend on this */
smallstate env;
} blockinfo;
#define BI_INVALID 0


@@ -477,13 +477,10 @@ STATIC_INLINE void flush_cpu_icache(void *start, void *stop)
#endif
}
STATIC_INLINE void write_jmp_target(uae_u32* jmpaddr, cpuop_func* a) {
*(jmpaddr) = (uae_u32)a;
flush_cpu_icache((void *)jmpaddr, (void *)&jmpaddr[1]);
}
STATIC_INLINE void emit_jmp_target(uae_u32 a) {
emit_long((uae_u32)a);
STATIC_INLINE void write_jmp_target(uae_u32* jmpaddr, uintptr a) {
uae_s32 off = ((uae_u32)a - (uae_u32)jmpaddr - 8) >> 2;
*(jmpaddr) = (*(jmpaddr) & 0xff000000) | (off & 0x00ffffff);
flush_cpu_icache((void *)jmpaddr, (void *)&jmpaddr[1]);
}
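write_jmp_target now rewrites the 24-bit offset field of an already-emitted B/Bcc instruction instead of storing an absolute address into a literal word; bits 31-24 (condition and opcode) are preserved. A self-contained C sketch of the patch, with the icache flush omitted:

    #include <stdint.h>

    /* Patch an ARM B/Bcc at 'insn' to branch to 'target'. The offset is
       in words, relative to the instruction address + 8 (ARM pipeline). */
    static void patch_branch(uint32_t* insn, uintptr_t target)
    {
        int32_t off = ((int32_t)(target - (uintptr_t)insn) - 8) >> 2;
        *insn = (*insn & 0xff000000u) | ((uint32_t)off & 0x00ffffffu);
    }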


@@ -278,18 +278,6 @@ MENDFUNC(3,jff_ADD_l,(W4 d, RR4 s, RR4 v))
* Flags: Not affected.
*
*/
MIDFUNC(2,jnf_ADDA_b,(W4 d, RR1 s))
{
s = readreg(s, 4);
d = rmw(d, 4, 4);
SXTAB_rrr(d, d, s);
unlock2(d);
unlock2(s);
}
MENDFUNC(2,jnf_ADDA_b,(W4 d, RR1 s))
MIDFUNC(2,jnf_ADDA_w,(W4 d, RR2 s))
{
s = readreg(s, 4);
@@ -337,13 +325,8 @@ MIDFUNC(3,jnf_ADDX,(W4 d, RR4 s, RR4 v))
v = readreg(v, 4);
d = writereg(d, 4);
clobber_flags();
// Restore X to carry (don't care about other flags)
LSL_rri(REG_WORK1, x, 29);
MSR_CPSRf_r(REG_WORK1);
ADC_rrr(d, s, v);
ADD_rrr(d, s, v);
ADD_rrr(d, d, x);
unlock2(d);
unlock2(s);
@@ -1457,7 +1440,7 @@ MIDFUNC(2,jnf_BSET_b,(RW4 d, RR4 s))
return;
}
s = readreg(s, 4);
d = rmw(d, 4 ,4);
d = rmw(d, 4, 4);
AND_rri(REG_WORK1, s, 7);
MOV_ri(REG_WORK2, 1);
@@ -1852,7 +1835,7 @@ MENDFUNC(2,jff_DBCC,(RR2 d, IMM cc))
* C Always cleared.
*
*/
MIDFUNC(3,jnf_DIVU,(W4 d, RR4 s1, RR4 s2))
MIDFUNC(3,jnf_DIVU,(W4 d, RR4 s1, RR4 s2))
{
s1 = readreg(s1, 4);
s2 = readreg(s2, 4);
@@ -1873,14 +1856,14 @@ MENDFUNC(2,jff_DBCC,(RR2 d, IMM cc))
#endif
STR_rR(REG_WORK1, REG_WORK2);
#ifdef ARM_HAS_DIV
B_i(5); // end_of_op
B_i(4); // end_of_op
// src is not 0
UDIV_rrr(REG_WORK1, s1, REG_WORK3);
#else
B_i(11); // end_of_op
// src is not 0
B_i(10); // end_of_op
// src is not 0
VMOVi_from_ARM_dr(SCRATCH_F64_1, s1, 0);
VMOVi_from_ARM_dr(SCRATCH_F64_2, REG_WORK3, 0);
VCVTIuto64_ds(SCRATCH_F64_1, SCRATCH_F32_1);
@@ -1889,16 +1872,15 @@ MENDFUNC(2,jff_DBCC,(RR2 d, IMM cc))
VCVT64toIu_sd(SCRATCH_F32_1, SCRATCH_F64_3);
VMOVi_to_ARM_rd(REG_WORK1, SCRATCH_F64_1, 0);
#endif
LSRS_rri(REG_WORK2, REG_WORK1, 16); // if result of this is not 0, DIVU overflows -> no result
BNE_i(2);
LSRS_rri(REG_WORK2, REG_WORK1, 16); // if result of this is not 0, DIVU overflows -> no result
BNE_i(1);
// Here we have to calc remainder
MUL_rrr(REG_WORK2, REG_WORK1, REG_WORK3);
SUB_rrr(REG_WORK2, s1, REG_WORK2);
PKHBT_rrrLSLi(d, REG_WORK1, REG_WORK2, 16);
MLS_rrrr(REG_WORK2, REG_WORK1, REG_WORK3, s1);
PKHBT_rrrLSLi(d, REG_WORK1, REG_WORK2, 16);
// end_of_op
unlock2(d);
unlock2(s1);
unlock2(s2);
@@ -1910,7 +1892,7 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
s1 = readreg(s1, 4);
s2 = readreg(s2, 4);
d = writereg(d, 4);
UNSIGNED16_REG_2_REG(REG_WORK3, s2);
TST_rr(REG_WORK3, REG_WORK3);
BNE_i(6); // src is not 0
@@ -1930,12 +1912,12 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
MOV_ri(REG_WORK1, ARM_Z_FLAG | ARM_V_FLAG);
MSR_CPSRf_r(REG_WORK1);
#ifdef ARM_HAS_DIV
B_i(12); // end_of_op
B_i(11); // end_of_op
// src is not 0
UDIV_rrr(REG_WORK1, s1, REG_WORK3);
#else
B_i(18); // end_of_op
B_i(17); // end_of_op
// src is not 0
VMOVi_from_ARM_dr(SCRATCH_F64_1, s1, 0);
@@ -1952,7 +1934,7 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
// Here we handle overflow
MOV_ri(REG_WORK1, ARM_V_FLAG | ARM_N_FLAG);
MSR_CPSRf_r(REG_WORK1);
B_i(6);
B_i(5);
// Here we have to calc flags and remainder
LSLS_rri(REG_WORK2, REG_WORK1, 16); // N and Z ok
@@ -1960,8 +1942,7 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
BIC_rri(REG_WORK2, REG_WORK2, ARM_C_FLAG | ARM_V_FLAG);
MSR_CPSRf_r(REG_WORK2);
MUL_rrr(REG_WORK2, REG_WORK1, REG_WORK3);
SUB_rrr(REG_WORK2, s1, REG_WORK2);
MLS_rrrr(REG_WORK2, REG_WORK1, REG_WORK3, s1);
PKHBT_rrrLSLi(d, REG_WORK1, REG_WORK2, 16);
// end_of_op
@@ -2600,11 +2581,11 @@ MIDFUNC(3,jff_LSL_b_imm,(W4 d, RR4 s, IMM i))
MSR_CPSRf_i(0);
LSL_rri(d, s, 24);
if (i) {
LSLS_rri(d, d, i);
LSLS_rri(d, s, i + 24);
DUPLICACTE_CARRY
} else {
LSL_rri(d, s, 24);
TST_rr(d, d);
}
LSR_rri(d, d, 24);
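The jff_LSL_b_imm change folds the two shifts into one: instead of positioning the byte at the top with LSL #24 and then shifting again by i, a single LSLS by i + 24 produces the same value and the same carry-out, as long as i + 24 stays below 32 (the word version in the next hunk does the same with i + 16). The identity in C:

    #include <stdint.h>

    /* (x << 24) << i == x << (24 + i) when 24 + i < 32; the bit shifted
       out last (bit 8 - i of x) is also identical, so C matches too. */
    static uint32_t lsl_b_merged(uint32_t x, unsigned i)
    {
        return x << (24 + i);
    }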
@@ -2621,11 +2602,11 @@ MIDFUNC(3,jff_LSL_w_imm,(W4 d, RR4 s, IMM i))
MSR_CPSRf_i(0);
LSL_rri(d, s, 16);
if (i) {
LSLS_rri(d, d, i);
LSLS_rri(d, s, i + 16);
DUPLICACTE_CARRY
} else {
LSL_rri(d, s, 16);
TST_rr(d, d);
}
LSR_rri(d, d, 16);
@@ -2938,15 +2919,15 @@ MIDFUNC(3,jff_LSR_b_reg,(W4 d, RR4 s, RR4 i))
d = writereg(d, 4);
int x = writereg(FLAGX, 4);
SIGNED8_REG_2_REG(d, s); // Make sure sign is in MSB if shift count is 0 (to get correct N flag)
MSR_CPSRf_i(0);
ANDS_rri(REG_WORK1, i, 63);
BEQ_i(4); // No shift -> X flag unchanged
AND_rri(d, d, 0xff); // Shift count is not 0 -> unsigned required
AND_rri(d, s, 0xff); // Shift count is not 0 -> unsigned required
LSRS_rrr(d, d, REG_WORK1);
MOV_ri(x, 1);
CC_MOV_ri(NATIVE_CC_CC, x, 0);
B_i(0);
B_i(1);
SIGNED8_REG_2_REG(d, s); // Make sure sign is in MSB if shift count is 0 (to get correct N flag)
TST_rr(d, d);
unlock2(x);
@@ -2963,15 +2944,15 @@ MIDFUNC(3,jff_LSR_w_reg,(W4 d, RR4 s, RR4 i))
d = writereg(d, 4);
int x = writereg(FLAGX, 4);
SIGNED16_REG_2_REG(d, s); // Make sure sign is in MSB if shift count is 0 (to get correct N flag)
MSR_CPSRf_i(0);
ANDS_rri(REG_WORK1, i, 63);
BEQ_i(4); // No shift -> X flag unchanged
UXTH_rr(d, d); // Shift count is not 0 -> unsigned required
UXTH_rr(d, s); // Shift count is not 0 -> unsigned required
LSRS_rrr(d, d, REG_WORK1);
MOV_ri(x, 1);
CC_MOV_ri(NATIVE_CC_CC, x, 0);
B_i(0);
B_i(1);
SIGNED16_REG_2_REG(d, s); // Make sure sign is in MSB if shift count is 0 (to get correct N flag)
TST_rr(d, d);
unlock2(x);
@@ -3261,15 +3242,11 @@ MIDFUNC(2,jnf_MOVE16,(RR4 d, RR4 s))
ADD_rrr(s, s, REG_WORK1);
ADD_rrr(d, d, REG_WORK1);
LDR_rR(REG_WORK1, s);
LDR_rRI(REG_WORK2, s, 4);
STR_rR(REG_WORK1, d);
STR_rRI(REG_WORK2, d, 4);
LDRD_rR(REG_WORK1, s);
STRD_rR(REG_WORK1, d);
LDR_rRI(REG_WORK1, s, 8);
LDR_rRI(REG_WORK2, s, 12);
STR_rRI(REG_WORK1, d, 8);
STR_rRI(REG_WORK2, d, 12);
LDRD_rRI(REG_WORK1, s, 8);
STRD_rRI(REG_WORK1, d, 8);
POP_REGS((1 << s) | (1 << d));
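jnf_MOVE16's 16-byte copy now runs as two LDRD/STRD pairs (64-bit transfers into a register pair) instead of four 32-bit LDR/STR pairs, halving the number of memory instructions. Functionally it is just this, assuming the 16-byte alignment MOVE16 guarantees:

    #include <stdint.h>
    #include <string.h>

    /* MOVE16 moves one aligned 16-byte line; LDRD/STRD do it in two
       64-bit chunks. */
    static void move16_copy(void* dst, const void* src)
    {
        uint64_t line[2];
        memcpy(line, src, sizeof line);   /* two LDRDs */
        memcpy(dst, line, sizeof line);   /* two STRDs */
    }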
@@ -3329,9 +3306,7 @@ MIDFUNC(2,jnf_MULS,(RW4 d, RR4 s))
s = readreg(s, 4);
d = rmw(d, 4, 4);
SIGN_EXTEND_16_REG_2_REG(d, d);
SIGN_EXTEND_16_REG_2_REG(REG_WORK1, s);
MUL_rrr(d, d, REG_WORK1);
SMULxy_rrr(d, d, s, 0, 0);
unlock2(s);
unlock2(d);
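jnf_MULS drops the pair of 16-bit sign extensions plus MUL in favour of SMULBB (SMULxy with x = y = 0), which multiplies the signed low halfwords of both operands directly. The equivalent C:

    #include <stdint.h>

    /* SMULBB d,d,s: signed 16x16 -> 32 multiply of the low halfwords. */
    static uint32_t muls_w(uint32_t d, uint32_t s)
    {
        return (uint32_t)((int32_t)(int16_t)d * (int32_t)(int16_t)s);
    }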
@@ -3647,8 +3622,7 @@ MIDFUNC(2,jnf_NEGX_b,(W4 d, RR4 s))
clobber_flags();
// Restore inverted X to carry (don't care about other flags)
LSL_rri(REG_WORK1, x, 29);
EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
MVN_rrLSLi(REG_WORK1, x, 29);
MSR_CPSRf_r(REG_WORK1);
SIGNED8_REG_2_REG(REG_WORK1, s);
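Restoring the inverted X flag into ARM's carry used to take two instructions (shift X to bit 29, then EOR with ARM_C_FLAG); MVN_rrLSLi does both at once, because NOT(x << 29) has bit 29 equal to NOT x, and the code's own comment notes the other flag bits don't matter here. The same replacement recurs in the NEGX and SUBX hunks below. A small C check of the identity:

    #include <stdint.h>

    #define ARM_C_FLAG (1u << 29)

    /* For x in {0,1}: bit 29 of ~(x << 29) equals bit 29 of
       ((x << 29) ^ ARM_C_FLAG), so one MVN replaces LSL + EOR. */
    static uint32_t inverted_x_carry(uint32_t x)
    {
        return ~(x << 29) & ARM_C_FLAG;
    }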
@@ -3669,8 +3643,7 @@ MIDFUNC(2,jnf_NEGX_w,(W4 d, RR4 s))
clobber_flags();
// Restore inverted X to carry (don't care about other flags)
LSL_rri(REG_WORK1, x, 29);
EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
MVN_rrLSLi(REG_WORK1, x, 29);
MSR_CPSRf_r(REG_WORK1);
SIGNED16_REG_2_REG(REG_WORK1, s);
@@ -3691,8 +3664,7 @@ MIDFUNC(2,jnf_NEGX_l,(W4 d, RR4 s))
clobber_flags();
// Restore inverted X to carry (don't care about other flags)
LSL_rri(REG_WORK1, x, 29);
EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
MVN_rrLSLi(REG_WORK1, x, 29);
MSR_CPSRf_r(REG_WORK1);
RSC_rri(d, s, 0);
@@ -3713,8 +3685,7 @@ MIDFUNC(2,jff_NEGX_b,(W4 d, RR1 s))
CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG);
// Restore inverted X to carry (don't care about other flags)
LSL_rri(REG_WORK1, x, 29);
EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
MVN_rrLSLi(REG_WORK1, x, 29);
MSR_CPSRf_r(REG_WORK1);
SIGNED8_REG_2_REG(REG_WORK1, s);
@@ -3747,8 +3718,7 @@ MIDFUNC(2,jff_NEGX_w,(W4 d, RR2 s))
CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG);
// Restore inverted X to carry (don't care about other flags)
LSL_rri(REG_WORK1, x, 29);
EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
MVN_rrLSLi(REG_WORK1, x, 29);
MSR_CPSRf_r(REG_WORK1);
SIGNED16_REG_2_REG(REG_WORK1, s);
@@ -3781,8 +3751,7 @@ MIDFUNC(2,jff_NEGX_l,(W4 d, RR4 s))
CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG);
// Restore inverted X to carry (don't care about other flags)
LSL_rri(REG_WORK1, x, 29);
EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
MVN_rrLSLi(REG_WORK1, x, 29);
MSR_CPSRf_r(REG_WORK1);
RSCS_rri(d, s, 0);
@@ -5487,8 +5456,7 @@ MIDFUNC(3,jnf_SUBX,(W4 d, RR4 s, RR4 v))
clobber_flags();
// Restore inverted X to carry (don't care about other flags)
LSL_rri(REG_WORK1, x, 29);
EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
MVN_rrLSLi(REG_WORK1, x, 29);
MSR_CPSRf_r(REG_WORK1);
SBC_rrr(d, s, v);
@@ -5511,8 +5479,7 @@ MIDFUNC(3,jff_SUBX_b,(W4 d, RR1 s, RR1 v))
CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG);
// Restore inverted X to carry (don't care about other flags)
LSL_rri(REG_WORK1, x, 29);
EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
MVN_rrLSLi(REG_WORK1, x, 29);
MSR_CPSRf_r(REG_WORK1);
LSL_rri(REG_WORK1, s, 24);
@@ -5548,8 +5515,7 @@ MIDFUNC(3,jff_SUBX_w,(W4 d, RR2 s, RR2 v))
CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG);
// Restore inverted X to carry (don't care about other flags)
LSL_rri(REG_WORK1, x, 29);
EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
MVN_rrLSLi(REG_WORK1, x, 29);
MSR_CPSRf_r(REG_WORK1);
LSL_rri(REG_WORK1, s, 16);
@@ -5585,8 +5551,7 @@ MIDFUNC(3,jff_SUBX_l,(W4 d, RR4 s, RR4 v))
CC_MVN_ri(NATIVE_CC_NE, REG_WORK2, ARM_Z_FLAG);
// Restore inverted X to carry (don't care about other flags)
LSL_rri(REG_WORK1, x, 29);
EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG);
MVN_rrLSLi(REG_WORK1, x, 29);
MSR_CPSRf_r(REG_WORK1);
SBCS_rrr(d, s, v);


@@ -46,7 +46,6 @@ DECLARE_MIDFUNC(jff_ADD_w_imm(W4 d, RR2 s, IMM v));
DECLARE_MIDFUNC(jff_ADD_l_imm(W4 d, RR4 s, IMM v));
// ADDA
DECLARE_MIDFUNC(jnf_ADDA_b(W4 d, RR1 s));
DECLARE_MIDFUNC(jnf_ADDA_w(W4 d, RR2 s));
DECLARE_MIDFUNC(jnf_ADDA_l(W4 d, RR4 s));

File diff suppressed because it is too large


@@ -1,7 +1,3 @@
#ifdef NOFLAGS_SUPPORT
/* 68040 */
extern const struct comptbl op_smalltbl_0_nf[];
#endif
extern const struct comptbl op_smalltbl_0_comp_nf[];
extern const struct comptbl op_smalltbl_0_comp_ff[];
extern compop_func op_0_0_comp_ff;


@@ -693,15 +693,14 @@ static void gen_move16(uae_u32 opcode, struct instr *curi)
comprintf(" " RETURN "\n");
comprintf("} \n");
comprintf("\tint src=scratchie++;\n");
comprintf("\tint dst=scratchie++;\n");
uae_u32 masked_op = (opcode & 0xfff8);
if (masked_op == 0xf620) {
// POSTINCREMENT SOURCE AND DESTINATION version: MOVE16 (Ax)+,(Ay)+
comprintf("\tint srca=scratchie++;\n");
comprintf("\tint dsta=scratchie++;\n");
comprintf("\t uae_u16 dstreg = ((%s)>>12) & 0x07;\n", gen_nextiword());
comprintf("\t jnf_MOVE(src, srcreg + 8);\n");
comprintf("\t jnf_MOVE(dst, dstreg + 8);\n");
comprintf("\t jnf_MOVE(srca, srcreg + 8);\n");
comprintf("\t jnf_MOVE(dsta, dstreg + 8);\n");
comprintf("\t if (srcreg != dstreg)\n");
comprintf("\t jnf_ADD_im8(srcreg + 8, srcreg + 8, 16);\n");
comprintf("\t jnf_ADD_im8(dstreg + 8, dstreg + 8, 16);\n");
@@ -718,7 +717,7 @@ static void gen_move16(uae_u32 opcode, struct instr *curi)
break;
}
}
comprintf("\tjnf_MOVE16(dst, src);\n");
comprintf("\tjnf_MOVE16(dsta, srca);\n");
}
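This hunk is the actual MOVE16 fix of the commit title: the generator used to declare scratch registers src/dst, compute the effective addresses into srca/dsta, and then emit jnf_MOVE16(dst, src) with the never-initialized pair. The code it now generates for the (Ax)+,(Ay)+ form reads (recapping the regenerated output shown earlier in this diff):

    int srca = scratchie++;
    int dsta = scratchie++;
    uae_u16 dstreg = ((comp_get_iword((m68k_pc_offset+=2)-2))>>12) & 0x07;
    jnf_MOVE(srca, srcreg + 8);   /* capture both addresses first */
    jnf_MOVE(dsta, dstreg + 8);
    if (srcreg != dstreg)
        jnf_ADD_im8(srcreg + 8, srcreg + 8, 16);   /* then post-increment */
    jnf_ADD_im8(dstreg + 8, dstreg + 8, 16);
    jnf_MOVE16(dsta, srca);       /* copy uses the captured addresses */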
static void
@@ -3285,10 +3284,6 @@ int main(int argc, char *argv[])
headerfile = fopen("jit/comptbl.h", "wb");
fprintf (headerfile, "" \
"#ifdef NOFLAGS_SUPPORT\n" \
"/* 68040 */\n" \
"extern const struct comptbl op_smalltbl_0_nf[];\n" \
"#endif\n" \
"extern const struct comptbl op_smalltbl_0_comp_nf[];\n" \
"extern const struct comptbl op_smalltbl_0_comp_ff[];\n" \
"");