Added JIT FPU, removed Softfloat option (too slow to be useful) - Thanks TomB!

This commit is contained in:
Dimitris Panokostas 2018-01-30 01:08:23 +01:00
parent f24301e8dd
commit a8815b211e
55 changed files with 3579 additions and 10940 deletions

View file

@ -333,6 +333,24 @@ LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, RR4 s))
}
LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, RR4 s))
LOWFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
{
/* Load a 32-bit value from absolute address s into host register d.
 * Addresses that fall inside the regs struct are reached cheaply via the
 * dedicated base register R_REGSTRUCT; any other address is first built
 * in a scratch register, then dereferenced. */
if(s >= (uae_u32) &regs && s < ((uae_u32) &regs) + sizeof(struct regstruct)) {
uae_s32 idx = s - (uae_u32) & regs;
LDR_rRI(d, R_REGSTRUCT, idx);
} else {
#ifdef ARMV6T2
MOVW_ri16(REG_WORK1, s);	// movw/movt pair builds the full 32-bit constant
MOVT_ri16(REG_WORK1, s >> 16);
#else
uae_s32 offs = data_long_offs(s);	// pre-v6T2: fetch address from PC-relative literal pool
LDR_rRI(REG_WORK1, RPC_INDEX, offs);
#endif
LDR_rR(d, REG_WORK1);
}
}
LENDFUNC(NONE,READ,2,raw_mov_l_rm,(W4 d, MEMR s))
LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, RR2 s))
{
PKHBT_rrr(d, s, d);
@ -465,11 +483,6 @@ STATIC_INLINE void raw_emit_nop_filler(int nbytes)
while(nbytes--) { NOP(); }
}
/* Emit a single ARM NOP instruction into the code buffer. */
STATIC_INLINE void raw_emit_nop(void)
{
NOP();
}
//
// Arm instructions
//
@ -611,7 +624,7 @@ LOWFUNC(NONE,NONE,3,compemu_raw_lea_l_brr,(W4 d, RR4 s, IMM offset))
}
LENDFUNC(NONE,NONE,3,compemu_raw_lea_l_brr,(W4 d, RR4 s, IMM offset))
LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s))
LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(MEMW d, RR1 s))
{
if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
uae_s32 idx = d - (uae_u32) & regs;
@ -627,7 +640,7 @@ LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s))
STRB_rR(s, REG_WORK1);
}
}
LENDFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s))
LENDFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(MEMW d, RR1 s))
LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s))
{
@ -664,7 +677,7 @@ LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s))
}
LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s))
LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s))
LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(MEMW d, RR4 s))
{
if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
uae_s32 idx = d - (uae_u32) & regs;
@ -680,7 +693,7 @@ LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s))
STR_rR(s, REG_WORK1);
}
}
LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s))
LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(MEMW d, RR4 s))
LOWFUNC(NONE,NONE,2,compemu_raw_mov_l_ri,(W4 d, IMM s))
{
@ -831,26 +844,101 @@ STATIC_INLINE void compemu_raw_call_r(RR4 r)
/*
 * Emit the opcode part of a conditional jump; the 32-bit target word is
 * emitted by the caller immediately after this sequence (see trailing note).
 *
 * Pattern: conditions under which the jump must NOT be taken branch over the
 * "ldr pc, [pc, #-4]" that loads the target.  The NATIVE_CC_F_* cases map
 * 68k FPU condition predicates onto ARM flags as left by a preceding
 * VCMP/VMRS (V flag set = unordered, i.e. at least one NaN operand).
 *
 * Fix: removed stale duplicate `case 9`/`case 8` numeric labels and a
 * duplicated `default:` label (leftover old-version lines) that made the
 * switch ill-formed.
 */
STATIC_INLINE void compemu_raw_jcc_l_oponly(int cc)
{
	switch (cc) {
		case NATIVE_CC_HI: // HI
			BEQ_i(2);		// beq no jump
			BCS_i(1);		// bcs no jump
			// jump
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			// no jump
			break;
		case NATIVE_CC_LS: // LS
			BEQ_i(0);		// beq jump
			BCC_i(1);		// bcc no jump
			// jump
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			// no jump
			break;
		case NATIVE_CC_F_OGT: // Jump if valid and greater than
			BVS_i(2);		// do not jump if NaN
			BLE_i(1);		// do not jump if less or equal
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			break;
		case NATIVE_CC_F_OGE: // Jump if valid and greater or equal
			BVS_i(2);		// do not jump if NaN
			BCC_i(1);		// do not jump if carry cleared
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			break;
		case NATIVE_CC_F_OLT: // Jump if valid and less than
			BVS_i(2);		// do not jump if NaN
			BCS_i(1);		// do not jump if carry set
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			break;
		case NATIVE_CC_F_OLE: // Jump if valid and less or equal
			BVS_i(2);		// do not jump if NaN
			BGT_i(1);		// do not jump if greater than
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			break;
		case NATIVE_CC_F_OGL: // Jump if valid and greater or less
			BVS_i(2);		// do not jump if NaN
			BEQ_i(1);		// do not jump if equal
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			break;
		case NATIVE_CC_F_OR: // Jump if valid
			BVS_i(1);		// do not jump if NaN
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			break;
		case NATIVE_CC_F_UN: // Jump if NAN
			BVC_i(1);		// do not jump if valid
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			break;
		case NATIVE_CC_F_UEQ: // Jump if NAN or equal
			BVS_i(0);		// jump if NaN
			BNE_i(1);		// do not jump if greater or less
			// jump
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			break;
		case NATIVE_CC_F_UGT: // Jump if NAN or greater than
			BVS_i(0);		// jump if NaN
			BLS_i(1);		// do not jump if lower or same
			// jump
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			break;
		case NATIVE_CC_F_UGE: // Jump if NAN or greater or equal
			BVS_i(0);		// jump if NaN
			BMI_i(1);		// do not jump if lower
			// jump
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			break;
		case NATIVE_CC_F_ULT: // Jump if NAN or less than
			BVS_i(0);		// jump if NaN
			BGE_i(1);		// do not jump if greater or equal
			// jump
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			break;
		case NATIVE_CC_F_ULE: // Jump if NAN or less or equal
			BVS_i(0);		// jump if NaN
			BGT_i(1);		// do not jump if greater
			// jump
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			break;
		default:
			CC_B_i(cc^1, 1);	// inverse condition skips the jump
			LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
			break;
	}
	// emit of target will be done by caller
}
@ -889,11 +977,6 @@ STATIC_INLINE void compemu_raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
emit_long(base);
}
/* Emit an indirect jump to the address held in host register r. */
STATIC_INLINE void compemu_raw_jmp_r(RR4 r)
{
BX_r(r);
}
STATIC_INLINE void compemu_raw_jnz(uae_u32 t)
{
#ifdef ARMV6T2
@ -1009,3 +1092,317 @@ LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v))
// <target emitted by caller>
}
LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v))
/*************************************************************************
* FPU stuff *
*************************************************************************/
/* Copy double-precision FP register s to d (VMOV.F64). */
LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
{
VMOV64_rr(d, s);
}
LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
/* Store double-precision FP register s to absolute address mem.
 * VSTR's immediate offset only reaches 1020 bytes and must be word-aligned,
 * hence the range/alignment check before using R_REGSTRUCT as base;
 * otherwise the full address is built in REG_WORK1. */
LOWFUNC(NONE,WRITE,2,compemu_raw_fmov_mr_drop,(MEMW mem, FR s))
{
if(mem >= (uae_u32) &regs && mem < (uae_u32) &regs + 1020 && ((mem - (uae_u32) &regs) & 0x3) == 0) {
VSTR64(s, R_REGSTRUCT, (mem - (uae_u32) &regs));
} else {
MOVW_ri16(REG_WORK1, mem);
MOVT_ri16(REG_WORK1, mem >> 16);
VSTR64(s, REG_WORK1, 0);
}
}
LENDFUNC(NONE,WRITE,2,compemu_raw_fmov_mr_drop,(MEMW mem, FR s))
/* Load double-precision FP register d from absolute address mem.
 * Mirrors compemu_raw_fmov_mr_drop: regstruct-relative VLDR when the
 * address is in range and word-aligned, otherwise via REG_WORK1.
 * Fix: LENDFUNC argument list said (FW d, MEMW mem) while LOWFUNC declares
 * (FW d, MEMR mem) — made them consistent (this is a READ op). */
LOWFUNC(NONE,READ,2,compemu_raw_fmov_rm,(FW d, MEMR mem))
{
if(mem >= (uae_u32) &regs && mem < (uae_u32) &regs + 1020 && ((mem - (uae_u32) &regs) & 0x3) == 0) {
VLDR64(d, R_REGSTRUCT, (mem - (uae_u32) &regs));
} else {
MOVW_ri16(REG_WORK1, mem);
MOVT_ri16(REG_WORK1, mem >> 16);
VLDR64(d, REG_WORK1, 0);
}
}
LENDFUNC(NONE,READ,2,compemu_raw_fmov_rm,(FW d, MEMR mem))
/* Integer/single -> double conversions into FP register d.
 * Note on scratch aliasing: SCRATCH_F64_1 is D1 and SCRATCH_F32_1 is S2,
 * and on VFP S2 is the low half of D1 — so a value moved into D1's low
 * word is converted via its S2 alias. */
LOWFUNC(NONE,NONE,2,raw_fmov_l_rr,(FW d, RR4 s))
{
VMOVi_from_ARM(SCRATCH_F64_1, s);	// put 32-bit int into low half of D1
VCVT_64_from_i(d, SCRATCH_F32_1);	// convert S2 (= D1 low) to double in d
}
LENDFUNC(NONE,NONE,2,raw_fmov_l_rr,(FW d, RR4 s))
/* Single-precision float (bit pattern in ARM reg s) -> double in d. */
LOWFUNC(NONE,NONE,2,raw_fmov_s_rr,(FW d, RR4 s))
{
VMOV32_from_ARM(SCRATCH_F32_1, s);
VCVT_32_to_64(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fmov_s_rr,(FW d, RR4 s))
/* Signed 16-bit word in s -> double in d. */
LOWFUNC(NONE,NONE,2,raw_fmov_w_rr,(FW d, RR2 s))
{
SIGN_EXTEND_16_REG_2_REG(REG_WORK1, s);
VMOVi_from_ARM(SCRATCH_F64_1, REG_WORK1);
VCVT_64_from_i(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fmov_w_rr,(FW d, RR2 s))
/* Signed 8-bit byte in s -> double in d. */
LOWFUNC(NONE,NONE,2,raw_fmov_b_rr,(FW d, RR1 s))
{
SIGN_EXTEND_8_REG_2_REG(REG_WORK1, s);
VMOVi_from_ARM(SCRATCH_F64_1, REG_WORK1);
VCVT_64_from_i(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fmov_b_rr,(FW d, RR1 s))
/* Assemble a raw 64-bit double in d from two 32-bit halves s1/s2. */
LOWFUNC(NONE,NONE,2,raw_fmov_d_rrr,(FW d, RR4 s1, RR4 s2))
{
VMOV64_from_ARM(d, s1, s2);
}
LENDFUNC(NONE,NONE,2,raw_fmov_d_rrr,(FW d, RR4 s1, RR4 s2))
/* Double -> integer conversions out of FP register s.
 * VCVTR rounds using the current FPSCR rounding mode (matches 68k FPU
 * rounding semantics); results pass through the S2/D1 scratch alias. */
LOWFUNC(NONE,NONE,2,raw_fmov_to_l_rr,(W4 d, FR s))
{
VCVTR_64_to_i(SCRATCH_F32_1, s);
VMOVi_to_ARM(d, SCRATCH_F64_1);	// read the int result back from D1's low half
}
LENDFUNC(NONE,NONE,2,raw_fmov_to_l_rr,(W4 d, FR s))
/* Double -> single-precision bit pattern in ARM reg d. */
LOWFUNC(NONE,NONE,2,raw_fmov_to_s_rr,(W4 d, FR s))
{
VCVT_64_to_32(SCRATCH_F32_1, s);
VMOV32_to_ARM(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fmov_to_s_rr,(W4 d, FR s))
/* Double -> 16-bit word, saturated to the signed 16-bit range and
 * inserted into the low 16 bits of d (upper bits of d preserved). */
LOWFUNC(NONE,NONE,2,raw_fmov_to_w_rr,(W4 d, FR s))
{
VCVTR_64_to_i(SCRATCH_F32_1, s);
VMOVi_to_ARM(REG_WORK1, SCRATCH_F64_1);
SSAT_rir(REG_WORK1, 15, REG_WORK1);
BFI_rrii(d, REG_WORK1, 0, 15);
}
LENDFUNC(NONE,NONE,2,raw_fmov_to_w_rr,(W4 d, FR s))
/* Double -> 8-bit byte, saturated to the signed 8-bit range and
 * inserted into the low 8 bits of d (upper bits of d preserved). */
LOWFUNC(NONE,NONE,2,raw_fmov_to_b_rr,(W4 d, FR s))
{
VCVTR_64_to_i(SCRATCH_F32_1, s);
VMOVi_to_ARM(REG_WORK1, SCRATCH_F64_1);
SSAT_rir(REG_WORK1, 7, REG_WORK1);
BFI_rrii(d, REG_WORK1, 0, 7);
}
LENDFUNC(NONE,NONE,2,raw_fmov_to_b_rr,(W4 d, FR s))
/* Load the constant 0.0 into r.  The VMOV.F64 immediate encoding cannot
 * represent 0.0, so load 1.0 (imm4H=0x7, imm4L=0x0) and subtract it from
 * itself. */
LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_0,(FW r))
{
VMOV64_i(r, 0x7, 0x0);		// load imm #1 into reg
VSUB64(r, r, r);		// 1.0 - 1.0 = 0.0
}
LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_0,(FW r))
/* Load the constant 1.0 into r (directly encodable as VFP immediate). */
LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_1,(FW r))
{
VMOV64_i(r, 0x7, 0x0);		// load imm #1 into reg
}
LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_1,(FW r))
/* Load the constant 10.0 into r (imm4H=0x2, imm4L=0x4 encodes 10.0). */
LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_10,(FW r))
{
VMOV64_i(r, 0x2, 0x4);		// load imm #10 into reg
}
LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_10,(FW r))
/* Load the constant 100.0 into r: 100.0 is not encodable as a VFP
 * immediate, so load 10.0 and square it.
 * Fix: LENDFUNC was mis-named raw_fmov_d_ri_10 (copy-paste from the
 * function above); it must match the LOWFUNC name raw_fmov_d_ri_100. */
LOWFUNC(NONE,NONE,1,raw_fmov_d_ri_100,(FW r))
{
VMOV64_i(r, 0x2, 0x4);		// load imm #10 into reg
VMUL64(r, r, r);		// 10.0 * 10.0 = 100.0
}
LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_100,(FW r))
/* Load a double from absolute address m into r. */
LOWFUNC(NONE,READ,2,raw_fmov_d_rm,(FW r, MEMR m))
{
MOVW_ri16(REG_WORK1, m);	// build 32-bit address with movw/movt
MOVT_ri16(REG_WORK1, m >> 16);
VLDR64(r, REG_WORK1, 0);
}
LENDFUNC(NONE,READ,2,raw_fmov_d_rm,(FW r, MEMR m))
/* Load a single from absolute address m and widen it to double in r. */
LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
{
MOVW_ri16(REG_WORK1, m);
MOVT_ri16(REG_WORK1, m >> 16);
VLDR32(SCRATCH_F32_1, REG_WORK1, 0);
VCVT_32_to_64(r, SCRATCH_F32_1);
}
LENDFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
/* Split the raw 64-bit pattern of FP register s into ARM regs d1/d2. */
LOWFUNC(NONE,NONE,3,raw_fmov_to_d_rrr,(W4 d1, W4 d2, FR s))
{
VMOV64_to_ARM(d1, d2, s);
}
LENDFUNC(NONE,NONE,3,raw_fmov_to_d_rrr,(W4 d1, W4 d2, FR s))
/* Basic double-precision arithmetic: each emitter is a single VFP
 * instruction.  Two-operand forms follow 68k FPU semantics: d = d OP s. */
LOWFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
{
VSQRT64(d, s);		// d = sqrt(s)
}
LENDFUNC(NONE,NONE,2,raw_fsqrt_rr,(FW d, FR s))
LOWFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
{
VABS64(d, s);		// d = |s|
}
LENDFUNC(NONE,NONE,2,raw_fabs_rr,(FW d, FR s))
LOWFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
{
VNEG64(d, s);		// d = -s
}
LENDFUNC(NONE,NONE,2,raw_fneg_rr,(FW d, FR s))
LOWFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
{
VDIV64(d, d, s);	// d = d / s
}
LENDFUNC(NONE,NONE,2,raw_fdiv_rr,(FRW d, FR s))
LOWFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
{
VADD64(d, d, s);	// d = d + s
}
LENDFUNC(NONE,NONE,2,raw_fadd_rr,(FRW d, FR s))
LOWFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
{
VMUL64(d, d, s);	// d = d * s
}
LENDFUNC(NONE,NONE,2,raw_fmul_rr,(FRW d, FR s))
LOWFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
{
VSUB64(d, d, s);	// d = d - s
}
LENDFUNC(NONE,NONE,2,raw_fsub_rr,(FRW d, FR s))
/* Round s to an integral double via a round-trip through the int domain.
 * VCVTR honours the current FPSCR rounding mode (FINT). */
LOWFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
{
VCVTR_64_to_i(SCRATCH_F32_1, s);
VCVT_64_from_i(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_frndint_rr,(FW d, FR s))
/* Same, but truncating toward zero (plain VCVT rounds toward zero — FINTRZ). */
LOWFUNC(NONE,NONE,2,raw_frndintz_rr,(FW d, FR s))
{
VCVT_64_to_i(SCRATCH_F32_1, s);
VCVT_64_from_i(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_frndintz_rr,(FW d, FR s))
/* 68k FMOD: d = d - trunc(d/s) * s.  The quotient is truncated toward
 * zero (plain VCVT), matching FMOD's round-to-zero quotient. */
LOWFUNC(NONE,NONE,2,raw_fmod_rr,(FRW d, FR s))
{
VDIV64(SCRATCH_F64_2, d, s);
VCVT_64_to_i(SCRATCH_F32_1, SCRATCH_F64_2);	// truncate quotient to int
VCVT_64_from_i(SCRATCH_F64_2, SCRATCH_F32_1);
VMUL64(SCRATCH_F64_1, SCRATCH_F64_2, s);
VSUB64(d, d, SCRATCH_F64_1);
}
LENDFUNC(NONE,NONE,2,raw_fmod_rr,(FRW d, FR s))
/* FSGLDIV: divide with single-precision rounding — narrow both operands
 * to single, divide, widen the result back to double. */
LOWFUNC(NONE,NONE,2,raw_fsgldiv_rr,(FRW d, FR s))
{
VCVT_64_to_32(SCRATCH_F32_1, d);
VCVT_64_to_32(SCRATCH_F32_2, s);
VDIV32(SCRATCH_F32_1, SCRATCH_F32_1, SCRATCH_F32_2);
VCVT_32_to_64(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fsgldiv_rr,(FRW d, FR s))
/* Reduce r to single precision in place (round-trip through float). */
LOWFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r))
{
VCVT_64_to_32(SCRATCH_F32_1, r);
VCVT_32_to_64(r, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,1,raw_fcuts_r,(FRW r))
/* 68k FREM: remainder with the quotient rounded to nearest.
 * FPSCR RMode (bits 23:22) is forced to round-to-nearest (cleared by the
 * BIC) around the VCVTR, then the caller's original rounding mode is
 * restored from the saved copy via UBFX/BFI. */
LOWFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
{
VMRS(REG_WORK1);				// save FPSCR
BIC_rri(REG_WORK2, REG_WORK1, 0x00c00000);	// RMode = 00 (nearest)
VMSR(REG_WORK2);
VDIV64(SCRATCH_F64_2, d, s);
VCVTR_64_to_i(SCRATCH_F32_1, SCRATCH_F64_2);	// round quotient to nearest int
VCVT_64_from_i(SCRATCH_F64_2, SCRATCH_F32_1);
VMUL64(SCRATCH_F64_1, SCRATCH_F64_2, s);
VSUB64(d, d, SCRATCH_F64_1);			// d = d - round(d/s) * s
VMRS(REG_WORK2);
UBFX_rrii(REG_WORK1, REG_WORK1, 22, 2);		// extract saved RMode
BFI_rrii(REG_WORK2, REG_WORK1, 22, 2);		// restore RMode
VMSR(REG_WORK2);
}
LENDFUNC(NONE,NONE,2,raw_frem1_rr,(FRW d, FR s))
/* FSGLMUL: multiply with single-precision rounding — narrow operands to
 * single, multiply, widen back (cf. raw_fsgldiv_rr). */
LOWFUNC(NONE,NONE,2,raw_fsglmul_rr,(FRW d, FR s))
{
VCVT_64_to_32(SCRATCH_F32_1, d);
VCVT_64_to_32(SCRATCH_F32_2, s);
VMUL32(SCRATCH_F32_1, SCRATCH_F32_1, SCRATCH_F32_2);
VCVT_32_to_64(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fsglmul_rr,(FRW d, FR s))
/* FSMOVE: copy s to d rounded to single precision. */
LOWFUNC(NONE,NONE,2,raw_fmovs_rr,(FW d, FR s))
{
VCVT_64_to_32(SCRATCH_F32_1, s);
VCVT_32_to_64(d, SCRATCH_F32_1);
}
LENDFUNC(NONE,NONE,2,raw_fmovs_rr,(FW d, FR s))
/* Call a C math function double->double on FP register s, result to d.
 * AAPCS hard-float passes the argument in D0 and returns in D0, hence the
 * VMOV64 to/from register 0; LR is saved across the call. */
LOWFUNC(NONE,NONE,3,raw_ffunc_rr,(double (*func)(double), FW d, FR s))
{
VMOV64_rr(0, s);			// argument in D0
MOVW_ri16(REG_WORK1, (uae_u32)func);
MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16);
PUSH(RLR_INDEX);			// BLX clobbers LR
BLX_r(REG_WORK1);
POP(RLR_INDEX);
VMOV64_rr(d, 0);			// result from D0
}
LENDFUNC(NONE,NONE,3,raw_ffunc_rr,(double (*func)(double), FW d, FR s))
/* Compute x^s via pow() for x == 2 or x == 10 (FTWOTOX/FTENTOX).
 * Base goes in D0, exponent in D1, per the hard-float ABI. */
LOWFUNC(NONE,NONE,3,raw_fpowx_rr,(uae_u32 x, FW d, FR s))
{
double (*func)(double,double) = pow;
if(x == 2) {
VMOV64_i(0, 0x0, 0x0);		// load imm #2 into first reg
} else {
VMOV64_i(0, 0x2, 0x4);		// load imm #10 into first reg
}
VMOV64_rr(1, s);			// exponent in D1
MOVW_ri16(REG_WORK1, (uae_u32)func);
MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16);
PUSH(RLR_INDEX);
BLX_r(REG_WORK1);
POP(RLR_INDEX);
VMOV64_rr(d, 0);			// result from D0
}
LENDFUNC(NONE,NONE,3,raw_fpowx_rr,(uae_u32 x, FW d, FR s))
/* Compare FP register r against 0.0 and copy the resulting FP condition
 * flags into the ARM APSR so integer conditional branches can test them. */
STATIC_INLINE void raw_fflags_into_flags(int r)
{
VCMP64_0(r);
VMRS(15); // special case: move flags from FPSCR to APSR_nzcv
}

View file

@ -1319,6 +1319,7 @@ enum {
// ARMv6T2
#ifdef ARMV6T2
#define CC_BFI_rrii(cc,Rd,Rn,lsb,msb) _W(((cc) << 28) | (0x3e << 21) | ((msb) << 16) | (Rd << 12) | ((lsb) << 7) | (0x1 << 4) | (Rn))
#define BFI_rrii(Rd,Rn,lsb,msb) CC_BFI_rrii(NATIVE_CC_AL,Rd,Rn,lsb,msb)
@ -1333,10 +1334,138 @@ enum {
#define CC_MOVT_ri16(cc,Rd,i) _W(((cc) << 28) | (0x34 << 20) | (((i >> 12) & 0xf) << 16) | (Rd << 12) | (i & 0x0fff))
#define MOVT_ri16(Rd,i) CC_MOVT_ri16(NATIVE_CC_AL,Rd,i)
#define CC_SSAT_rir(cc,Rd,i,Rn) _W(((cc) << 28) | (0x6a << 20) | (i << 16) | (Rd << 12) | (0x1 << 4) | (Rn))
#define SSAT_rir(Rd,i,Rn) CC_SSAT_rir(NATIVE_CC_AL,Rd,i,Rn)
#endif
// Floatingpoint
// VFP instruction encoders.  MAKE_Dd/Dn/Dm place a double-precision register
// number into the D:Vd / N:Vn / M:Vm encoding fields; MAKE_Sd/Sn/Sm do the
// same for single-precision registers (Vx = Sx>>1, extra bit = Sx&1).
// Fixes in this revision:
//  - CC_VSTR32 encoded its single-precision register with MAKE_Dd; it now
//    uses MAKE_Sd, consistent with CC_VLDR32.
//  - VCVT_32_from_i expanded `Dd` instead of its parameter `Sd`.
#define FADR_ADD(offs) ((1 << 23) | (offs) >> 2)
#define FADR_SUB(offs) ((0 << 23) | (offs) >> 2)
#define FIMM8(offs) (offs >= 0 ? FADR_ADD(offs) : FADR_SUB(-offs))
#define MAKE_Dd(Dd) (((Dd & 0x10) << 18) | ((Dd & 0x0f) << 12))
#define MAKE_Dm(Dm) (((Dm & 0x10) << 1) | ((Dm & 0x0f) << 0))
#define MAKE_Dn(Dn) (((Dn & 0x10) << 3) | ((Dn & 0x0f) << 16))
#define MAKE_Sd(Sd) (((Sd & 0x01) << 22) | ((Sd & 0x1e) << 11))
#define MAKE_Sm(Sm) (((Sm & 0x01) << 5) | ((Sm & 0x1e) >> 1))
#define MAKE_Sn(Sn) (((Sn & 0x01) << 7) | ((Sn & 0x1e) << 15))
#define CC_VLDR64(cc,Dd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x1 << 20) | (Rn << 16) | (0xb << 8) | FIMM8(offs) | MAKE_Dd(Dd))
#define VLDR64(Dd,Rn,offs) CC_VLDR64(NATIVE_CC_AL,Dd,Rn,offs)
#define CC_VLDR32(cc,Sd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x1 << 20) | (Rn << 16) | (0xa << 8) | FIMM8(offs) | MAKE_Sd(Sd))
#define VLDR32(Sd,Rn,offs) CC_VLDR32(NATIVE_CC_AL,Sd,Rn,offs)
#define CC_VSTR64(cc,Dd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x0 << 20) | (Rn << 16) | (0xb << 8) | FIMM8(offs) | MAKE_Dd(Dd))
#define VSTR64(Dd,Rn,offs) CC_VSTR64(NATIVE_CC_AL,Dd,Rn,offs)
#define CC_VSTR32(cc,Sd,Rn,offs) _W(((cc) << 28) | (0xd << 24) | (0x0 << 20) | (Rn << 16) | (0xa << 8) | FIMM8(offs) | MAKE_Sd(Sd))
#define VSTR32(Sd,Rn,offs) CC_VSTR32(NATIVE_CC_AL,Sd,Rn,offs)
#define CC_VMOV64_rr(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
#define VMOV64_rr(Dd,Dm) CC_VMOV64_rr(NATIVE_CC_AL,Dd,Dm)
#define CC_VMOV32_rr(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VMOV32_rr(Sd,Sm) CC_VMOV32_rr(NATIVE_CC_AL,Sd,Sm)
#define CC_VMOV32_to_ARM(cc,Rt,Sn) _W(((cc) << 28) | (0xe << 24) | (0x1 << 20) | (Rt << 12) | (0xa << 8) | (0x1 << 4) | MAKE_Sn(Sn))
#define VMOV32_to_ARM(Rt,Sn) CC_VMOV32_to_ARM(NATIVE_CC_AL,Rt,Sn)
#define CC_VMOV32_from_ARM(cc,Sn,Rt) _W(((cc) << 28) | (0xe << 24) | (0x0 << 20) | (Rt << 12) | (0xa << 8) | (0x1 << 4) | MAKE_Sn(Sn))
#define VMOV32_from_ARM(Sn,Rt) CC_VMOV32_from_ARM(NATIVE_CC_AL,Sn,Rt)
#define CC_VMOVi_from_ARM(cc,Dn,Rt) _W(((cc) << 28) | (0xe << 24) | (0x0 << 20) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dn(Dn))
#define VMOVi_from_ARM(Dn,Rt) CC_VMOVi_from_ARM(NATIVE_CC_AL,Dn,Rt)
#define CC_VMOVi_to_ARM(cc,Rt,Dn) _W(((cc) << 28) | (0xe << 24) | (0x1 << 20) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dn(Dn))
#define VMOVi_to_ARM(Rt,Dn) CC_VMOVi_to_ARM(NATIVE_CC_AL,Rt,Dn)
#define CC_VMOV64_to_ARM(cc,Rt,Rt2,Dm) _W(((cc) << 28) | (0xc << 24) | (0x5 << 20) | (Rt2 << 16) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dm(Dm))
#define VMOV64_to_ARM(Rt,Rt2,Dm) CC_VMOV64_to_ARM(NATIVE_CC_AL,Rt,Rt2,Dm)
#define CC_VMOV64_from_ARM(cc,Dm,Rt,Rt2) _W(((cc) << 28) | (0xc << 24) | (0x4 << 20) | (Rt2 << 16) | (Rt << 12) | (0xb << 8) | (0x1 << 4) | MAKE_Dm(Dm))
#define VMOV64_from_ARM(Dm,Rt,Rt2) CC_VMOV64_from_ARM(NATIVE_CC_AL,Dm,Rt,Rt2)
#define CC_VCVT_64_to_32(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x7 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm))
#define VCVT_64_to_32(Sd,Dm) CC_VCVT_64_to_32(NATIVE_CC_AL,Sd,Dm)
#define CC_VCVT_32_to_64(cc,Dd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x7 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Sm(Sm))
#define VCVT_32_to_64(Dd,Sm) CC_VCVT_32_to_64(NATIVE_CC_AL,Dd,Sm)
// VCVTR: float -> int using the FPSCR rounding mode; VCVT: round toward zero
#define CC_VCVTR_64_to_i(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xb << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm))
#define VCVTR_64_to_i(Sd,Dm) CC_VCVTR_64_to_i(NATIVE_CC_AL,Sd,Dm)
#define CC_VCVTR_32_to_i(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VCVTR_32_to_i(Sd,Sm) CC_VCVTR_32_to_i(NATIVE_CC_AL,Sd,Sm)
#define CC_VCVT_64_to_i(cc,Sd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xb << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Dm(Dm))
#define VCVT_64_to_i(Sd,Dm) CC_VCVT_64_to_i(NATIVE_CC_AL,Sd,Dm)
#define CC_VCVT_32_to_i(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xd << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VCVT_32_to_i(Sd,Sm) CC_VCVT_32_to_i(NATIVE_CC_AL,Sd,Sm)
#define CC_VCVT_64_from_i(cc,Dd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x8 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Sm(Sm))
#define VCVT_64_from_i(Dd,Sm) CC_VCVT_64_from_i(NATIVE_CC_AL,Dd,Sm)
#define CC_VCVT_32_from_i(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x8 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VCVT_32_from_i(Sd,Sm) CC_VCVT_32_from_i(NATIVE_CC_AL,Sd,Sm)
// NOTE(review): VMOV_rr64/VMOV_rr32 duplicate VMOV64_rr/VMOV32_rr encodings.
#define CC_VMOV_rr64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
#define VMOV_rr64(Dd,Dm) CC_VMOV_rr64(NATIVE_CC_AL,Dd,Dm)
#define CC_VMOV_rr32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VMOV_rr32(Sd,Sm) CC_VMOV_rr32(NATIVE_CC_AL,Sd,Sm)
#define CC_VADD64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm))
#define VADD64(Dd,Dn,Dm) CC_VADD64(NATIVE_CC_AL,Dd,Dn,Dm)
#define CC_VADD32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm))
#define VADD32(Sd,Sn,Sm) CC_VADD32(NATIVE_CC_AL,Sd,Sn,Sm)
#define CC_VSUB64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm))
#define VSUB64(Dd,Dn,Dm) CC_VSUB64(NATIVE_CC_AL,Dd,Dn,Dm)
#define CC_VSUB32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x3 << 20) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm))
#define VSUB32(Sd,Sn,Sm) CC_VSUB32(NATIVE_CC_AL,Sd,Sn,Sm)
#define CC_VMUL64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x2 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm))
#define VMUL64(Dd,Dn,Dm) CC_VMUL64(NATIVE_CC_AL,Dd,Dn,Dm)
#define CC_VMUL32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x2 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm))
#define VMUL32(Sd,Sn,Sm) CC_VMUL32(NATIVE_CC_AL,Sd,Sn,Sm)
#define CC_VDIV64(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0xe << 24) | (0x8 << 20) | (0xb << 8) | (0x0 << 4) | MAKE_Dd(Dd) | MAKE_Dn(Dn) | MAKE_Dm(Dm))
#define VDIV64(Dd,Dn,Dm) CC_VDIV64(NATIVE_CC_AL,Dd,Dn,Dm)
#define CC_VDIV32(cc,Sd,Sn,Sm) _W(((cc) << 28) | (0xe << 24) | (0x8 << 20) | (0xa << 8) | (0x0 << 4) | MAKE_Sd(Sd) | MAKE_Sn(Sn) | MAKE_Sm(Sm))
#define VDIV32(Sd,Sn,Sm) CC_VDIV32(NATIVE_CC_AL,Sd,Sn,Sm)
#define CC_VABS64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
#define VABS64(Dd,Dm) CC_VABS64(NATIVE_CC_AL,Dd,Dm)
#define CC_VABS32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VABS32(Sd,Sm) CC_VABS32(NATIVE_CC_AL,Sd,Sm)
#define CC_VNEG64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xb << 8) | (0x4 << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
#define VNEG64(Dd,Dm) CC_VNEG64(NATIVE_CC_AL,Dd,Dm)
#define CC_VNEG32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xa << 8) | (0x4 << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VNEG32(Sd,Sm) CC_VNEG32(NATIVE_CC_AL,Sd,Sm)
#define CC_VSQRT64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
#define VSQRT64(Dd,Dm) CC_VSQRT64(NATIVE_CC_AL,Dd,Dm)
#define CC_VSQRT32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x1 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VSQRT32(Sd,Sm) CC_VSQRT32(NATIVE_CC_AL,Sd,Sm)
#define CC_VCMP64(cc,Dd,Dm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x4 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd) | MAKE_Dm(Dm))
#define VCMP64(Dd,Dm) CC_VCMP64(NATIVE_CC_AL,Dd,Dm)
#define CC_VCMP32(cc,Sd,Sm) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x4 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd) | MAKE_Sm(Sm))
#define VCMP32(Sd,Sm) CC_VCMP32(NATIVE_CC_AL,Sd,Sm)
#define CC_VCMP64_0(cc,Dd) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd))
#define VCMP64_0(Dd) CC_VCMP64_0(NATIVE_CC_AL,Dd)
#define CC_VTST64(cc,Dd) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xb << 8) | (0xc << 4) | MAKE_Dd(Dd))
#define VTST64(Dd) CC_VTST64(NATIVE_CC_AL,Dd)
#define CC_VTST32(cc,Sd) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (0x5 << 16) | (0xa << 8) | (0xc << 4) | MAKE_Sd(Sd))
#define VTST32(Sd) CC_VTST32(NATIVE_CC_AL,Sd)
#define CC_VMRS(cc,Rt) _W(((cc) << 28) | (0xe << 24) | (0xf << 20) | (0x1 << 16) | (Rt << 12) | (0xa << 8) | (0x1 << 4))
#define VMRS(Rt) CC_VMRS(NATIVE_CC_AL,Rt)
#define CC_VMSR(cc,Rt) _W(((cc) << 28) | (0xe << 24) | (0xe << 20) | (0x1 << 16) | (Rt << 12) | (0xa << 8) | (0x1 << 4))
#define VMSR(Rt) CC_VMSR(NATIVE_CC_AL,Rt)
#define CC_VMOV64_i(cc,Dd,imm4H,imm4L) _W(((cc) << 28) | (0xe << 24) | (0xb << 20) | (imm4H << 16) | (0xb << 8) | (imm4L) | MAKE_Dd(Dd))
#define VMOV64_i(Dd,imm4H,imm4L) CC_VMOV64_i(NATIVE_CC_AL,Dd,imm4H,imm4L)
@ -1352,4 +1481,5 @@ enum {
#define CC_VDIV_ddd(cc,Dd,Dn,Dm) _W(((cc) << 28) | (0x1d << 23) | (0x0 << 20) | (Dn << 16) | (Dd << 12) | (0xb << 8) | (0x0 << 4) | (Dm))
#define VDIV_ddd(Dd,Dn,Dm) CC_VDIV_ddd(NATIVE_CC_AL,Dd,Dn,Dm)
#endif /* ARM_RTASM_H */

File diff suppressed because it is too large Load diff

View file

@ -90,7 +90,7 @@ typedef union {
#define BYTES_PER_INST 10240 /* paranoid ;-) */
#if defined(CPU_arm)
#define LONGEST_68K_INST 256 /* The number of bytes the longest possible
#define LONGEST_68K_INST 128 /* The number of bytes the longest possible
68k instruction takes */
#else
#define LONGEST_68K_INST 16 /* The number of bytes the longest possible
@ -127,7 +127,8 @@ typedef union {
#else
#define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */
#endif
#define N_FREGS 6 /* That leaves us two positions on the stack to play with */
#define N_FREGS 16 // We use 16 regs: 0 - FP_RESULT, 1-3 - SCRATCH, 4-7 - ???, 8-15 - Amiga regs FP0-FP7
/* Functions exposed to newcpu, or to what was moved from newcpu.c to
* compemu_support.c */
@ -151,11 +152,21 @@ extern int check_for_cache_miss(void);
#define scaled_cycles(x) (currprefs.m68k_speed<0?(((x)/SCALE)?(((x)/SCALE<MAXCYCLES?((x)/SCALE):MAXCYCLES)):1):(x))
/* JIT FPU compilation */
extern void comp_fpp_opp (uae_u32 opcode, uae_u16 extra);
extern void comp_fbcc_opp (uae_u32 opcode);
extern void comp_fscc_opp (uae_u32 opcode, uae_u16 extra);
void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra);
void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc);
void comp_fsave_opp (uae_u32 opcode);
void comp_frestore_opp (uae_u32 opcode);
extern uae_u32 needed_flags;
extern uae_u8* comp_pc_p;
extern void* pushall_call_handler;
#define VREGS 32
#define VFREGS 16
#define INMEM 1
#define CLEAN 2
@ -173,6 +184,13 @@ typedef struct {
uae_u8 dirtysize;
} reg_status;
/* Cache state of one virtual (68k) FP register — FP counterpart of
 * reg_status above. */
typedef struct {
uae_u32* mem;		/* backing store of this FP register in the regs struct */
uae_u8 status;		/* presumably INMEM/CLEAN/DIRTY as defined above — TODO confirm */
uae_s8 realreg;	/* gb-- realreg can hold -1 */
uae_u8 needflush;
} freg_status;
typedef struct {
uae_u8 use_flags;
uae_u8 set_flags;
@ -209,6 +227,13 @@ STATIC_INLINE int end_block(uae_u16 opcode)
#define FS2 10
#define FS3 11
#define SCRATCH_F64_1 1
#define SCRATCH_F64_2 2
#define SCRATCH_F64_3 3
#define SCRATCH_F32_1 2
#define SCRATCH_F32_2 4
#define SCRATCH_F32_3 6
typedef struct {
uae_u32 touched;
uae_s8 holds[VREGS];
@ -216,6 +241,11 @@ typedef struct {
uae_u8 locked;
} n_status;
/* State of one native (host VFP) FP register — FP counterpart of n_status. */
typedef struct {
uae_s8 holds;		/* virtual FP register currently held; presumably -1 when free — TODO confirm */
uae_u8 nholds;
} fn_status;
/* For flag handling */
#define NADA 1
#define TRASH 2
@ -233,6 +263,9 @@ typedef struct {
uae_u32 flags_on_stack;
uae_u32 flags_in_flags;
uae_u32 flags_are_important;
/* FPU part */
freg_status fate[VFREGS];
fn_status fat[N_FREGS];
} bigstate;
typedef struct {
@ -276,9 +309,9 @@ extern int touchcnt;
#include "compemu_midfunc_arm2.h"
#endif
//#if defined(CPU_i386) || defined(CPU_x86_64)
//#include "compemu_midfunc_x86.h"
//#endif
#if defined(CPU_i386) || defined(CPU_x86_64)
#include "compemu_midfunc_x86.h"
#endif
#undef DECLARE_MIDFUNC
@ -297,7 +330,7 @@ extern void writelong_clobber(int address, int source, int tmp);
extern void get_n_addr(int address, int dest, int tmp);
extern void get_n_addr_jmp(int address, int dest, int tmp);
extern void calc_disp_ea_020(int base, uae_u32 dp, int target, int tmp);
#define SYNC_PC_OFFSET 100
#define SYNC_PC_OFFSET 124
extern void sync_m68k_pc(void);
extern uae_u32 get_const(int r);
extern int is_const(int r);
@ -374,13 +407,9 @@ void execute_normal(void);
void exec_nostats(void);
void do_nothing(void);
void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra);
void comp_fscc_opp (uae_u32 opcode, uae_u16 extra);
void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc);
void comp_fbcc_opp (uae_u32 opcode);
void comp_fsave_opp (uae_u32 opcode);
void comp_frestore_opp (uae_u32 opcode);
void comp_fpp_opp (uae_u32 opcode, uae_u16 extra);
/* ARAnyM uses fpu_register name, used in scratch_t */
/* FIXME: check that no ARAnyM code assumes different floating point type */
typedef fptype fpu_register;
void jit_abort(const TCHAR *format,...);

View file

@ -8,7 +8,7 @@
* Modified 2005 Peter Keunecke
*/
#include <math.h>
#include <cmath>
#include "sysconfig.h"
#include "sysdeps.h"
@ -18,41 +18,794 @@
#include "custom.h"
#include "newcpu.h"
#include "compemu.h"
#include "flags_arm.h"
#if defined(JIT)
extern void fpp_to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3);
static const int sz1[8] = { 4, 4, 12, 12, 2, 8, 1, 0 };
static const int sz2[8] = { 4, 4, 12, 12, 2, 8, 2, 0 };
/* return the required floating point precision or -1 for failure, 0=E, 1=S, 2=D */
/*
 * Decode the <ea> source operand of an FPU instruction and emit JIT code
 * that loads its value into FP temp register treg.
 * Returns the precision that was loaded — 0 = extended, 1 = single,
 * 2 = double — or -1 on failure (caller then falls back to non-JIT
 * execution).  Side effects: advances m68k_pc_offset past any extension
 * words consumed, and emits address-register updates for (An)+ / -(An).
 */
STATIC_INLINE int comp_fp_get (uae_u32 opcode, uae_u16 extra, int treg)
{
int reg = opcode & 7;
int mode = (opcode >> 3) & 7;
int size = (extra >> 10) & 7;
/* Extended precision (size 2) is only supported as an immediate (mode 7/reg 4). */
if ((size == 2 && (mode != 7 || reg != 4)) || size == 3 || size == 7) /* 2 = long double, 3 = packed decimal, 7 is not defined */
return -1;
switch (mode) {
case 0: /* Dn */
switch (size) {
case 0: /* Long */
fmov_l_rr (treg, reg);
return 2;
case 1: /* Single */
fmov_s_rr (treg, reg);
return 1;
case 4: /* Word */
fmov_w_rr (treg, reg);
return 1;
case 6: /* Byte */
fmov_b_rr (treg, reg);
return 1;
default:
return -1;
}
case 1: /* An, invalid mode */
return -1;
/* Modes 2-7.0/7.1/7.2/7.6 compute the operand address into S1 and fall
 * through to the memory-read code after the switch. */
case 2: /* (An) */
mov_l_rr (S1, reg + 8);
break;
case 3: /* (An)+ */
mov_l_rr (S1, reg + 8);
/* sz2 is used for A7 so the stack pointer stays word-aligned on byte accesses */
arm_ADD_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size]));
break;
case 4: /* -(An) */
arm_SUB_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size]));
mov_l_rr (S1, reg + 8);
break;
case 5: /* (d16,An) */
{
uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
mov_l_rr (S1, reg + 8);
lea_l_brr (S1, S1, off);
break;
}
case 6: /* (d8,An,Xn) or (bd,An,Xn) or ([bd,An,Xn],od) or ([bd,An],Xn,od) */
{
uae_u32 dp = comp_get_iword ((m68k_pc_offset += 2) - 2);
calc_disp_ea_020 (reg + 8, dp, S1, S2);
break;
}
case 7:
switch (reg) {
case 0: /* (xxx).W */
{
uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
mov_l_ri (S1, off);
break;
}
case 1: /* (xxx).L */
{
uae_u32 off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
mov_l_ri (S1, off);
break;
}
case 2: /* (d16,PC) */
{
/* PC-relative: compute the 68k PC of the extension word at compile time */
uae_u32 address = start_pc + ((uae_char*) comp_pc_p - (uae_char*) start_pc_p) +
m68k_pc_offset;
uae_s32 PC16off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
mov_l_ri (S1, address + PC16off);
break;
}
case 3: /* (d8,PC,Xn) or (bd,PC,Xn) or ([bd,PC,Xn],od) or ([bd,PC],Xn,od) */
return -1; /* rarely used, fallback to non-JIT */
case 4: /* # < data >; Constants should be converted just once by the JIT */
m68k_pc_offset += sz2[size];
switch (size) {
case 0:
{
/* Immediate LONG: demote to single when the value survives the round-trip */
uae_s32 li = comp_get_ilong(m68k_pc_offset - 4);
float si = (float)li;
if (li == (int)si) {
//write_log ("converted immediate LONG constant to SINGLE\n");
fmov_s_ri(treg, *(uae_u32 *)&si);
return 1;
}
//write_log ("immediate LONG constant\n");
fmov_l_ri(treg, *(uae_u32 *)&li);
return 2;
}
case 1:
//write_log (_T("immediate SINGLE constant\n"));
fmov_s_ri(treg, comp_get_ilong(m68k_pc_offset - 4));
return 1;
case 2:
{
//write_log (_T("immediate LONG DOUBLE constant\n"));
/* 96-bit extended immediate: convert at compile time and load as double */
uae_u32 wrd1, wrd2, wrd3;
fpdata tmp;
wrd3 = comp_get_ilong(m68k_pc_offset - 4);
wrd2 = comp_get_ilong(m68k_pc_offset - 8);
wrd1 = comp_get_iword(m68k_pc_offset - 12) << 16;
fpp_to_exten(&tmp, wrd1, wrd2, wrd3);
mov_l_ri(S1, ((uae_u32*)&tmp)[0]);
mov_l_ri(S2, ((uae_u32*)&tmp)[1]);
fmov_d_rrr (treg, S1, S2);
return 0;
}
case 4:
{
float si = (float)(uae_s16)comp_get_iword(m68k_pc_offset-2);
//write_log (_T("converted immediate WORD constant %f to SINGLE\n"), si);
fmov_s_ri(treg, *(uae_u32 *)&si);
return 1;
}
case 5:
{
//write_log (_T("immediate DOUBLE constant\n"));
mov_l_ri(S1, comp_get_ilong(m68k_pc_offset - 4));
mov_l_ri(S2, comp_get_ilong(m68k_pc_offset - 8));
fmov_d_rrr (treg, S1, S2);
return 2;
}
case 6:
{
float si = (float)(uae_s8)comp_get_ibyte(m68k_pc_offset - 2);
//write_log (_T("converted immediate BYTE constant to SINGLE\n"));
fmov_s_ri(treg, *(uae_u32 *)&si);
return 1;
}
default: /* never reached */
return -1;
}
default: /* never reached */
return -1;
}
}
/* Memory operand: address is in S1 — emit the read and the conversion
 * into treg for the requested size. */
switch (size) {
case 0: /* Long */
readlong (S1, S2, S3);
fmov_l_rr (treg, S2);
return 2;
case 1: /* Single */
readlong (S1, S2, S3);
fmov_s_rr (treg, S2);
return 1;
case 4: /* Word */
readword (S1, S2, S3);
fmov_w_rr (treg, S2);
return 1;
case 5: /* Double */
readlong (S1, S2, S3);
add_l_ri (S1, 4);
readlong (S1, S4, S3);
fmov_d_rrr (treg, S4, S2);	/* big-endian: first long read is the high word */
return 2;
case 6: /* Byte */
readbyte (S1, S2, S3);
fmov_b_rr (treg, S2);
return 1;
default:
return -1;
}
return -1;
}
/* return of -1 means failure, >=0 means OK */
/* Compile an FMOVE FPx,<EA> store: write FPU register sreg to the
 * destination effective address encoded in opcode/extra.
 * Unsupported sizes/modes return -1 so the caller can fall back to the
 * interpretive FPU. */
STATIC_INLINE int comp_fp_put (uae_u32 opcode, uae_u16 extra)
{
int reg = opcode & 7;
int sreg = (extra >> 7) & 7;
int mode = (opcode >> 3) & 7;
int size = (extra >> 10) & 7;
if (size == 2 || size == 3 || size == 7) /* 2 = long double, 3 = packed decimal, 7 is not defined */
return -1;
switch (mode) {
case 0: /* Dn */
/* Register destination: convert directly, no memory access. */
switch (size) {
case 0: /* FMOVE.L FPx, Dn */
fmov_to_l_rr(reg, sreg);
return 0;
case 1: /* FMOVE.S FPx, Dn */
fmov_to_s_rr(reg, sreg);
return 0;
case 4: /* FMOVE.W FPx, Dn */
fmov_to_w_rr(reg, sreg);
return 0;
case 6: /* FMOVE.B FPx, Dn */
fmov_to_b_rr(reg, sreg);
return 0;
default:
return -1;
}
case 1: /* An, invalid mode */
return -1;
/* All remaining modes compute the effective address into S1;
 * the value itself is stored through S1 after this switch. */
case 2: /* (An) */
mov_l_rr (S1, reg + 8);
break;
case 3: /* (An)+ */
mov_l_rr (S1, reg + 8);
/* sz2 is selected for A7 — presumably keeps the stack pointer
 * even-aligned for byte-sized operands; table not visible here. */
arm_ADD_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size]));
break;
case 4: /* -(An) */
arm_SUB_l_ri8(reg + 8, (reg == 7 ? sz2[size] : sz1[size]));
mov_l_rr (S1, reg + 8);
break;
case 5: /* (d16,An) */
{
uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
mov_l_rr (S1, reg + 8);
add_l_ri (S1, off);
break;
}
case 6: /* (d8,An,Xn) or (bd,An,Xn) or ([bd,An,Xn],od) or ([bd,An],Xn,od) */
{
uae_u32 dp = comp_get_iword ((m68k_pc_offset += 2) - 2);
calc_disp_ea_020 (reg + 8, dp, S1, S2);
break;
}
case 7:
switch (reg) {
case 0: /* (xxx).W */
{
uae_u32 off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
mov_l_ri (S1, off);
break;
}
case 1: /* (xxx).L */
{
uae_u32 off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
mov_l_ri (S1, off);
break;
}
default: /* All other modes are not allowed for FPx to <EA> */
write_log (_T ("JIT FMOVE FPx,<EA> Mode is not allowed %04x %04x\n"), opcode, extra);
return -1;
}
}
/* S1 now holds the destination address: convert sreg to the requested
 * integer/float format in scratch regs and store it. */
switch (size) {
case 0: /* Long */
fmov_to_l_rr(S2, sreg);
writelong_clobber (S1, S2, S3);
return 0;
case 1: /* Single */
fmov_to_s_rr(S2, sreg);
writelong_clobber (S1, S2, S3);
return 0;
case 4: /* Word */
fmov_to_w_rr(S2, sreg);
writeword (S1, S2, S3);
return 0;
case 5: /* Double */
/* Two 32-bit halves: S3 is written at the EA, S2 at EA+4. */
fmov_to_d_rrr(S2, S3, sreg);
writelong_clobber (S1, S3, S4);
add_l_ri (S1, 4);
writelong_clobber (S1, S2, S4);
return 0;
case 6: /* Byte */
fmov_to_b_rr(S2, sreg);
writebyte (S1, S2, S3);
return 0;
default:
return -1;
}
return -1;
}
/* return -1 for failure, or register number for success */
/* Compute the effective address of the <EA> in opcode into scratch
 * register S1 and return S1.  Only plain register-indirect, (d16,An)
 * and absolute modes are handled; everything else fails with -1.
 * NOTE(review): modes 3/4 ((An)+ / -(An)) do NOT adjust An here —
 * presumably callers only need the raw address; confirm against users
 * of this helper. */
STATIC_INLINE int comp_fp_adr (uae_u32 opcode)
{
uae_s32 off;
int mode = (opcode >> 3) & 7;
int reg = opcode & 7;
switch (mode) {
case 2:
case 3:
case 4:
/* (An), (An)+, -(An): address is simply An. */
mov_l_rr (S1, 8 + reg);
return S1;
case 5:
/* (d16,An): fetch the 16-bit displacement and add it. */
off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
mov_l_rr (S1, 8 + reg);
add_l_ri (S1, off);
return S1;
case 7:
switch (reg) {
case 0: /* (xxx).W — sign-extended absolute short */
off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
mov_l_ri (S1, off);
return S1;
case 1: /* (xxx).L — absolute long */
off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
mov_l_ri (S1, off);
return S1;
}
/* reg 2..7 of mode 7: fall through to the failure return */
default:
return -1;
}
}
/* FDBcc: not implemented in the JIT — always FAIL so the block is
 * compiled to call the interpretive FPU instead. */
void comp_fdbcc_opp (uae_u32 opcode, uae_u16 extra)
{
printf("comp_fdbcc_opp not yet implemented\n");
FAIL (1);
return;
}
/* FScc: not implemented in the JIT yet — always hand the instruction
 * back to the interpretive FPU via FAIL(1).
 * The original body tested currprefs.compfpu first, but BOTH branches
 * ended in an identical "FAIL (1); return;", so the test (and a dead
 * commented-out trace printf) have been removed; behavior is unchanged. */
void comp_fscc_opp (uae_u32 opcode, uae_u16 extra)
{
	printf("comp_fscc_opp not yet implemented\n");
	FAIL (1);
	return;
}
/* FTRAPcc: not implemented in the JIT — always FAIL so the
 * interpretive fallback handles it. */
void comp_ftrapcc_opp (uae_u32 opcode, uaecptr oldpc)
{
printf("comp_ftrapcc_opp not yet implemented\n");
FAIL (1);
return;
}
/* Compile FBcc (FPU conditional branch).
 *
 * Reads the 16- or 32-bit displacement from the instruction stream,
 * materializes both the taken and not-taken 68k PC values as constants,
 * transfers the FPU result flags into the native flags, and registers a
 * native conditional branch for the block epilogue.
 * FAILs (interpreter fallback) when the JIT FPU is disabled or an
 * undefined condition (>= 0x20) is encoded.
 *
 * Fix: removed a leftover debug printf claiming the function is
 * "not yet implemented" — it is implemented, and the printf fired at
 * translation time for every FBcc encountered. */
void comp_fbcc_opp (uae_u32 opcode)
{
	uae_u32 start_68k_offset = m68k_pc_offset;
	uae_u32 off, v1, v2;
	int cc;

	if (!currprefs.compfpu) {
		FAIL (1);
		return;
	}
	if (opcode & 0x20) { /* only cc from 00 to 1f are defined */
		FAIL (1);
		return;
	}
	/* Bit 6 selects word vs. long displacement. */
	if (!(opcode & 0x40)) {
		off = (uae_s32) (uae_s16) comp_get_iword ((m68k_pc_offset += 2) - 2);
	}
	else {
		off = comp_get_ilong ((m68k_pc_offset += 4) - 4);
	}
	/* according to fpp.c, the 0x10 bit is ignored
	 (it handles exception handling, which we don't
	 do, anyway ;-) */
	cc = opcode & 0x0f;
	if(cc == 0)
		return; /* jump never */
	/* Note, "off" will sometimes be (unsigned) "negative", so the following
	 * uintptr can be > 0xffffffff, but the result will be correct due to
	 * wraparound when truncated to 32 bit in the call to mov_l_ri. */
	mov_l_ri(S1, (uintptr)
		(comp_pc_p + off - (m68k_pc_offset - start_68k_offset)));
	mov_l_ri(PC_P, (uintptr) comp_pc_p);
	/* Now they are both constant. Might as well fold in m68k_pc_offset */
	add_l_ri (S1, m68k_pc_offset);
	add_l_ri (PC_P, m68k_pc_offset);
	m68k_pc_offset = 0;
	v1 = get_const (PC_P);  /* not-taken target */
	v2 = get_const (S1);    /* taken target */
	fflags_into_flags ();
	switch (cc) {
		case 1: register_branch (v1, v2, NATIVE_CC_EQ); break;
		case 2: register_branch (v1, v2, NATIVE_CC_F_OGT); break;
		case 3: register_branch (v1, v2, NATIVE_CC_F_OGE); break;
		case 4: register_branch (v1, v2, NATIVE_CC_F_OLT); break;
		case 5: register_branch (v1, v2, NATIVE_CC_F_OLE); break;
		case 6: register_branch (v1, v2, NATIVE_CC_F_OGL); break;
		case 7: register_branch (v1, v2, NATIVE_CC_F_OR); break;
		case 8: register_branch (v1, v2, NATIVE_CC_F_UN); break;
		case 9: register_branch (v1, v2, NATIVE_CC_F_UEQ); break;
		case 10: register_branch (v1, v2, NATIVE_CC_F_UGT); break;
		case 11: register_branch (v1, v2, NATIVE_CC_F_UGE); break;
		case 12: register_branch (v1, v2, NATIVE_CC_F_ULT); break;
		case 13: register_branch (v1, v2, NATIVE_CC_F_ULE); break;
		case 14: register_branch (v1, v2, NATIVE_CC_NE); break;
		/* branch always: both targets are the taken address */
		case 15: register_branch (v2, v2, NATIVE_CC_AL); break;
	}
}
/* FSAVE: not implemented in the JIT — always FAIL so the
 * interpretive fallback handles it. */
void comp_fsave_opp (uae_u32 opcode)
{
printf("comp_fsave_opp not yet implemented\n");
FAIL (1);
return;
}
/* FRESTORE: not implemented in the JIT — always FAIL so the
 * interpretive fallback handles it. */
void comp_frestore_opp (uae_u32 opcode)
{
printf("comp_frestore_opp not yet implemented\n");
FAIL (1);
return;
}
/* IEEE-754 double-precision bit patterns for the FMOVECR constant ROM,
 * stored as {low 32 bits, high 32 bits} (little-endian word order). */
static uae_u32 dhex_pi[] ={0x54442D18, 0x400921FB};    /* pi */
static uae_u32 dhex_exp_1[] ={0x8B145769, 0x4005BF0A}; /* e */
static uae_u32 dhex_l2_e[] ={0x652B82FE, 0x3FF71547};  /* log2(e) */
static uae_u32 dhex_ln_2[] ={0xFEFA39EF, 0x3FE62E42};  /* ln(2) */
static uae_u32 dhex_ln_10[] ={0xBBB55516, 0x40026BB1}; /* ln(10) */
static uae_u32 dhex_l10_2[] ={0x509F79FF, 0x3FD34413}; /* log10(2) */
static uae_u32 dhex_l10_e[] ={0x1526E50E, 0x3FDBCB7B}; /* log10(e) */
static uae_u32 dhex_1e16[] ={0x37E08000, 0x4341C379};  /* 1e16 */
static uae_u32 dhex_1e32[] ={0xB5056E17, 0x4693B8B5};  /* 1e32 */
static uae_u32 dhex_1e64[] ={0xE93FF9F5, 0x4D384F03};  /* 1e64 */
static uae_u32 dhex_1e128[] ={0xF9301D32, 0x5A827748}; /* 1e128 */
static uae_u32 dhex_1e256[] ={0x7F73BF3C, 0x75154FDD}; /* 1e256 */
static uae_u32 dhex_inf[] ={0x00000000, 0x7ff00000};   /* +infinity */
static uae_u32 dhex_nan[] ={0xffffffff, 0x7fffffff};   /* quiet NaN */
/* 1e8 is provided as a real double elsewhere (used for FMOVECR 0x36). */
extern double fp_1e8;
/* Compile one general FPU (F-line) instruction.
 *
 * opcode/extra are the two 68k instruction words.  Dispatch is on the
 * "source" field (bits 13-15) of the extra word:
 *   3    FMOVE FPx,<EA>               (delegated to comp_fp_put)
 *   4/5  FMOVE.L <EA>,ctrl-reg / ctrl-reg,<EA>  (FPCR/FPSR/FPIAR)
 *   2    <EA> -> FPx, including FMOVECR constants; arithmetic forms
 *        fall through into case 0
 *   0    register-to-register monadic/dyadic operations
 * Anything not compilable is handed to the interpretive FPU via FAIL(1).
 *
 * Fixes: removed a leftover debug printf claiming the function is
 * "not yet implemented" (it is implemented, and the printf fired at
 * translation time for every FPU instruction), and dropped the unused
 * local variable 'reg'. */
void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
{
	int sreg, prec = 0;
	int dreg = (extra >> 7) & 7;
	int source = (extra >> 13) & 7;
	int opmode = extra & 0x7f;

	if (!currprefs.compfpu) {
		FAIL (1);
		return;
	}
	switch (source) {
	case 3: /* FMOVE FPx, <EA> */
		if (comp_fp_put (opcode, extra) < 0)
			FAIL (1);
		return;
	case 4: /* FMOVE.L <EA>, ControlReg */
		if (!(opcode & 0x30)) { /* Dn or An */
			if (extra & 0x1000) { /* FPCR */
				mov_l_mr (uae_p32(&regs.fpcr), opcode & 15);
				return;
			}
			if (extra & 0x0800) { /* FPSR: not compiled — needs full status emulation */
				FAIL (1);
				return;
				// set_fpsr(m68k_dreg (regs, opcode & 15));
			}
			if (extra & 0x0400) { /* FPIAR */
				mov_l_mr (uae_p32(&regs.fpiar), opcode & 15); return;
			}
		}
		else if ((opcode & 0x3f) == 0x3c) { /* immediate source */
			if (extra & 0x1000) { /* FPCR */
				uae_u32 val = comp_get_ilong ((m68k_pc_offset += 4) - 4);
				mov_l_mi (uae_p32(&regs.fpcr), val);
				return;
			}
			if (extra & 0x0800) { /* FPSR */
				FAIL (1);
				return;
			}
			if (extra & 0x0400) { /* FPIAR */
				uae_u32 val = comp_get_ilong ((m68k_pc_offset += 4) - 4);
				mov_l_mi (uae_p32(&regs.fpiar), val);
				return;
			}
		}
		FAIL (1);
		return;
	case 5: /* FMOVE.L ControlReg, <EA> */
		if (!(opcode & 0x30)) { /* Dn or An */
			if (extra & 0x1000) { /* FPCR */
				mov_l_rm (opcode & 15, uae_p32(&regs.fpcr)); return;
			}
			if (extra & 0x0800) { /* FPSR */
				FAIL (1);
				return;
			}
			if (extra & 0x0400) { /* FPIAR */
				mov_l_rm (opcode & 15, uae_p32(&regs.fpiar)); return;
			}
		}
		FAIL (1);
		return;
	case 6: /* FMOVEM — not compiled */
	case 7:
		FAIL (1);
		return;
	case 2: /* from <EA> to FPx */
		dont_care_fflags ();
		if ((extra & 0xfc00) == 0x5c00) { /* FMOVECR: load ROM constant */
			//write_log (_T("JIT FMOVECR %x\n"), opmode);
			switch (opmode) {
			case 0x00:
				fmov_d_rm (dreg, uae_p32(&dhex_pi));
				break;
			case 0x0b:
				fmov_d_rm (dreg, uae_p32(&dhex_l10_2));
				break;
			case 0x0c:
				fmov_d_rm (dreg, uae_p32(&dhex_exp_1));
				break;
			case 0x0d:
				fmov_d_rm (dreg, uae_p32(&dhex_l2_e));
				break;
			case 0x0e:
				fmov_d_rm (dreg, uae_p32(&dhex_l10_e));
				break;
			case 0x0f:
				fmov_d_ri_0 (dreg);
				break;
			case 0x30:
				fmov_d_rm (dreg, uae_p32(&dhex_ln_2));
				break;
			case 0x31:
				fmov_d_rm (dreg, uae_p32(&dhex_ln_10));
				break;
			case 0x32:
				fmov_d_ri_1 (dreg);
				break;
			case 0x33:
				fmov_d_ri_10 (dreg);
				break;
			case 0x34:
				fmov_d_ri_100 (dreg);
				break;
			case 0x35:
				fmov_l_ri (dreg, 10000);
				break;
			case 0x36:
				fmov_rm (dreg, uae_p32(&fp_1e8));
				break;
			case 0x37:
				fmov_d_rm (dreg, uae_p32(&dhex_1e16));
				break;
			case 0x38:
				fmov_d_rm (dreg, uae_p32(&dhex_1e32));
				break;
			case 0x39:
				fmov_d_rm (dreg, uae_p32(&dhex_1e64));
				break;
			case 0x3a:
				fmov_d_rm (dreg, uae_p32(&dhex_1e128));
				break;
			case 0x3b:
				fmov_d_rm (dreg, uae_p32(&dhex_1e256));
				break;
			default:
				FAIL (1);
				return;
			}
			fmov_rr (FP_RESULT, dreg);
			return;
		}
		if (opmode & 0x20) /* two operands, so we need a scratch reg */
			sreg = FS1;
		else /* one operand only, thus we can load the argument into dreg */
			sreg = dreg;
		if ((prec = comp_fp_get (opcode, extra, sreg)) < 0) {
			FAIL (1);
			return;
		}
		if (!opmode) { /* FMOVE <EA>,FPx */
			fmov_rr (FP_RESULT, dreg);
			return;
		}
		/* no break here for <EA> to dreg */
	case 0: /* directly from sreg to dreg */
		if (!source) { /* no <EA> */
			dont_care_fflags ();
			sreg = (extra >> 10) & 7;
		}
		switch (opmode) {
		case 0x00: /* FMOVE */
			fmov_rr (dreg, sreg);
			break;
		case 0x01: /* FINT */
			frndint_rr (dreg, sreg);
			break;
		case 0x02: /* FSINH */
			ffunc_rr (sinh, dreg, sreg);
			break;
		case 0x03: /* FINTRZ */
			frndintz_rr (dreg, sreg);
			break;
		case 0x04: /* FSQRT */
			fsqrt_rr (dreg, sreg);
			break;
		case 0x06: /* FLOGNP1 */
			ffunc_rr (log1p, dreg, sreg);
			break;
		case 0x08: /* FETOXM1 */
			ffunc_rr (expm1, dreg, sreg);
			break;
		case 0x09: /* FTANH */
			ffunc_rr (tanh, dreg, sreg);
			break;
		case 0x0a: /* FATAN */
			ffunc_rr (atan, dreg, sreg);
			break;
		case 0x0c: /* FASIN */
			ffunc_rr (asin, dreg, sreg);
			break;
		case 0x0d: /* FATANH */
			ffunc_rr (atanh, dreg, sreg);
			break;
		case 0x0e: /* FSIN */
			ffunc_rr (sin, dreg, sreg);
			break;
		case 0x0f: /* FTAN */
			ffunc_rr (tan, dreg, sreg);
			break;
		case 0x10: /* FETOX */
			ffunc_rr (exp, dreg, sreg);
			break;
		case 0x11: /* FTWOTOX */
			fpowx_rr (2, dreg, sreg);
			break;
		case 0x12: /* FTENTOX */
			fpowx_rr (10, dreg, sreg);
			break;
		case 0x14: /* FLOGN */
			ffunc_rr (log, dreg, sreg);
			break;
		case 0x15: /* FLOG10 */
			ffunc_rr (log10, dreg, sreg);
			break;
		case 0x16: /* FLOG2 */
			ffunc_rr (log2, dreg, sreg);
			break;
		case 0x18: /* FABS */
			fabs_rr (dreg, sreg);
			break;
		case 0x19: /* FCOSH */
			ffunc_rr (cosh, dreg, sreg);
			break;
		case 0x1a: /* FNEG */
			fneg_rr (dreg, sreg);
			break;
		case 0x1c: /* FACOS */
			ffunc_rr (acos, dreg, sreg);
			break;
		case 0x1d: /* FCOS */
			ffunc_rr (cos, dreg, sreg);
			break;
		case 0x20: /* FDIV */
			fdiv_rr (dreg, sreg);
			break;
		case 0x21: /* FMOD */
			fmod_rr (dreg, sreg);
			break;
		case 0x22: /* FADD */
			fadd_rr (dreg, sreg);
			break;
		case 0x23: /* FMUL */
			fmul_rr (dreg, sreg);
			break;
		case 0x24: /* FSGLDIV */
			fsgldiv_rr (dreg, sreg);
			break;
		case 0x60: /* FSDIV: like FDIV but result rounded to single */
			fdiv_rr (dreg, sreg);
			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
				break;
			fcuts_r (dreg);
			break;
		case 0x25: /* FREM */
			frem1_rr (dreg, sreg);
			break;
		case 0x27: /* FSGLMUL */
			fsglmul_rr (dreg, sreg);
			break;
		case 0x63: /* FSMUL */
			fmul_rr (dreg, sreg);
			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
				break;
			fcuts_r (dreg);
			break;
		case 0x28: /* FSUB */
			fsub_rr (dreg, sreg);
			break;
		case 0x30: /* FSINCOS: sine to dreg, cosine to FP(extra & 7) */
		case 0x31:
		case 0x32:
		case 0x33:
		case 0x34:
		case 0x35:
		case 0x36:
		case 0x37:
			if (dreg == (extra & 7)) /* same register: cosine would be overwritten */
				ffunc_rr (sin, dreg, sreg);
			else
				fsincos_rr (dreg, extra & 7, sreg);
			break;
		case 0x38: /* FCMP: flags come from dreg - sreg in FP_RESULT */
			fmov_rr (FP_RESULT, dreg);
			fsub_rr (FP_RESULT, sreg);
			return;
		case 0x3a: /* FTST */
			fmov_rr (FP_RESULT, sreg);
			return;
		case 0x40: /* FSMOVE */
			if (prec == 1 || !currprefs.fpu_strict) { /* already single, or don't care */
				if (sreg != dreg) /* no <EA> */
					fmov_rr (dreg, sreg);
			}
			else {
				fmovs_rr (dreg, sreg);
			}
			break;
		case 0x44: /* FDMOVE */
			if (sreg != dreg) /* no <EA> */
				fmov_rr (dreg, sreg);
			break;
		case 0x41: /* FSSQRT */
			fsqrt_rr (dreg, sreg);
			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
				break;
			fcuts_r (dreg);
			break;
		case 0x45: /* FDSQRT */
			fsqrt_rr (dreg, sreg);
			break;
		case 0x58: /* FSABS */
			fabs_rr (dreg, sreg);
			if (prec != 1 && currprefs.fpu_strict)
				fcuts_r (dreg);
			break;
		case 0x5a: /* FSNEG */
			fneg_rr (dreg, sreg);
			if (prec != 1 && currprefs.fpu_strict)
				fcuts_r (dreg);
			break;
		case 0x5c: /* FDABS */
			fabs_rr (dreg, sreg);
			break;
		case 0x5e: /* FDNEG */
			fneg_rr (dreg, sreg);
			break;
		case 0x62: /* FSADD */
			fadd_rr (dreg, sreg);
			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
				break;
			fcuts_r (dreg);
			break;
		case 0x64: /* FDDIV */
			fdiv_rr (dreg, sreg);
			break;
		case 0x66: /* FDADD */
			fadd_rr (dreg, sreg);
			break;
		case 0x67: /* FDMUL */
			fmul_rr (dreg, sreg);
			break;
		case 0x68: /* FSSUB */
			fsub_rr (dreg, sreg);
			if (!currprefs.fpu_strict) /* faster, but less strict rounding */
				break;
			fcuts_r (dreg);
			break;
		case 0x6c: /* FDSUB */
			fsub_rr (dreg, sreg);
			break;
		default:
			FAIL (1);
			return;
		}
		/* Mirror the result into FP_RESULT so condition codes can be
		 * derived later (fflags_into_flags). */
		fmov_rr (FP_RESULT, dreg);
		return;
	default:
		write_log (_T ("Unsupported JIT-FPU instruction: 0x%04x %04x\n"), opcode, extra);
		FAIL (1);
		return;
	}
}
#endif

View file

@ -224,9 +224,6 @@ MIDFUNC(2,mov_l_rr,(W4 d, RR4 s))
live.nat[s].holds[live.nat[s].nholds] = d;
live.nat[s].nholds++;
#if defined(DEBUG) && DEBUG > 1
jit_log("Added %d to nreg %d(%d), now holds %d regs", d, s, live.state[d].realind, live.nat[s].nholds);
#endif
unlock2(s);
}
MENDFUNC(2,mov_l_rr,(W4 d, RR4 s))
@ -244,6 +241,14 @@ MIDFUNC(2,mov_l_mr,(IMM d, RR4 s))
}
MENDFUNC(2,mov_l_mr,(IMM d, RR4 s))
MIDFUNC(2,mov_l_rm,(W4 d, IMM s))
{
d = writereg(d, 4);
raw_mov_l_rm(d, s);
unlock2(d);
}
MENDFUNC(2,mov_l_rm,(W4 d, IMM s))
MIDFUNC(2,mov_l_ri,(W4 d, IMM s))
{
set_const(d, s);
@ -480,3 +485,435 @@ STATIC_INLINE void write_jmp_target(uae_u32* jmpaddr, cpuop_func* a) {
STATIC_INLINE void emit_jmp_target(uae_u32 a) {
emit_long((uae_u32)a);
}
/*************************************************************************
* FPU stuff *
*************************************************************************/
/* Drop any cached native copy of virtual FPU register r and mark its
 * contents undefined (no write-back). */
MIDFUNC(1,f_forget_about,(FW r))
{
if (f_isinreg(r))
f_disassociate(r);
live.fate[r].status=UNDEF;
}
MENDFUNC(1,f_forget_about,(FW r))
/* The FPU condition result (FP_RESULT) will not be needed: detach it
 * from its native register without flushing. */
MIDFUNC(0,dont_care_fflags,(void))
{
f_disassociate(FP_RESULT);
}
MENDFUNC(0,dont_care_fflags,(void))
/* Mid-level FPU move wrappers: allocate/lock virtual registers, emit the
 * corresponding raw_* instruction, then unlock.  RR*/W* arguments are
 * integer virtual registers, FR/FW are FPU virtual registers. */
MIDFUNC(2,fmov_rr,(FW d, FR s))
{
if (d == s) { /* How pointless! */
return;
}
s = f_readreg(s);
d = f_writereg(d);
raw_fmov_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fmov_rr,(FW d, FR s))
/* Convert a 32-bit integer register to FPU register d. */
MIDFUNC(2,fmov_l_rr,(FW d, RR4 s))
{
s = readreg(s, 4);
d = f_writereg(d);
raw_fmov_l_rr(d, s);
f_unlock(d);
unlock2(s);
}
MENDFUNC(2,fmov_l_rr,(FW d, RR4 s))
/* Reinterpret a 32-bit register holding single-precision bits into d. */
MIDFUNC(2,fmov_s_rr,(FW d, RR4 s))
{
s = readreg(s, 4);
d = f_writereg(d);
raw_fmov_s_rr(d, s);
f_unlock(d);
unlock2(s);
}
MENDFUNC(2,fmov_s_rr,(FW d, RR4 s))
/* Convert a 16-bit integer register to FPU register d. */
MIDFUNC(2,fmov_w_rr,(FW d, RR2 s))
{
s = readreg(s, 2);
d = f_writereg(d);
raw_fmov_w_rr(d, s);
f_unlock(d);
unlock2(s);
}
MENDFUNC(2,fmov_w_rr,(FW d, RR2 s))
/* Convert an 8-bit integer register to FPU register d. */
MIDFUNC(2,fmov_b_rr,(FW d, RR1 s))
{
s = readreg(s, 1);
d = f_writereg(d);
raw_fmov_b_rr(d, s);
f_unlock(d);
unlock2(s);
}
MENDFUNC(2,fmov_b_rr,(FW d, RR1 s))
/* Assemble a double in d from two 32-bit halves s1/s2. */
MIDFUNC(3,fmov_d_rrr,(FW d, RR4 s1, RR4 s2))
{
s1 = readreg(s1, 4);
s2 = readreg(s2, 4);
d = f_writereg(d);
raw_fmov_d_rrr(d, s1, s2);
f_unlock(d);
unlock2(s2);
unlock2(s1);
}
MENDFUNC(3,fmov_d_rrr,(FW d, RR4 s1, RR4 s2))
/* Load an immediate integer into FPU register d; small common values
 * (0, 1, 10, 100) use dedicated constant loads. */
MIDFUNC(2,fmov_l_ri,(FW d, IMM i))
{
switch(i) {
case 0:
fmov_d_ri_0(d);
break;
case 1:
fmov_d_ri_1(d);
break;
case 10:
fmov_d_ri_10(d);
break;
case 100:
fmov_d_ri_100(d);
break;
default:
d = f_writereg(d);
compemu_raw_mov_l_ri(REG_WORK1, i);
raw_fmov_l_rr(d, REG_WORK1);
f_unlock(d);
}
}
MENDFUNC(2,fmov_l_ri,(FW d, IMM i))
/* Load immediate single-precision bits i into FPU register d. */
MIDFUNC(2,fmov_s_ri,(FW d, IMM i))
{
d = f_writereg(d);
compemu_raw_mov_l_ri(REG_WORK1, i);
raw_fmov_s_rr(d, REG_WORK1);
f_unlock(d);
}
MENDFUNC(2,fmov_s_ri,(FW d, IMM i))
/* FPU -> integer conversions and constant/memory loads. */
/* Convert FPU register s to a 32-bit integer register d. */
MIDFUNC(2,fmov_to_l_rr,(W4 d, FR s))
{
s = f_readreg(s);
d = writereg(d, 4);
raw_fmov_to_l_rr(d, s);
unlock2(d);
f_unlock(s);
}
MENDFUNC(2,fmov_to_l_rr,(W4 d, FR s))
/* Store the single-precision bit pattern of s into 32-bit register d. */
MIDFUNC(2,fmov_to_s_rr,(W4 d, FR s))
{
s = f_readreg(s);
d = writereg(d, 4);
raw_fmov_to_s_rr(d, s);
unlock2(d);
f_unlock(s);
}
MENDFUNC(2,fmov_to_s_rr,(W4 d, FR s))
/* Convert to 16-bit integer; rmw because only the low word of d changes. */
MIDFUNC(2,fmov_to_w_rr,(W4 d, FR s))
{
s = f_readreg(s);
d = rmw(d, 2, 4);
raw_fmov_to_w_rr(d, s);
unlock2(d);
f_unlock(s);
}
MENDFUNC(2,fmov_to_w_rr,(W4 d, FR s))
/* Convert to 8-bit integer; rmw because only the low byte of d changes. */
MIDFUNC(2,fmov_to_b_rr,(W4 d, FR s))
{
s = f_readreg(s);
d = rmw(d, 1, 4);
raw_fmov_to_b_rr(d, s);
unlock2(d);
f_unlock(s);
}
MENDFUNC(2,fmov_to_b_rr,(W4 d, FR s))
/* Constant loads: 0.0, 1.0, 10.0, 100.0. */
MIDFUNC(1,fmov_d_ri_0,(FW r))
{
r = f_writereg(r);
raw_fmov_d_ri_0(r);
f_unlock(r);
}
MENDFUNC(1,fmov_d_ri_0,(FW r))
MIDFUNC(1,fmov_d_ri_1,(FW r))
{
r = f_writereg(r);
raw_fmov_d_ri_1(r);
f_unlock(r);
}
MENDFUNC(1,fmov_d_ri_1,(FW r))
MIDFUNC(1,fmov_d_ri_10,(FW r))
{
r = f_writereg(r);
raw_fmov_d_ri_10(r);
f_unlock(r);
}
MENDFUNC(1,fmov_d_ri_10,(FW r))
MIDFUNC(1,fmov_d_ri_100,(FW r))
{
r = f_writereg(r);
raw_fmov_d_ri_100(r);
f_unlock(r);
}
MENDFUNC(1,fmov_d_ri_100,(FW r))
/* Load a double from memory address m into FPU register r. */
MIDFUNC(2,fmov_d_rm,(FW r, MEMR m))
{
r = f_writereg(r);
raw_fmov_d_rm(r, m);
f_unlock(r);
}
MENDFUNC(2,fmov_d_rm,(FW r, MEMR m))
/* Load a single-precision value from memory into r. */
MIDFUNC(2,fmovs_rm,(FW r, MEMR m))
{
r = f_writereg(r);
raw_fmovs_rm(r, m);
f_unlock(r);
}
MENDFUNC(2,fmovs_rm,(FW r, MEMR m))
/* Generic memory load — same as fmov_d_rm (double). */
MIDFUNC(2,fmov_rm,(FW r, MEMR m))
{
r = f_writereg(r);
raw_fmov_d_rm(r, m);
f_unlock(r);
}
MENDFUNC(2,fmov_rm,(FW r, MEMR m))
/* Split double s into two 32-bit halves d1/d2. */
MIDFUNC(3,fmov_to_d_rrr,(W4 d1, W4 d2, FR s))
{
s = f_readreg(s);
d1 = writereg(d1, 4);
d2 = writereg(d2, 4);
raw_fmov_to_d_rrr(d1, d2, s);
unlock2(d2);
unlock2(d1);
f_unlock(s);
}
MENDFUNC(3,fmov_to_d_rrr,(W4 d1, W4 d2, FR s))
/* FPU arithmetic wrappers.  Monadic ops write d from s (f_writereg);
 * dyadic ops update d in place (f_rmw). */
MIDFUNC(2,fsqrt_rr,(FW d, FR s))
{
s = f_readreg(s);
d = f_writereg(d);
raw_fsqrt_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fsqrt_rr,(FW d, FR s))
MIDFUNC(2,fabs_rr,(FW d, FR s))
{
s = f_readreg(s);
d = f_writereg(d);
raw_fabs_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fabs_rr,(FW d, FR s))
MIDFUNC(2,fneg_rr,(FW d, FR s))
{
s = f_readreg(s);
d = f_writereg(d);
raw_fneg_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fneg_rr,(FW d, FR s))
/* d /= s */
MIDFUNC(2,fdiv_rr,(FRW d, FR s))
{
s = f_readreg(s);
d = f_rmw(d);
raw_fdiv_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fdiv_rr,(FRW d, FR s))
/* d += s */
MIDFUNC(2,fadd_rr,(FRW d, FR s))
{
s = f_readreg(s);
d = f_rmw(d);
raw_fadd_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fadd_rr,(FRW d, FR s))
/* d *= s */
MIDFUNC(2,fmul_rr,(FRW d, FR s))
{
s = f_readreg(s);
d = f_rmw(d);
raw_fmul_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fmul_rr,(FRW d, FR s))
/* d -= s */
MIDFUNC(2,fsub_rr,(FRW d, FR s))
{
s = f_readreg(s);
d = f_rmw(d);
raw_fsub_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fsub_rr,(FRW d, FR s))
/* Round to integral, current rounding mode (FINT). */
MIDFUNC(2,frndint_rr,(FW d, FR s))
{
s = f_readreg(s);
d = f_writereg(d);
raw_frndint_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,frndint_rr,(FW d, FR s))
/* Round to integral toward zero (FINTRZ). */
MIDFUNC(2,frndintz_rr,(FW d, FR s))
{
s = f_readreg(s);
d = f_writereg(d);
raw_frndintz_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,frndintz_rr,(FW d, FR s))
MIDFUNC(2,fmod_rr,(FRW d, FR s))
{
s = f_readreg(s);
d = f_rmw(d);
raw_fmod_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fmod_rr,(FRW d, FR s))
/* Single-precision divide (FSGLDIV). */
MIDFUNC(2,fsgldiv_rr,(FRW d, FR s))
{
s = f_readreg(s);
d = f_rmw(d);
raw_fsgldiv_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fsgldiv_rr,(FRW d, FR s))
/* Round r to single precision in place (used for FSxxx strict mode). */
MIDFUNC(1,fcuts_r,(FRW r))
{
r = f_rmw(r);
raw_fcuts_r(r);
f_unlock(r);
}
MENDFUNC(1,fcuts_r,(FRW r))
/* IEEE remainder (FREM). */
MIDFUNC(2,frem1_rr,(FRW d, FR s))
{
s = f_readreg(s);
d = f_rmw(d);
raw_frem1_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,frem1_rr,(FRW d, FR s))
/* Single-precision multiply (FSGLMUL). */
MIDFUNC(2,fsglmul_rr,(FRW d, FR s))
{
s = f_readreg(s);
d = f_rmw(d);
raw_fsglmul_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fsglmul_rr,(FRW d, FR s))
/* Move with rounding to single precision (FSMOVE). */
MIDFUNC(2,fmovs_rr,(FW d, FR s))
{
s = f_readreg(s);
d = f_writereg(d);
raw_fmovs_rr(d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(2,fmovs_rr,(FW d, FR s))
/* d = func(s) via a call into the host C library; a real call is made,
 * so flags are clobbered and caller-saved registers must be flushed
 * first (prepare_for_call_1/2). */
MIDFUNC(3,ffunc_rr,(double (*func)(double), FW d, FR s))
{
clobber_flags();
prepare_for_call_1();
prepare_for_call_2();
s = f_readreg(s);
d = f_writereg(d);
raw_ffunc_rr(func, d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(3,ffunc_rr,(double (*func)(double), FW d, FR s))
/* FSINCOS: d = sin(s), c = cos(s).  Cosine is computed first so that
 * s is still intact when sin() reads it. */
MIDFUNC(3,fsincos_rr,(FW d, FW c, FR s))
{
clobber_flags();
prepare_for_call_1();
prepare_for_call_2();
s = f_readreg(s); /* s for source */
d = f_writereg(d); /* d for sine */
c = f_writereg(c); /* c for cosine */
raw_ffunc_rr(cos, c, s);
raw_ffunc_rr(sin, d, s);
f_unlock(s);
f_unlock(d);
f_unlock(c);
}
MENDFUNC(3,fsincos_rr,(FW d, FW c, FR s))
/* d = x ** s (FTWOTOX / FTENTOX with x = 2 or 10). */
MIDFUNC(3,fpowx_rr,(uae_u32 x, FW d, FR s))
{
clobber_flags();
prepare_for_call_1();
prepare_for_call_2();
s = f_readreg(s);
d = f_writereg(d);
raw_fpowx_rr(x, d, s);
f_unlock(s);
f_unlock(d);
}
MENDFUNC(3,fpowx_rr,(uae_u32 x, FW d, FR s))
/* Transfer the FPU compare result (FP_RESULT) into the native flags. */
MIDFUNC(1,fflags_into_flags,())
{
clobber_flags();
fflags_into_flags_internal();
}
MENDFUNC(1,fflags_into_flags,())

View file

@ -50,6 +50,7 @@ DECLARE_MIDFUNC(lea_l_brr_indexed(W4 d, RR4 s, RR4 index, IMM factor, IMM offset
DECLARE_MIDFUNC(lea_l_rr_indexed(W4 d, RR4 s, RR4 index, IMM factor));
DECLARE_MIDFUNC(mov_l_rr(W4 d, RR4 s));
DECLARE_MIDFUNC(mov_l_mr(IMM d, RR4 s));
DECLARE_MIDFUNC(mov_l_rm(W4 d, IMM s));
DECLARE_MIDFUNC(mov_b_rm(W1 d, IMM s));
DECLARE_MIDFUNC(mov_l_ri(W4 d, IMM s));
DECLARE_MIDFUNC(mov_w_ri(W2 d, IMM s));
@ -66,3 +67,44 @@ DECLARE_MIDFUNC(make_flags_live(void));
DECLARE_MIDFUNC(forget_about(W4 r));
DECLARE_MIDFUNC(f_forget_about(FW r));
DECLARE_MIDFUNC(dont_care_fflags(void));
DECLARE_MIDFUNC(fmov_rr(FW d, FR s));
DECLARE_MIDFUNC(fmov_l_rr(FW d, RR4 s));
DECLARE_MIDFUNC(fmov_s_rr(FW d, RR4 s));
DECLARE_MIDFUNC(fmov_w_rr(FW d, RR2 s));
DECLARE_MIDFUNC(fmov_b_rr(FW d, RR1 s));
DECLARE_MIDFUNC(fmov_d_rrr(FW d, RR4 s1, RR4 s2));
DECLARE_MIDFUNC(fmov_l_ri(FW d, IMM i));
DECLARE_MIDFUNC(fmov_s_ri(FW d, IMM i));
DECLARE_MIDFUNC(fmov_to_l_rr(W4 d, FR s));
DECLARE_MIDFUNC(fmov_to_s_rr(W4 d, FR s));
DECLARE_MIDFUNC(fmov_to_w_rr(W4 d, FR s));
DECLARE_MIDFUNC(fmov_to_b_rr(W4 d, FR s));
DECLARE_MIDFUNC(fmov_d_ri_0(FW d));
DECLARE_MIDFUNC(fmov_d_ri_1(FW d));
DECLARE_MIDFUNC(fmov_d_ri_10(FW d));
DECLARE_MIDFUNC(fmov_d_ri_100(FW d));
DECLARE_MIDFUNC(fmov_d_rm(FW r, MEMR m));
DECLARE_MIDFUNC(fmovs_rm(FW r, MEMR m));
DECLARE_MIDFUNC(fmov_rm(FW r, MEMR m));
DECLARE_MIDFUNC(fmov_to_d_rrr(W4 d1, W4 d2, FR s));
DECLARE_MIDFUNC(fsqrt_rr(FW d, FR s));
DECLARE_MIDFUNC(fabs_rr(FW d, FR s));
DECLARE_MIDFUNC(fneg_rr(FW d, FR s));
DECLARE_MIDFUNC(fdiv_rr(FRW d, FR s));
DECLARE_MIDFUNC(fadd_rr(FRW d, FR s));
DECLARE_MIDFUNC(fmul_rr(FRW d, FR s));
DECLARE_MIDFUNC(fsub_rr(FRW d, FR s));
DECLARE_MIDFUNC(frndint_rr(FW d, FR s));
DECLARE_MIDFUNC(frndintz_rr(FW d, FR s));
DECLARE_MIDFUNC(fmod_rr(FRW d, FR s));
DECLARE_MIDFUNC(fsgldiv_rr(FRW d, FR s));
DECLARE_MIDFUNC(fcuts_r(FRW r));
DECLARE_MIDFUNC(frem1_rr(FRW d, FR s));
DECLARE_MIDFUNC(fsglmul_rr(FRW d, FR s));
DECLARE_MIDFUNC(fmovs_rr(FW d, FR s));
DECLARE_MIDFUNC(ffunc_rr(double (*func)(double), FW d, FR s));
DECLARE_MIDFUNC(fsincos_rr(FW d, FW c, FR s));
DECLARE_MIDFUNC(fpowx_rr(uae_u32 x, FW d, FR s));
DECLARE_MIDFUNC(fflags_into_flags());

View file

@ -32,6 +32,8 @@
#define writemem_special writemem
#define readmem_special readmem
#include <math.h>
#include "sysconfig.h"
#include "sysdeps.h"
@ -108,7 +110,11 @@ const int follow_const_jumps = 0;
static uae_u32 cache_size = 0; // Size of total cache allocated for compiled blocks
static uae_u32 current_cache_size = 0; // Cache grows upwards: how much has been consumed already
#ifdef USE_JIT_FPU
#define avoid_fpu (!currprefs.compfpu)
#else
#define avoid_fpu (true)
#endif
static const int align_loops = 0; // Align the start of loops
static const int align_jumps = 0; // Align the start of jumps
static int optcount[10] = {
@ -646,13 +652,15 @@ bool check_prefs_changed_comp(bool checkonly)
{
bool changed = 0;
if (currprefs.fpu_strict != changed_prefs.fpu_strict ||
if (currprefs.compfpu != changed_prefs.compfpu ||
currprefs.fpu_strict != changed_prefs.fpu_strict ||
currprefs.cachesize != changed_prefs.cachesize)
changed = 1;
if (checkonly)
return changed;
currprefs.compfpu = changed_prefs.compfpu;
currprefs.fpu_strict = changed_prefs.fpu_strict;
if (currprefs.cachesize != changed_prefs.cachesize) {
@ -955,6 +963,7 @@ static void evict(int r)
if (live.nat[rr].nholds != live.state[r].realind) { /* Was not last */
int topreg = live.nat[rr].holds[live.nat[rr].nholds];
int thisind = live.state[r].realind;
live.nat[rr].holds[thisind] = topreg;
live.state[topreg].realind = thisind;
}
@ -1343,6 +1352,142 @@ static int rmw(int r, int wsize, int rsize)
return rmw_general(r, wsize, rsize);
}
/********************************************************************
* FPU register status handling. EMIT TIME! *
********************************************************************/
/* Flush virtual FPU register r to its backing memory if dirty; the
 * native register keeps the value but is marked clean (INMEM). */
STATIC_INLINE void f_tomem_drop(int r)
{
if (live.fate[r].status == DIRTY) {
compemu_raw_fmov_mr_drop((uintptr)live.fate[r].mem, live.fate[r].realreg);
live.fate[r].status = INMEM;
}
}
/* True if virtual FPU register r is currently held in a native register. */
STATIC_INLINE int f_isinreg(int r)
{
return live.fate[r].status == CLEAN || live.fate[r].status == DIRTY;
}
/* Remove r from its native register, flushing to memory first if dirty. */
STATIC_INLINE void f_evict(int r)
{
int rr;
if (!f_isinreg(r))
return;
rr = live.fate[r].realreg;
f_tomem_drop(r);
live.fat[rr].nholds = 0;
live.fate[r].status = INMEM;
live.fate[r].realreg = -1;
}
/* Free native FPU register r by evicting the virtual register it holds. */
STATIC_INLINE void f_free_nreg(int r)
{
int vr;
vr = live.fat[r].holds;
f_evict(vr);
}
/* Use with care! */
/* Force r's status to CLEAN (discarding dirtiness without a flush). */
STATIC_INLINE void f_isclean(int r)
{
if (!f_isinreg(r))
return;
live.fate[r].status = CLEAN;
}
/* Detach r from its native register WITHOUT writing it back
 * (mark clean, then evict — the dirty data is dropped). */
STATIC_INLINE void f_disassociate(int r)
{
f_isclean(r);
f_evict(r);
}
/* Map virtual FPU register r to its fixed native VFP register and load
 * its value from memory unless the caller will overwrite it anyway.
 * The mapping is static (1:1), so no search/spill logic is needed:
 * FP0-FP7 -> VFP d8-d15 (callee-saved), scratch regs -> VFP d0-d3. */
static int f_alloc_reg(int r, int willclobber)
{
int bestreg;
if(r < 8)
bestreg = r + 8; // map real Amiga reg to ARM VFP reg 8-15
else
bestreg = r - 8; // map FP_RESULT, FS1, FS2 or FS3 to ARM VFP reg 0-3
if (!willclobber) {
if (live.fate[r].status == INMEM) {
compemu_raw_fmov_rm(bestreg, (uintptr)live.fate[r].mem);
live.fate[r].status=CLEAN;
}
}
else {
live.fate[r].status = DIRTY;
}
live.fate[r].realreg=bestreg;
live.fat[bestreg].holds = r;
live.fat[bestreg].nholds = 1;
return bestreg;
}
/* No-op: with the fixed 1:1 mapping above there is nothing to unlock,
 * but the call sites keep the lock/unlock pairing for symmetry. */
STATIC_INLINE void f_unlock(int r)
{
}
/* Return the native register holding virtual FPU register r, loading it
 * from memory if necessary (read access). */
STATIC_INLINE int f_readreg(int r)
{
int answer=-1;
if (f_isinreg(r)) {
answer = live.fate[r].realreg;
}
/* either the value was in memory to start with, or it was evicted and
 is in memory now */
if (answer < 0)
answer = f_alloc_reg(r,0);
return answer;
}
/* Return a native register for r as a pure write destination: the old
 * value is NOT loaded, and r is marked dirty. */
STATIC_INLINE int f_writereg(int r)
{
int answer = -1;
if (f_isinreg(r)) {
answer = live.fate[r].realreg;
}
if (answer < 0) {
answer = f_alloc_reg(r,1);
}
live.fate[r].status = DIRTY;
return answer;
}
/* Read-modify-write access: load the current value if needed, then mark
 * r dirty since it will be updated in place. */
STATIC_INLINE int f_rmw(int r)
{
int n;
if (f_isinreg(r)) {
n = live.fate[r].realreg;
}
else
n = f_alloc_reg(r,0);
live.fate[r].status = DIRTY;
return n;
}
/* Emit code that copies the FP_RESULT compare flags into the live
 * native condition flags. */
static void fflags_into_flags_internal(void)
{
int r;
r = f_readreg(FP_RESULT);
raw_fflags_into_flags(r);
f_unlock(r);
live_flags();
}
#if defined(CPU_arm)
@ -1379,6 +1524,7 @@ void sync_m68k_pc(void)
struct scratch_t {
uae_u32 regs[VREGS];
fpu_register fregs[VFREGS];
};
static scratch_t scratch;
@ -1479,6 +1625,12 @@ void init_comp(void)
set_status(i, UNDEF);
}
for (i=0;i<VFREGS;i++) {
live.fate[i].status = UNDEF;
live.fate[i].realreg = -1;
live.fate[i].needflush = NF_SCRATCH;
}
for (i=0; i<VREGS; i++) {
if (i < 16) { /* First 16 registers map to 68k registers */
live.state[i].mem = &regs.regs[i];
@ -1502,6 +1654,22 @@ void init_comp(void)
set_status(NEXT_HANDLER, UNDEF);
for (i = 0; i < VFREGS; i++) {
if (i < 8) { /* First 8 registers map to 68k FPU registers */
live.fate[i].mem = (uae_u32*)(&regs.fp[i].fp);
live.fate[i].needflush = NF_TOMEM;
live.fate[i].status = INMEM;
}
else if (i == FP_RESULT) {
live.fate[i].mem = (uae_u32*)(&regs.fp_result.fp);
live.fate[i].needflush = NF_TOMEM;
live.fate[i].status = INMEM;
}
else
live.fate[i].mem = (uae_u32*)(&scratch.fregs[i]);
}
for (i=0; i<N_REGS; i++) {
live.nat[i].touched = 0;
live.nat[i].nholds = 0;
@ -1512,6 +1680,10 @@ void init_comp(void)
}
}
for (i=0;i<N_FREGS;i++) {
live.fat[i].nholds = 0;
}
touchcnt = 1;
m68k_pc_offset = 0;
live.flags_in_flags = TRASH;
@ -1528,6 +1700,12 @@ void flush(int save_regs)
sync_m68k_pc(); /* mid level */
if (save_regs) {
for (i = 0; i < VFREGS; i++) {
if (live.fate[i].needflush == NF_SCRATCH ||
live.fate[i].status == CLEAN) {
f_disassociate(i);
}
}
for (i=0; i<=FLAGTMP; i++) {
switch(live.state[i].status) {
case INMEM:
@ -1548,6 +1726,11 @@ void flush(int save_regs)
break;
}
}
for (i = 0; i <= FP_RESULT; i++) {
if (live.fate[i].status == DIRTY) {
f_evict(i);
}
}
}
}
@ -1565,6 +1748,9 @@ void freescratch(void)
for (i = S1; i < VREGS; i++)
forget_about(i);
for (i = FS1; i <= FS3; i++) // only FS1-FS3
f_forget_about(i);
}
/********************************************************************
@ -1598,6 +1784,9 @@ static void flush_all(void)
tomem(i);
}
}
for (i = FP_RESULT; i <= FS3; i++) // only FP_RESULT and FS1-FS3, FP0-FP7 are call save
if (f_isinreg(i))
f_evict(i);
}
/* Make sure all registers that will get clobbered by a call are
@ -1619,6 +1808,10 @@ static void prepare_for_call_2(void)
free_nreg(i);
}
for (i = 0; i < 4; i++) // only FP_RESULT and FS1-FS3, FP0-FP7 are call save
if (live.fat[i].nholds > 0)
f_free_nreg(i);
live.flags_in_flags = TRASH; /* Note: We assume we already rescued the
flags at the very start of the call_r
functions! */
@ -2038,7 +2231,6 @@ STATIC_INLINE int block_check_checksum(blockinfo* bi)
means we have to move it into the needs-to-be-flushed list */
bi->handler_to_use = bi->handler;
set_dhtu(bi, bi->direct_handler);
bi->status = BI_CHECKING;
isgood = called_check_checksum(bi) != 0;
}
@ -2694,7 +2886,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
if (next_pc_p) { /* A branch was registered */
uintptr t1 = next_pc_p;
uintptr t2 = taken_pc_p;
int cc = branch_cc;
int cc = branch_cc; // this is native (ARM) condition code
uae_u32* branchadd;
uae_u32* tba;
@ -2707,7 +2899,10 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
the 68k branch is taken. */
t1 = taken_pc_p;
t2 = next_pc_p;
cc = branch_cc^1;
if(cc < NATIVE_CC_AL)
cc = branch_cc^1;
else if(cc > NATIVE_CC_AL)
cc = 0x10 | (branch_cc ^ 0xf);
}
tmp = live; /* ouch! This is big... */

View file

@ -1803,32 +1803,32 @@ extern const struct comptbl op_smalltbl_0_comp_ff[] = {
{ NULL, 0x00000001, 61488 }, /* MMUOP030 */
{ NULL, 0x00000001, 61496 }, /* MMUOP030 */
{ NULL, 0x00000001, 61497 }, /* MMUOP030 */
{ NULL, 0x00000022, 61952 }, /* FPP */
{ NULL, 0x00000022, 61960 }, /* FPP */
{ NULL, 0x00000022, 61968 }, /* FPP */
{ NULL, 0x00000022, 61976 }, /* FPP */
{ NULL, 0x00000022, 61984 }, /* FPP */
{ NULL, 0x00000022, 61992 }, /* FPP */
{ NULL, 0x00000022, 62000 }, /* FPP */
{ NULL, 0x00000022, 62008 }, /* FPP */
{ NULL, 0x00000022, 62009 }, /* FPP */
{ NULL, 0x00000022, 62010 }, /* FPP */
{ NULL, 0x00000022, 62011 }, /* FPP */
{ NULL, 0x00000022, 62012 }, /* FPP */
{ NULL, 0x00000006, 62016 }, /* FScc */
{ op_f200_0_comp_ff, 0x00000022, 61952 }, /* FPP */
{ op_f208_0_comp_ff, 0x00000022, 61960 }, /* FPP */
{ op_f210_0_comp_ff, 0x00000022, 61968 }, /* FPP */
{ op_f218_0_comp_ff, 0x00000022, 61976 }, /* FPP */
{ op_f220_0_comp_ff, 0x00000022, 61984 }, /* FPP */
{ op_f228_0_comp_ff, 0x00000022, 61992 }, /* FPP */
{ op_f230_0_comp_ff, 0x00000022, 62000 }, /* FPP */
{ op_f238_0_comp_ff, 0x00000022, 62008 }, /* FPP */
{ op_f239_0_comp_ff, 0x00000022, 62009 }, /* FPP */
{ op_f23a_0_comp_ff, 0x00000022, 62010 }, /* FPP */
{ op_f23b_0_comp_ff, 0x00000022, 62011 }, /* FPP */
{ op_f23c_0_comp_ff, 0x00000022, 62012 }, /* FPP */
{ op_f240_0_comp_ff, 0x00000006, 62016 }, /* FScc */
{ NULL, 0x00000021, 62024 }, /* FDBcc */
{ NULL, 0x00000006, 62032 }, /* FScc */
{ NULL, 0x00000006, 62040 }, /* FScc */
{ NULL, 0x00000006, 62048 }, /* FScc */
{ NULL, 0x00000006, 62056 }, /* FScc */
{ NULL, 0x00000006, 62064 }, /* FScc */
{ NULL, 0x00000006, 62072 }, /* FScc */
{ NULL, 0x00000006, 62073 }, /* FScc */
{ op_f250_0_comp_ff, 0x00000006, 62032 }, /* FScc */
{ op_f258_0_comp_ff, 0x00000006, 62040 }, /* FScc */
{ op_f260_0_comp_ff, 0x00000006, 62048 }, /* FScc */
{ op_f268_0_comp_ff, 0x00000006, 62056 }, /* FScc */
{ op_f270_0_comp_ff, 0x00000006, 62064 }, /* FScc */
{ op_f278_0_comp_ff, 0x00000006, 62072 }, /* FScc */
{ op_f279_0_comp_ff, 0x00000006, 62073 }, /* FScc */
{ NULL, 0x00000021, 62074 }, /* FTRAPcc */
{ NULL, 0x00000021, 62075 }, /* FTRAPcc */
{ NULL, 0x00000021, 62076 }, /* FTRAPcc */
{ NULL, 0x00000005, 62080 }, /* FBcc */
{ NULL, 0x00000005, 62144 }, /* FBcc */
{ op_f280_0_comp_ff, 0x00000005, 62080 }, /* FBcc */
{ op_f2c0_0_comp_ff, 0x00000005, 62144 }, /* FBcc */
{ NULL, 0x00000020, 62224 }, /* FSAVE */
{ NULL, 0x00000020, 62240 }, /* FSAVE */
{ NULL, 0x00000020, 62248 }, /* FSAVE */
@ -3675,32 +3675,32 @@ extern const struct comptbl op_smalltbl_0_comp_nf[] = {
{ NULL, 0x00000001, 61488 }, /* MMUOP030 */
{ NULL, 0x00000001, 61496 }, /* MMUOP030 */
{ NULL, 0x00000001, 61497 }, /* MMUOP030 */
{ NULL, 0x00000022, 61952 }, /* FPP */
{ NULL, 0x00000022, 61960 }, /* FPP */
{ NULL, 0x00000022, 61968 }, /* FPP */
{ NULL, 0x00000022, 61976 }, /* FPP */
{ NULL, 0x00000022, 61984 }, /* FPP */
{ NULL, 0x00000022, 61992 }, /* FPP */
{ NULL, 0x00000022, 62000 }, /* FPP */
{ NULL, 0x00000022, 62008 }, /* FPP */
{ NULL, 0x00000022, 62009 }, /* FPP */
{ NULL, 0x00000022, 62010 }, /* FPP */
{ NULL, 0x00000022, 62011 }, /* FPP */
{ NULL, 0x00000022, 62012 }, /* FPP */
{ NULL, 0x00000006, 62016 }, /* FScc */
{ op_f200_0_comp_nf, 0x00000022, 61952 }, /* FPP */
{ op_f208_0_comp_nf, 0x00000022, 61960 }, /* FPP */
{ op_f210_0_comp_nf, 0x00000022, 61968 }, /* FPP */
{ op_f218_0_comp_nf, 0x00000022, 61976 }, /* FPP */
{ op_f220_0_comp_nf, 0x00000022, 61984 }, /* FPP */
{ op_f228_0_comp_nf, 0x00000022, 61992 }, /* FPP */
{ op_f230_0_comp_nf, 0x00000022, 62000 }, /* FPP */
{ op_f238_0_comp_nf, 0x00000022, 62008 }, /* FPP */
{ op_f239_0_comp_nf, 0x00000022, 62009 }, /* FPP */
{ op_f23a_0_comp_nf, 0x00000022, 62010 }, /* FPP */
{ op_f23b_0_comp_nf, 0x00000022, 62011 }, /* FPP */
{ op_f23c_0_comp_nf, 0x00000022, 62012 }, /* FPP */
{ op_f240_0_comp_nf, 0x00000006, 62016 }, /* FScc */
{ NULL, 0x00000021, 62024 }, /* FDBcc */
{ NULL, 0x00000006, 62032 }, /* FScc */
{ NULL, 0x00000006, 62040 }, /* FScc */
{ NULL, 0x00000006, 62048 }, /* FScc */
{ NULL, 0x00000006, 62056 }, /* FScc */
{ NULL, 0x00000006, 62064 }, /* FScc */
{ NULL, 0x00000006, 62072 }, /* FScc */
{ NULL, 0x00000006, 62073 }, /* FScc */
{ op_f250_0_comp_nf, 0x00000006, 62032 }, /* FScc */
{ op_f258_0_comp_nf, 0x00000006, 62040 }, /* FScc */
{ op_f260_0_comp_nf, 0x00000006, 62048 }, /* FScc */
{ op_f268_0_comp_nf, 0x00000006, 62056 }, /* FScc */
{ op_f270_0_comp_nf, 0x00000006, 62064 }, /* FScc */
{ op_f278_0_comp_nf, 0x00000006, 62072 }, /* FScc */
{ op_f279_0_comp_nf, 0x00000006, 62073 }, /* FScc */
{ NULL, 0x00000021, 62074 }, /* FTRAPcc */
{ NULL, 0x00000021, 62075 }, /* FTRAPcc */
{ NULL, 0x00000021, 62076 }, /* FTRAPcc */
{ NULL, 0x00000005, 62080 }, /* FBcc */
{ NULL, 0x00000005, 62144 }, /* FBcc */
{ op_f280_0_comp_nf, 0x00000005, 62080 }, /* FBcc */
{ op_f2c0_0_comp_nf, 0x00000005, 62144 }, /* FBcc */
{ NULL, 0x00000020, 62224 }, /* FSAVE */
{ NULL, 0x00000020, 62240 }, /* FSAVE */
{ NULL, 0x00000020, 62248 }, /* FSAVE */

View file

@ -1446,6 +1446,28 @@ extern compop_func op_e7e8_0_comp_ff;
extern compop_func op_e7f0_0_comp_ff;
extern compop_func op_e7f8_0_comp_ff;
extern compop_func op_e7f9_0_comp_ff;
extern compop_func op_f200_0_comp_ff;
extern compop_func op_f208_0_comp_ff;
extern compop_func op_f210_0_comp_ff;
extern compop_func op_f218_0_comp_ff;
extern compop_func op_f220_0_comp_ff;
extern compop_func op_f228_0_comp_ff;
extern compop_func op_f230_0_comp_ff;
extern compop_func op_f238_0_comp_ff;
extern compop_func op_f239_0_comp_ff;
extern compop_func op_f23a_0_comp_ff;
extern compop_func op_f23b_0_comp_ff;
extern compop_func op_f23c_0_comp_ff;
extern compop_func op_f240_0_comp_ff;
extern compop_func op_f250_0_comp_ff;
extern compop_func op_f258_0_comp_ff;
extern compop_func op_f260_0_comp_ff;
extern compop_func op_f268_0_comp_ff;
extern compop_func op_f270_0_comp_ff;
extern compop_func op_f278_0_comp_ff;
extern compop_func op_f279_0_comp_ff;
extern compop_func op_f280_0_comp_ff;
extern compop_func op_f2c0_0_comp_ff;
extern compop_func op_f600_0_comp_ff;
extern compop_func op_f608_0_comp_ff;
extern compop_func op_f610_0_comp_ff;
@ -2893,6 +2915,28 @@ extern compop_func op_e7e8_0_comp_nf;
extern compop_func op_e7f0_0_comp_nf;
extern compop_func op_e7f8_0_comp_nf;
extern compop_func op_e7f9_0_comp_nf;
extern compop_func op_f200_0_comp_nf;
extern compop_func op_f208_0_comp_nf;
extern compop_func op_f210_0_comp_nf;
extern compop_func op_f218_0_comp_nf;
extern compop_func op_f220_0_comp_nf;
extern compop_func op_f228_0_comp_nf;
extern compop_func op_f230_0_comp_nf;
extern compop_func op_f238_0_comp_nf;
extern compop_func op_f239_0_comp_nf;
extern compop_func op_f23a_0_comp_nf;
extern compop_func op_f23b_0_comp_nf;
extern compop_func op_f23c_0_comp_nf;
extern compop_func op_f240_0_comp_nf;
extern compop_func op_f250_0_comp_nf;
extern compop_func op_f258_0_comp_nf;
extern compop_func op_f260_0_comp_nf;
extern compop_func op_f268_0_comp_nf;
extern compop_func op_f270_0_comp_nf;
extern compop_func op_f278_0_comp_nf;
extern compop_func op_f279_0_comp_nf;
extern compop_func op_f280_0_comp_nf;
extern compop_func op_f2c0_0_comp_nf;
extern compop_func op_f600_0_comp_nf;
extern compop_func op_f608_0_comp_nf;
extern compop_func op_f610_0_comp_nf;

View file

@ -1,52 +0,0 @@
/*
* compiler/flags_arm.h - Native flags definitions for ARM
*
* Copyright (c) 2013 Jens Heitmann of ARAnyM dev team (see AUTHORS)
*
* Inspired by Christian Bauer's Basilisk II
*
* Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
*
* Adaptation for Basilisk II and improvements, copyright 2000-2002
* Gwenole Beauchesne
*
* Basilisk II (C) 1997-2002 Christian Bauer
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef NATIVE_FLAGS_ARM_H
#define NATIVE_FLAGS_ARM_H
/* Native integer code conditions */
enum {
NATIVE_CC_EQ = 0,
NATIVE_CC_NE = 1,
NATIVE_CC_CS = 2,
NATIVE_CC_CC = 3,
NATIVE_CC_MI = 4,
NATIVE_CC_PL = 5,
NATIVE_CC_VS = 6,
NATIVE_CC_VC = 7,
NATIVE_CC_HI = 8,
NATIVE_CC_LS = 9,
NATIVE_CC_GE = 10,
NATIVE_CC_LT = 11,
NATIVE_CC_GT = 12,
NATIVE_CC_LE = 13,
NATIVE_CC_AL = 14
};
#endif /* NATIVE_FLAGS_ARM_H */

View file

@ -7,9 +7,6 @@
* Adaptation for ARAnyM/ARM, copyright 2001-2015
* Milan Jurik, Jens Heitmann
*
* Adaptation for Basilisk II and improvements, copyright 2000-2005
* Gwenole Beauchesne
*
* Basilisk II (C) 1997-2005 Christian Bauer
*
* This program is free software; you can redistribute it and/or modify
@ -121,13 +118,14 @@
#define DISABLE_I_ROXLW
#define DISABLE_I_ROXRW
//#define DISABLE_I_MULL
#define DISABLE_I_FPP
#define DISABLE_I_FBCC
#define DISABLE_I_FSCC
//#define DISABLE_I_FPP
//#define DISABLE_I_FBCC
//#define DISABLE_I_FSCC
//#define DISABLE_I_MOVE16
#define DISABLE_I_DIVU // DIVU works, but we have to think about exceptions. No big performance enhancement.
#define RETURN "return 0;"
#define BOOL_TYPE "int"
@ -1222,9 +1220,6 @@ static void gen_dbcc(uae_u32 opcode, struct instr *curi, char* ssize) {
comprintf("\tarm_ADD_l_ri(PC_P, m68k_pc_offset);\n");
comprintf("\tm68k_pc_offset=0;\n");
start_brace();
comprintf("\tint nsrc = scratchie++;\n");
if (curi->cc >= 2) {
comprintf("\tmake_flags_live();\n"); /* Load the flags */
}
@ -1262,7 +1257,6 @@ static void gen_dbcc(uae_u32 opcode, struct instr *curi, char* ssize) {
break;
default: abort();
}
genastore("src", curi->smode, "srcreg", curi->size, "src");
gen_update_next_handler();
}
@ -2071,7 +2065,6 @@ gen_opcode(unsigned long int opcode) {
case i_SBCD:
failure;
/* I don't think so! */
break;
case i_ADD:
@ -2097,7 +2090,6 @@ gen_opcode(unsigned long int opcode) {
case i_ABCD:
failure;
/* No BCD maths for me.... */
break;
case i_NEG:
@ -2116,7 +2108,6 @@ gen_opcode(unsigned long int opcode) {
case i_NBCD:
failure;
/* Nope! */
break;
case i_CLR:
@ -2362,7 +2353,8 @@ gen_opcode(unsigned long int opcode) {
isjump;
genamode(curi->smode, "srcreg", curi->size, "src", 0, 0);
start_brace();
comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
comprintf(
"\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
comprintf("\tint ret=scratchie++;\n"
"\tmov_l_ri(ret,retadd);\n"
"\tsub_l_ri(15,4);\n"
@ -2391,10 +2383,12 @@ gen_opcode(unsigned long int opcode) {
#ifdef DISABLE_I_BSR
failure;
#endif
is_const_jump;
is_const_jump
;
genamode(curi->smode, "srcreg", curi->size, "src", 1, 0);
start_brace();
comprintf("\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
comprintf(
"\tuae_u32 retadd=start_pc+((char *)comp_pc_p-(char *)start_pc_p)+m68k_pc_offset;\n");
comprintf("\tint ret=scratchie++;\n"
"\tmov_l_ri(ret,retadd);\n"
"\tsub_l_ri(15,4);\n"
@ -2427,9 +2421,10 @@ gen_opcode(unsigned long int opcode) {
comprintf("\tv2 = get_const(src);\n");
comprintf("\tregister_branch(v1, v2, %d);\n", cond_codes[curi->cc]);
comprintf("\tmake_flags_live();\n"); /* Load the flags */
isjump;
isjump;
} else {
is_const_jump;
is_const_jump
;
}
switch (curi->cc) {
@ -3124,11 +3119,16 @@ generate_one_opcode(int rp, int noflags)
fprintf(stblfile, "{ NULL, 0x%08x, %ld }, /* %s */\n", flags, opcode, name);
com_discard();
} else {
const char *tbl = noflags ? "nf" : "ff";
printf ("/* %s */\n", outopcode (opcode));
fprintf(stblfile, "{ op_%lx_%d_comp_%s, 0x%08x, %ld }, /* %s */\n", opcode, postfix, tbl, flags, opcode, name);
fprintf(headerfile, "extern compop_func op_%lx_%d_comp_%s;\n", opcode, postfix, tbl);
printf("uae_u32 REGPARAM2 op_%lx_%d_comp_%s(uae_u32 opcode)\n{\n", opcode, postfix, tbl);
if (noflags) {
fprintf(stblfile, "{ op_%lx_%d_comp_nf, 0x%08x, %ld }, /* %s */\n", opcode, postfix, flags, opcode, name);
fprintf(headerfile, "extern compop_func op_%lx_%d_comp_nf;\n", opcode, postfix);
printf("uae_u32 REGPARAM2 op_%lx_%d_comp_nf(uae_u32 opcode)\n{\n", opcode, postfix);
} else {
fprintf(stblfile, "{ op_%lx_%d_comp_ff, 0x%08x, %ld }, /* %s */\n", opcode, postfix, flags, opcode, name);
fprintf(headerfile, "extern compop_func op_%lx_%d_comp_ff;\n", opcode, postfix);
printf("uae_u32 REGPARAM2 op_%lx_%d_comp_ff(uae_u32 opcode)\n{\n", opcode, postfix);
}
com_flush();
}
}