Resync with TomB's sources: ARMv6T2 improvements

Dimitris Panokostas 2018-03-03 19:01:28 +01:00
parent 6571dc247d
commit 9b64073b68
21 changed files with 327 additions and 274 deletions

@ -471,8 +471,8 @@ LOWFUNC(WRITE,NONE,2,compemu_raw_MERGE8_rr,(RW4 d, RR4 s))
UBFX_rrii(REG_WORK1, s, 8, 24);
BFI_rrii(d, REG_WORK1, 8, 31);
#else
AND_rri(REG_WORK1, s, 0xffffff00);
BIC_rri(d, d, 0xffffff00);
BIC_rri(REG_WORK1, s, 0xff);
AND_rri(d, d, 0xff);
ORR_rrr(d, d, REG_WORK1);
#endif
}
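Note: the two instruction pairs in the fallback compute the same merge (keep the low byte of d, take bits 8..31 from s), but the BIC/AND form uses the immediate 0xff, which encodes directly as an ARM rotated immediate, while 0xffffff00 does not. A plain-C model of what MERGE8 computes (a sketch; the helper name is mine):

#include <stdint.h>

/* Model of compemu_raw_MERGE8_rr: low byte from d, bits 8..31 from s. */
static inline uint32_t merge8(uint32_t d, uint32_t s)
{
    return (d & 0xffu) | (s & ~0xffu);  /* AND d,d,#0xff ; BIC w,s,#0xff ; ORR */
}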
@ -889,11 +889,17 @@ STATIC_INLINE void compemu_raw_jcc_l_oponly(int cc)
STATIC_INLINE void compemu_raw_handle_except(IMM cycles)
{
uae_u32* branchadd;
int offs;
clobber_flags();
#ifdef ARMV6T2
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
#else
offs = data_long_offs((uae_u32)(&jit_exception));
LDR_rRI(REG_WORK2, RPC_INDEX, offs);
#endif
LDR_rR(REG_WORK1, REG_WORK2);
TST_rr(REG_WORK1, REG_WORK1);
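Note: on ARMv6T2 the address of jit_exception is built with a MOVW/MOVT pair instead of a PC-relative load from a literal pool, which is the recurring theme of this commit. A sketch of what such a pair encodes (the real code goes through the MOVW_ri16/MOVT_ri16 macros; this helper is mine):

#include <stdint.h>

/* Produce the two instruction words that materialize a 32-bit constant. */
static void mov32_words(uint32_t rd, uint32_t imm, uint32_t out[2])
{
    uint32_t lo = imm & 0xffffu, hi = imm >> 16;
    /* MOVW rd,#lo (cond=AL): imm4 in bits 16-19, imm12 in bits 0-11 */
    out[0] = 0xe3000000u | ((lo & 0xf000u) << 4) | (rd << 12) | (lo & 0xfffu);
    /* MOVT rd,#hi: writes the upper halfword, leaves the lower intact */
    out[1] = 0xe3400000u | ((hi & 0xf000u) << 4) | (rd << 12) | (hi & 0xfffu);
}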
@ -901,7 +907,7 @@ STATIC_INLINE void compemu_raw_handle_except(IMM cycles)
BEQ_i(0); // no exception, jump to next instruction
// countdown -= scaled_cycles(totcycles);
uae_s32 offs = (uae_u32)&countdown - (uae_u32)&regs;
offs = (uae_u32)&countdown - (uae_u32)&regs;
LDR_rRI(REG_WORK1, R_REGSTRUCT, offs);
if(CHECK32(cycles)) {
SUBS_rri(REG_WORK1, REG_WORK1, cycles);
@ -928,15 +934,10 @@ STATIC_INLINE void compemu_raw_handle_except(IMM cycles)
STATIC_INLINE void compemu_raw_maybe_recompile(uae_u32 t)
{
#ifdef ARMV6T2
BGE_i(2);
raw_pop_preserved_regs();
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
emit_long(t);
#else
uae_s32 offs = data_long_offs(t);
CC_LDR_rRI(NATIVE_CC_LT, RPC_INDEX, RPC_INDEX, offs);
#endif
}
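Note: my reading of the ARMv6T2 layout above (an annotated sketch; ARM branch offsets are relative to PC+8):

/* BGE  +2              ; condition met: skip the three words below
 * POP  {...}           ; raw_pop_preserved_regs()
 * LDR  pc, [pc, #-4]   ; loads the word emitted directly below into PC
 * .word t              ; emit_long(t): absolute target address
 */

The pre-T2 path does the same job with one conditional PC-relative load (CC_LDR with NATIVE_CC_LT) from the data pool.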
STATIC_INLINE void compemu_raw_jmp(uae_u32 t)
@ -969,15 +970,10 @@ STATIC_INLINE void compemu_raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
STATIC_INLINE void compemu_raw_maybe_cachemiss(uae_u32 t)
{
#ifdef ARMV6T2
BEQ_i(2);
raw_pop_preserved_regs();
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
emit_long(t);
#else
uae_s32 offs = data_long_offs(t);
CC_LDR_rRI(NATIVE_CC_NE, RPC_INDEX, RPC_INDEX, offs);
#endif
}
STATIC_INLINE void compemu_raw_jz_b_oponly(void)
@ -1101,6 +1097,8 @@ LENDFUNC(NONE,READ,2,compemu_raw_tag_pc,(W4 d, MEMR s))
* FPU stuff *
*************************************************************************/
#ifdef USE_JIT_FPU
LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
{
VMOV64_dd(d, s);
@ -1112,13 +1110,8 @@ LOWFUNC(NONE,WRITE,2,compemu_raw_fmov_mr_drop,(MEMW mem, FR s))
if(mem >= (uae_u32) &regs && mem < (uae_u32) &regs + 1020 && ((mem - (uae_u32) &regs) & 0x3) == 0) {
VSTR64_dRi(s, R_REGSTRUCT, (mem - (uae_u32) &regs));
} else {
#ifdef ARMV6T2
MOVW_ri16(REG_WORK1, mem);
MOVT_ri16(REG_WORK1, mem >> 16);
#else
auto offs = data_long_offs(mem);
LDR_rRI(REG_WORK1, RPC_INDEX, offs);
#endif
VSTR64_dRi(s, REG_WORK1, 0);
}
}
@ -1130,13 +1123,8 @@ LOWFUNC(NONE,READ,2,compemu_raw_fmov_rm,(FW d, MEMR mem))
if(mem >= (uae_u32) &regs && mem < (uae_u32) &regs + 1020 && ((mem - (uae_u32) &regs) & 0x3) == 0) {
VLDR64_dRi(d, R_REGSTRUCT, (mem - (uae_u32) &regs));
} else {
#ifdef ARMV6T2
MOVW_ri16(REG_WORK1, mem);
MOVT_ri16(REG_WORK1, mem >> 16);
#else
auto offs = data_long_offs(mem);
LDR_rRI(REG_WORK1, RPC_INDEX, offs);
#endif
VLDR64_dRi(d, REG_WORK1, 0);
}
}
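Note: both FP move helpers keep a fast path for operands inside the regs struct: VLDR/VSTR encode an unsigned 8-bit word offset, so anything 4-byte aligned and within 1020 bytes of R_REGSTRUCT is addressed directly, and only other addresses need REG_WORK1. A C restatement of that guard (a sketch; the function name is mine):

#include <stdint.h>

/* Reachable from R_REGSTRUCT with a VLDR/VSTR immediate
 * (multiple of 4, range 0..1020 = 255 words)? */
static int in_vldr_range(uint32_t mem, uint32_t regs_base)
{
    return mem >= regs_base
        && mem < regs_base + 1020u
        && ((mem - regs_base) & 3u) == 0u;
}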
@ -1235,26 +1223,16 @@ LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_10,(FW r))
LOWFUNC(NONE,READ,2,raw_fmov_d_rm,(FW r, MEMR m))
{
#ifdef ARMV6T2
MOVW_ri16(REG_WORK1, m);
MOVT_ri16(REG_WORK1, m >> 16);
#else
auto offs = data_long_offs(m);
LDR_rRI(REG_WORK1, RPC_INDEX, offs);
#endif
VLDR64_dRi(r, REG_WORK1, 0);
}
LENDFUNC(NONE,READ,2,raw_fmov_d_rm,(FW r, MEMR m))
LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
{
#ifdef ARMV6T2
MOVW_ri16(REG_WORK1, m);
MOVT_ri16(REG_WORK1, m >> 16);
#else
auto offs = data_long_offs(m);
LDR_rRI(REG_WORK1, RPC_INDEX, offs);
#endif
MOVW_ri16(REG_WORK1, m);
MOVT_ri16(REG_WORK1, m >> 16);
VLDR32_sRi(SCRATCH_F32_1, REG_WORK1, 0);
VCVT32to64_ds(r, SCRATCH_F32_1);
}
@ -1386,13 +1364,10 @@ LENDFUNC(NONE,NONE,2,raw_fmovs_rr,(FW d, FR s))
LOWFUNC(NONE,NONE,3,raw_ffunc_rr,(double (*func)(double), FW d, FR s))
{
VMOV64_dd(0, s);
#ifdef ARMV6T2
MOVW_ri16(REG_WORK1, (uae_u32)func);
MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16);
#else
auto offs = data_long_offs(uae_u32(func));
LDR_rRI(REG_WORK1, RPC_INDEX, offs);
#endif
PUSH(RLR_INDEX);
BLX_r(REG_WORK1);
POP(RLR_INDEX);
@ -1413,13 +1388,8 @@ LOWFUNC(NONE,NONE,3,raw_fpowx_rr,(uae_u32 x, FW d, FR s))
VMOV64_dd(1, s);
#ifdef ARMV6T2
MOVW_ri16(REG_WORK1, (uae_u32)func);
MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16);
#else
auto offs = data_long_offs(uae_u32(func));
LDR_rRI(REG_WORK1, RPC_INDEX, offs);
#endif
MOVW_ri16(REG_WORK1, (uae_u32)func);
MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16);
PUSH(RLR_INDEX);
BLX_r(REG_WORK1);
@ -1436,12 +1406,20 @@ LOWFUNC(NONE,WRITE,2,raw_fp_from_exten_mr,(RR4 adr, FR s))
VMOVi_to_ARM_rd(REG_WORK1, s, 1); // get high part of double
VCMP64_d0(s);
VMRS_CPSR();
#ifdef ARMV6T2
BEQ_i(20); // iszero
#else
BEQ_i(21);
#endif
UBFX_rrii(REG_WORK2, REG_WORK1, 20, 11); // get exponent
MOVW_ri16(REG_WORK3, 2047);
CMP_rr(REG_WORK2, REG_WORK3);
#ifdef ARMV6T2
BEQ_i(13); // isnan
#else
BEQ_i(14);
#endif
MOVW_ri16(REG_WORK3, 15360); // diff of bias between double and long double
ADD_rrr(REG_WORK2, REG_WORK2, REG_WORK3); // exponent done
@ -1454,12 +1432,18 @@ LOWFUNC(NONE,WRITE,2,raw_fp_from_exten_mr,(RR4 adr, FR s))
REV_rr(REG_WORK2, REG_WORK2);
STRH_rR(REG_WORK2, REG_WORK3); // write exponent
VSHL64_ddi(SCRATCH_F64_1, s, 11); // shift mantissa to correct position
VREV64_8_dd(SCRATCH_F64_1, SCRATCH_F64_1);
VMOV64_rrd(REG_WORK1, REG_WORK2, SCRATCH_F64_1);
ORR_rri(REG_WORK1, REG_WORK1, 0x80); // insert explicit 1
#ifdef ARMV6T2
STRD_rRI(REG_WORK1, REG_WORK3, 4);
B_i(9); // end_of_op
#else
STR_rRI(REG_WORK1, REG_WORK3, 4);
STR_rRI(REG_WORK2, REG_WORK3, 8);
B_i(10);
#endif
// isnan
MOVW_ri16(REG_WORK1, 0x7fff);
@ -1474,7 +1458,12 @@ LOWFUNC(NONE,WRITE,2,raw_fp_from_exten_mr,(RR4 adr, FR s))
ADD_rrr(REG_WORK3, adr, REG_WORK3);
REV_rr(REG_WORK1, REG_WORK1);
#ifdef ARMV6T2
STRD_rR(REG_WORK1, REG_WORK3);
#else
STR_rR(REG_WORK1, REG_WORK3);
STR_rRI(REG_WORK2, REG_WORK3, 4);
#endif
STR_rRI(REG_WORK2, REG_WORK3, 8);
// end_of_op
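Note: the constants in raw_fp_from_exten_mr come straight from the IEEE exponent layouts: a double carries an 11-bit exponent biased by 1023, the 68881 extended format a 15-bit exponent biased by 16383, and 2047 is the all-ones exponent marking NaN/Inf.

/* Re-biasing a double exponent for the 80-bit extended format:
 *   16383 - 1023 = 15360   (the MOVW_ri16(REG_WORK3, 15360) above)
 * An exponent field of 2047 (0x7ff, all ones) means NaN or Inf and
 * takes the isnan branch instead. */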
@ -1489,7 +1478,12 @@ LOWFUNC(NONE,READ,2,raw_fp_to_exten_rm,(FW d, RR4 adr))
LDR_rRI(REG_WORK3, R_REGSTRUCT, offs);
ADD_rrr(REG_WORK3, adr, REG_WORK3);
#ifdef ARMV6T2
LDRD_rRI(REG_WORK1, REG_WORK3, 4);
#else
LDR_rRI(REG_WORK1, REG_WORK3, 4);
LDR_rRI(REG_WORK2, REG_WORK3, 8);
#endif
BIC_rri(REG_WORK1, REG_WORK1, 0x80); // clear explicit 1
VMOV64_drr(d, REG_WORK1, REG_WORK2);
VREV64_8_dd(d, d);
@ -1527,6 +1521,30 @@ LOWFUNC(NONE,READ,2,raw_fp_to_exten_rm,(FW d, RR4 adr))
}
LENDFUNC(NONE,READ,2,raw_fp_to_exten_rm,(FW d, RR4 adr))
LOWFUNC(NONE,WRITE,2,raw_fp_from_double_mr,(RR4 adr, FR s))
{
uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs;
LDR_rRI(REG_WORK3, R_REGSTRUCT, offs);
ADD_rrr(REG_WORK3, adr, REG_WORK3);
VREV64_8_dd(SCRATCH_F64_1, s);
VSTR64_dRi(SCRATCH_F64_1, REG_WORK3, 0);
}
LENDFUNC(NONE,WRITE,2,raw_fp_from_double_mr,(RR4 adr, FR s))
LOWFUNC(NONE,READ,2,raw_fp_to_double_rm,(FW d, RR4 adr))
{
uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs;
LDR_rRI(REG_WORK3, R_REGSTRUCT, offs);
ADD_rrr(REG_WORK3, adr, REG_WORK3);
VLDR64_dRi(d, REG_WORK3, 0);
VREV64_8_dd(d, d);
}
LENDFUNC(NONE,READ,2,raw_fp_to_double_rm,(FW d, RR4 adr))
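Note: the two new raw helpers move a double directly between an FPU register and emulated 68k memory. The 68k side is big-endian while the host ARM runs little-endian, hence the VREV64.8 byte reversal around each 8-byte VLDR/VSTR. A plain-C model of the store path (a sketch; names are mine):

#include <stdint.h>
#include <string.h>

static void store_double_be(uint8_t *p, double v)
{
    uint8_t b[8];
    memcpy(b, &v, sizeof b);
    for (int i = 0; i < 8; i++)
        p[i] = b[7 - i];      /* what VREV64.8 + VSTR achieve together */
}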
STATIC_INLINE void raw_fflags_into_flags(int r)
{
VCMP64_d0(r);
@ -1652,3 +1670,4 @@ LOWFUNC(NONE,NONE,1,raw_roundingmode,(IMM mode))
}
LENDFUNC(NONE,NONE,1,raw_roundingmode,(IMM mode))
#endif // USE_JIT_FPU

@ -1327,7 +1327,7 @@ enum {
#define SMULxy_rrr(Rd,Rn,Rm,x,y) CC_SMULxy_rrr(NATIVE_CC_AL,Rd,Rn,Rm,x,y)
// ARMv6T2
//#ifdef ARMV6T2
#ifdef ARMV6T2
#define CC_BFI_rrii(cc,Rd,Rn,lsb,msb) _W(((cc) << 28) | (0x3e << 21) | ((msb) << 16) | (Rd << 12) | ((lsb) << 7) | (0x1 << 4) | (Rn))
#define BFI_rrii(Rd,Rn,lsb,msb) CC_BFI_rrii(NATIVE_CC_AL,Rd,Rn,lsb,msb)
@ -1353,7 +1353,7 @@ enum {
#define CC_UDIV_rrr(cc,Rd,Rn,Rm) _W(((cc) << 28) | (0x7 << 24) | (0x3 << 20) | (Rd << 16) | (0xf << 12) | (Rm << 8) | (0x1 << 4) | (Rn))
#define UDIV_rrr(Rd,Rn,Rm) CC_UDIV_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
//#endif
#endif
// Floatingpoint
#define FADR_ADD(offs) ((1 << 23) | (offs) >> 2)

@ -123,7 +123,7 @@ typedef union {
#if defined(CPU_arm)
//#define DEBUG_DATA_BUFFER
#define ALIGN_NOT_NEEDED
#define N_REGS 13 /* really 16, but 13 to 15 are SP, LR, PC */
#define N_REGS 11 /* really 16, but 13 to 15 are SP, LR, PC; 12 is scratch reg and 11 holds regs-struct */
#else
#define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */
#endif
@ -381,6 +381,12 @@ typedef struct blockinfo_t {
#define BI_COMPILING 5
#define BI_FINALIZING 6
#if defined(CPU_arm) && !defined(ARMV6T2)
const int POPALLSPACE_SIZE = 2048; /* That should be enough space */
#else
const int POPALLSPACE_SIZE = 512; /* That should be enough space */
#endif
void execute_normal(void);
void exec_nostats(void);
void do_nothing(void);

@ -20,7 +20,7 @@
#include "compemu.h"
#include "flags_arm.h"
#if defined(JIT)
#if defined(USE_JIT_FPU)
extern void fpp_to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3);
@ -188,10 +188,7 @@ STATIC_INLINE int comp_fp_get (uae_u32 opcode, uae_u16 extra, int treg)
fmov_w_rr (treg, S2);
return 1;
case 5: /* Double */
readlong (S1, S2, S3);
add_l_ri (S1, 4);
readlong (S1, S4, S3);
fmov_d_rrr (treg, S4, S2);
fp_to_double_rm (treg, S1);
return 2;
case 6: /* Byte */
readbyte (S1, S2, S3);
@ -293,10 +290,7 @@ STATIC_INLINE int comp_fp_put (uae_u32 opcode, uae_u16 extra)
writeword_clobber (S1, S2, S3);
return 0;
case 5: /* Double */
fmov_to_d_rrr(S2, S3, sreg);
writelong_clobber (S1, S3, S4);
add_l_ri (S1, 4);
writelong_clobber (S1, S2, S4);
fp_from_double_mr(S1, sreg);
return 0;
case 6: /* Byte */
fmov_to_b_rr(S2, sreg);
@ -766,6 +760,11 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
sreg = FS1;
else /* one operand only, thus we can load the argument into dreg */
sreg = dreg;
if(opmode >= 0x30 && opmode <= 0x37) {
// get out early for unsupported ops
FAIL (1);
return;
}
if ((prec = comp_fp_get (opcode, extra, sreg)) < 0) {
FAIL (1);
return;
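Note: the new early-out rejects opmodes 0x30 through 0x37 before any operand is fetched. To my reading these are the FSINCOS encodings; FSINCOS produces two results and does not fit the JIT's single-destination FPU midfuncs, so the instruction is punted to the interpreter via FAIL(1) without wasting the operand load.

/* 68881 opmode 011ccc (0x30..0x37): FSINCOS FPm,FPccc:FPsin, with the
 * low three bits naming the cosine destination register -- two results,
 * unsupported here, so bail out early. */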

@ -488,6 +488,8 @@ STATIC_INLINE void write_jmp_target(uae_u32* jmpaddr, uintptr a) {
* FPU stuff *
*************************************************************************/
#ifdef USE_JIT_FPU
MIDFUNC(1,f_forget_about,(FW r))
{
if (f_isinreg(r))
@ -935,6 +937,26 @@ MIDFUNC(2,fp_to_exten_rm,(FW d, RR4 adr))
}
MENDFUNC(2,fp_to_exten_rm,(FW d, RR4 adr))
MIDFUNC(2,fp_from_double_mr,(RR4 adr, FR s))
{
adr = readreg(adr, 4);
s = f_readreg(s);
raw_fp_from_double_mr(adr, s);
f_unlock(s);
unlock2(adr);
}
MENDFUNC(2,fp_from_double_mr,(RR4 adr, FR s))
MIDFUNC(2,fp_to_double_rm,(FW d, RR4 adr))
{
adr = readreg(adr, 4);
d = f_writereg(d);
raw_fp_to_double_rm(d, adr);
unlock2(adr);
f_unlock(d);
}
MENDFUNC(2,fp_to_double_rm,(FW d, RR4 adr))
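Note: each new midfunc follows the allocator discipline used throughout this file: lock the 68k address register with readreg, lock or allocate the FPU register with f_readreg/f_writereg, emit the raw op, then unlock everything. In comp_fp_get/comp_fp_put they collapse the old two-longword sequences into one call, e.g. for the Double store case:

/* Before: split the double and go through the 32-bit write helpers.
 *   fmov_to_d_rrr(S2, S3, sreg);
 *   writelong_clobber(S1, S3, S4);
 *   add_l_ri(S1, 4);
 *   writelong_clobber(S1, S2, S4);
 * After: one midfunc stores the byte-swapped double in one go.
 *   fp_from_double_mr(S1, sreg);
 */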
MIDFUNC(2,fp_fscc_ri,(RW4 d, int cc))
{
d = rmw(d, 4, 4);
@ -950,4 +972,4 @@ MIDFUNC(1,roundingmode,(IMM mode))
MENDFUNC(1,roundingmode,(IMM mode))
#endif // USE_JIT_FPU

@ -110,5 +110,7 @@ DECLARE_MIDFUNC(fpowx_rr(uae_u32 x, FW d, FR s));
DECLARE_MIDFUNC(fflags_into_flags());
DECLARE_MIDFUNC(fp_from_exten_mr(RR4 adr, FR s));
DECLARE_MIDFUNC(fp_to_exten_rm(FW d, RR4 adr));
DECLARE_MIDFUNC(fp_from_double_mr(RR4 adr, FR s));
DECLARE_MIDFUNC(fp_to_double_rm(FW d, RR4 adr));
DECLARE_MIDFUNC(fp_fscc_ri(RW4, int cc));
DECLARE_MIDFUNC(roundingmode(IMM mode));

@ -1835,6 +1835,8 @@ MENDFUNC(2,jff_DBCC,(RR2 d, IMM cc))
* C Always cleared.
*
*/
#ifdef ARMV6T2
MIDFUNC(3,jnf_DIVU,(W4 d, RR4 s1, RR4 s2))
{
s1 = readreg(s1, 4);
@ -1847,13 +1849,8 @@ MIDFUNC(3,jnf_DIVU,(W4 d, RR4 s1, RR4 s2))
// Signal exception 5
MOV_ri(REG_WORK1, 5);
#ifdef ARMV6T2
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
#else
auto offs = data_long_offs((uae_u32)(&jit_exception));
LDR_rRI(REG_WORK2, RPC_INDEX, offs);
#endif
STR_rR(REG_WORK1, REG_WORK2);
#ifdef ARM_HAS_DIV
B_i(4); // end_of_op
@ -1899,13 +1896,8 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
// Signal exception 5
MOV_ri(REG_WORK1, 5);
#ifdef ARMV6T2
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
#else
auto offs = data_long_offs((uae_u32)(&jit_exception));
LDR_rRI(REG_WORK2, RPC_INDEX, offs);
#endif
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
STR_rR(REG_WORK1, REG_WORK2);
// simplified flag handling for div0: set Z and V (for signed DIV: Z only)
@ -1918,8 +1910,8 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
UDIV_rrr(REG_WORK1, s1, REG_WORK3);
#else
B_i(17); // end_of_op
// src is not 0
VMOVi_from_ARM_dr(SCRATCH_F64_1, s1, 0);
VMOVi_from_ARM_dr(SCRATCH_F64_2, REG_WORK3, 0);
VCVTIuto64_ds(SCRATCH_F64_1, SCRATCH_F32_1);
@ -1929,7 +1921,7 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
VMOVi_to_ARM_rd(REG_WORK1, SCRATCH_F64_1, 0);
#endif
LSRS_rri(REG_WORK2, REG_WORK1, 16); // if result of this is not 0, DIVU overflows
BEQ_i(2);
// Here we handle overflow
MOV_ri(REG_WORK1, ARM_V_FLAG | ARM_N_FLAG);
@ -1945,13 +1937,15 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
MLS_rrrr(REG_WORK2, REG_WORK1, REG_WORK3, s1);
PKHBT_rrrLSLi(d, REG_WORK1, REG_WORK2, 16);
// end_of_op
unlock2(d);
unlock2(s1);
unlock2(s2);
}
MENDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
#endif
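Note: jff_DIVU models the 68k's 32-by-16 unsigned divide: a zero divisor raises exception 5, a quotient above 0xffff sets overflow and leaves the destination alone (exactly what the LSRS #16 test checks), and otherwise MLS computes the remainder and PKHBT packs remainder:quotient. A plain-C model of those semantics (a sketch; the name is mine):

#include <stdint.h>

/* 68k DIVU.W: d = remainder:quotient. Returns -1 on divide-by-zero
 * (exception 5), 1 on overflow (V set, destination unchanged), 0 on ok. */
static int divu_model(uint32_t dividend, uint16_t divisor, uint32_t *d)
{
    if (divisor == 0)
        return -1;
    uint32_t q = dividend / divisor;
    if (q > 0xffffu)
        return 1;                 /* the LSRS #16 != 0 case above */
    *d = ((dividend % divisor) << 16) | q;   /* MLS + PKHBT */
    return 0;
}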
/*
* DIVS
*
@ -1962,6 +1956,8 @@ MENDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
* C Always cleared.
*
*/
#ifdef ARMV6T2
MIDFUNC(3,jnf_DIVS,(W4 d, RR4 s1, RR4 s2))
{
s1 = readreg(s1, 4);
@ -1974,23 +1970,18 @@ MIDFUNC(3,jnf_DIVS,(W4 d, RR4 s1, RR4 s2))
// Signal exception 5
MOV_ri(REG_WORK1, 5);
#ifdef ARMV6T2
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
#else
auto offs = data_long_offs((uae_u32)(&jit_exception));
LDR_rRI(REG_WORK2, RPC_INDEX, offs);
#endif
STR_rR(REG_WORK1, REG_WORK2);
#ifdef ARM_HAS_DIV
B_i(12); // end_of_op
// src is not 0
SDIV_rrr(REG_WORK1, s1, REG_WORK3);
#else
B_i(18); // end_of_op
// src is not 0
VMOVi_from_ARM_dr(SCRATCH_F64_1, s1, 0);
VMOVi_from_ARM_dr(SCRATCH_F64_2, REG_WORK3, 0);
VCVTIto64_ds(SCRATCH_F64_1, SCRATCH_F32_1);
@ -2037,13 +2028,8 @@ MIDFUNC(3,jff_DIVS,(W4 d, RR4 s1, RR4 s2))
// Signal exception 5
MOV_ri(REG_WORK1, 5);
#ifdef ARMV6T2
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
#else
auto offs = data_long_offs((uae_u32)(&jit_exception));
LDR_rRI(REG_WORK2, RPC_INDEX, offs);
#endif
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
STR_rR(REG_WORK1, REG_WORK2);
// simplified flag handling for div0: set Z and V (for signed DIV: Z only)
@ -2057,7 +2043,7 @@ MIDFUNC(3,jff_DIVS,(W4 d, RR4 s1, RR4 s2))
#else
B_i(25); // end_of_op
// src is not 0
VMOVi_from_ARM_dr(SCRATCH_F64_1, s1, 0);
VMOVi_from_ARM_dr(SCRATCH_F64_2, REG_WORK3, 0);
VCVTIto64_ds(SCRATCH_F64_1, SCRATCH_F32_1);
@ -2299,6 +2285,8 @@ MIDFUNC(3,jff_DIVLS32,(RW4 d, RR4 s1, W4 rem))
}
MENDFUNC(3,jff_DIVLS32,(RW4 d, RR4 s1, W4 rem))
#endif
/*
* EOR
* Operand Syntax: Dn, <ea>
@ -3242,11 +3230,23 @@ MIDFUNC(2,jnf_MOVE16,(RR4 d, RR4 s))
ADD_rrr(s, s, REG_WORK1);
ADD_rrr(d, d, REG_WORK1);
#ifdef ARMV6T2
LDRD_rR(REG_WORK1, s);
STRD_rR(REG_WORK1, d);
LDRD_rRI(REG_WORK1, s, 8);
STRD_rRI(REG_WORK1, d, 8);
#else
LDR_rR(REG_WORK1, s);
LDR_rRI(REG_WORK2, s, 4);
STR_rR(REG_WORK1, d);
STR_rRI(REG_WORK2, d, 4);
LDR_rRI(REG_WORK1, s, 8);
LDR_rRI(REG_WORK2, s, 12);
STR_rRI(REG_WORK1, d, 8);
STR_rRI(REG_WORK2, d, 12);
#endif
POP_REGS((1 << s) | (1 << d));
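Note: MOVE16 copies an aligned 16-byte line. The ARMv6T2 path now does it as two 64-bit register-pair transfers (LDRD/STRD), where the fallback issues four 32-bit load/store pairs. A C model of the copy (sketch):

#include <stdint.h>

static void move16(uint64_t *d, const uint64_t *s)
{
    d[0] = s[0];    /* LDRD/STRD  [s]     / [d]     */
    d[1] = s[1];    /* LDRD/STRD  [s, #8] / [d, #8] */
}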
@ -4499,7 +4499,12 @@ MIDFUNC(3,jff_ROXL_b,(W4 d, RR4 s, RR4 i))
CC_MOV_ri(NATIVE_CC_CC, x, 0);
// Calc N and Z
#ifdef ARMV6T2
BFI_rrii(d, x, 8, 8); // Make sure to set carry (last bit shifted out)
#else
BIC_rri(d, d, 0x100);
ORR_rrrLSLi(d, d, x, 8);
#endif
LSLS_rri(REG_WORK1, d, 24);
// end of op
@ -4549,7 +4554,12 @@ MIDFUNC(3,jff_ROXL_w,(W4 d, RR4 s, RR4 i))
CC_MOV_ri(NATIVE_CC_CC, x, 0);
// Calc N and Z
#ifdef ARMV6T2
BFI_rrii(d, x, 16, 16); // Make sure to set carry (last bit shifted out)
#else
BIC_rri(d, d, 0x10000);
ORR_rrrLSLi(d, d, x, 16);
#endif
LSLS_rri(REG_WORK1, d, 16);
// end of op
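Note: both ROXL fixes re-insert the X bit just above the operand (bit 8 for byte, bit 16 for word) so the following LSLS sets N and Z from the result while shifting exactly that bit out into carry; BFI does the insert in one instruction, the fallback needs BIC plus ORR-with-shift. A C model of the byte case (a sketch; the name is mine):

#include <stdint.h>

/* Bit 8 is the bit shifted out by LSLS #24, so placing x there
 * makes one LSLS deliver N, Z and C in a single instruction. */
static uint32_t insert_x_byte(uint32_t d, uint32_t x)
{
    return (d & ~0x100u) | ((x & 1u) << 8);   /* BIC + ORR fallback */
}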

@ -44,7 +44,6 @@
#include "compemu.h"
#include <SDL.h>
#define DEBUG 0
#if DEBUG
#define PROFILE_COMPILE_TIME 1
@ -134,7 +133,6 @@ static int redo_current_block;
uae_u8* current_compile_p = NULL;
static uae_u8* max_compile_start;
uae_u8* compiled_code = NULL;
const int POPALLSPACE_SIZE = 512; /* That should be enough space */
uae_u8 *popallspace = NULL;
void* pushall_call_handler = NULL;
@ -926,7 +924,7 @@ static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
bestreg = -1;
when = 2000000000;
for (i=0; i<N_REGS; i++) {
for (i = N_REGS - 1; i >= 0; i--) {
badness = live.nat[i].touched;
if (live.nat[i].nholds == 0)
badness = 0;
@ -1248,6 +1246,7 @@ static int rmw(int r, int wsize, int rsize)
/********************************************************************
* FPU register status handling. EMIT TIME! *
********************************************************************/
#ifdef USE_JIT_FPU
STATIC_INLINE void f_tomem_drop(int r)
{
@ -1382,6 +1381,7 @@ static void fflags_into_flags_internal(void)
live_flags();
}
#endif
#if defined(CPU_arm)
#include "compemu_midfunc_arm.cpp"
@ -2599,7 +2599,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
compemu_raw_mov_l_rm(0, (uintptr)specflags);
compemu_raw_test_l_rr(0, 0);
#if defined(CPU_arm) && !defined(ARMV6T2)
data_check_end(8, 56);
data_check_end(8, 64);
#endif
compemu_raw_jz_b_oponly();
branchadd = (uae_s8 *)get_target();
@ -2609,6 +2609,9 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
*(branchadd - 4) = (((uintptr)get_target() - (uintptr)branchadd) - 4) >> 2;
}
} else if(may_raise_exception) {
#if defined(CPU_arm) && !defined(ARMV6T2)
data_check_end(8, 64);
#endif
compemu_raw_handle_except(scaled_cycles(totcycles));
may_raise_exception = false;
}
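Note: on pre-ARMv6T2 builds every 32-bit constant is loaded PC-relative from an inline literal pool, so block epilogues are preceded by data_check_end() guards; the sizes grow here because the non-T2 exception and end-of-block paths now emit longer sequences. My reading of the contract, inferred from the call sites rather than the implementation:

/* data_check_end(d, c): ensure that emitting up to c bytes of code
 * referencing up to d bytes of inline data keeps every pending
 * literal within LDR's +/-4 KB PC-relative range; if it would not,
 * flush the pool into the code stream first. */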
@ -2637,6 +2640,9 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
}
tmp = live; /* ouch! This is big... */
#if defined(CPU_arm) && !defined(ARMV6T2)
data_check_end(8, 128);
#endif
compemu_raw_jcc_l_oponly(cc); // Last emitted opcode is branch to target
branchadd = (uae_u32*)get_target() - 1;
@ -2644,9 +2650,6 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
tbi = get_blockinfo_addr_new((void*)t1);
match_states(tbi);
#if defined(CPU_arm) && !defined(ARMV6T2)
data_check_end(4, 56);
#endif
tba = compemu_raw_endblock_pc_isconst(scaled_cycles(totcycles), t1);
write_jmp_target(tba, get_handler(t1));
create_jmpdep(bi, 0, tba, t1);
@ -2658,9 +2661,6 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
match_states(tbi);
//flush(1); /* Can only get here if was_comp==1 */
#if defined(CPU_arm) && !defined(ARMV6T2)
data_check_end(4, 56);
#endif
tba = compemu_raw_endblock_pc_isconst(scaled_cycles(totcycles), t2);
write_jmp_target(tba, get_handler(t2));
create_jmpdep(bi, 1, tba, t2);
@ -2674,7 +2674,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
/* Let's find out where next_handler is... */
if (was_comp && isinreg(PC_P)) {
#if defined(CPU_arm) && !defined(ARMV6T2)
data_check_end(4, 52);
data_check_end(4, 64);
#endif
r = live.state[PC_P].realreg;
compemu_raw_endblock_pc_inreg(r, scaled_cycles(totcycles));
@ -2688,7 +2688,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
match_states(tbi);
#if defined(CPU_arm) && !defined(ARMV6T2)
data_check_end(4, 56);
data_check_end(4, 64);
#endif
tba = compemu_raw_endblock_pc_isconst(scaled_cycles(totcycles), v);
write_jmp_target(tba, get_handler(v));
@ -2698,7 +2698,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
r = REG_PC_TMP;
compemu_raw_mov_l_rm(r, (uintptr)&regs.pc_p);
#if defined(CPU_arm) && !defined(ARMV6T2)
data_check_end(4, 52);
data_check_end(4, 64);
#endif
compemu_raw_endblock_pc_inreg(r, scaled_cycles(totcycles));
}