Resync with TomB's sources, regarding ARMv6T2 improvements
This commit is contained in:
parent
6571dc247d
commit
9b64073b68
21 changed files with 327 additions and 274 deletions
|
@ -471,8 +471,8 @@ LOWFUNC(WRITE,NONE,2,compemu_raw_MERGE8_rr,(RW4 d, RR4 s))
|
|||
UBFX_rrii(REG_WORK1, s, 8, 24);
|
||||
BFI_rrii(d, REG_WORK1, 8, 31);
|
||||
#else
|
||||
AND_rri(REG_WORK1, s, 0xffffff00);
|
||||
BIC_rri(d, d, 0xffffff00);
|
||||
BIC_rri(REG_WORK1, s, 0xff);
|
||||
AND_rri(d, d, 0xff);
|
||||
ORR_rrr(d, d, REG_WORK1);
|
||||
#endif
|
||||
}
|
||||
|
@ -889,11 +889,17 @@ STATIC_INLINE void compemu_raw_jcc_l_oponly(int cc)
|
|||
STATIC_INLINE void compemu_raw_handle_except(IMM cycles)
|
||||
{
|
||||
uae_u32* branchadd;
|
||||
int offs;
|
||||
|
||||
clobber_flags();
|
||||
|
||||
#ifdef ARMV6T2
|
||||
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
|
||||
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
|
||||
#else
|
||||
offs = data_long_offs((uae_u32)(&jit_exception));
|
||||
LDR_rRI(REG_WORK2, RPC_INDEX, offs);
|
||||
#endif
|
||||
LDR_rR(REG_WORK1, REG_WORK2);
|
||||
TST_rr(REG_WORK1, REG_WORK1);
|
||||
|
||||
|
@ -901,7 +907,7 @@ STATIC_INLINE void compemu_raw_handle_except(IMM cycles)
|
|||
BEQ_i(0); // no exception, jump to next instruction
|
||||
|
||||
// countdown -= scaled_cycles(totcycles);
|
||||
uae_s32 offs = (uae_u32)&countdown - (uae_u32)&regs;
|
||||
offs = (uae_u32)&countdown - (uae_u32)&regs;
|
||||
LDR_rRI(REG_WORK1, R_REGSTRUCT, offs);
|
||||
if(CHECK32(cycles)) {
|
||||
SUBS_rri(REG_WORK1, REG_WORK1, cycles);
|
||||
|
@ -928,15 +934,10 @@ STATIC_INLINE void compemu_raw_handle_except(IMM cycles)
|
|||
|
||||
/*
 * compemu_raw_maybe_recompile: emit a conditional load of address t into
 * RPC_INDEX (presumably the program counter, i.e. a conditional jump to t —
 * confirm).
 *
 * ARMV6T2 build: emits BGE_i(2), then raw_pop_preserved_regs(), then an LDR
 * of RPC_INDEX from [RPC_INDEX, #-4], and finally t itself as a literal word
 * via emit_long().
 * NOTE(review): presumably BGE_i(2) skips this sequence when the condition
 * is GE, and the LDR picks up the literal emitted right after it — confirm
 * against the BGE_i / LDR_rRI encodings.
 *
 * Other builds: emits a single LDR of RPC_INDEX that is conditional on
 * NATIVE_CC_LT, using the offset returned by data_long_offs(t).
 * NOTE(review): this path does not call raw_pop_preserved_regs() — confirm
 * that the difference between the two paths is intended.
 */
STATIC_INLINE void compemu_raw_maybe_recompile(uae_u32 t)
|
||||
{
|
||||
#ifdef ARMV6T2
|
||||
BGE_i(2);
|
||||
raw_pop_preserved_regs();
|
||||
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
|
||||
emit_long(t);
|
||||
#else
|
||||
uae_s32 offs = data_long_offs(t);
|
||||
CC_LDR_rRI(NATIVE_CC_LT, RPC_INDEX, RPC_INDEX, offs);
|
||||
#endif
|
||||
}
|
||||
|
||||
STATIC_INLINE void compemu_raw_jmp(uae_u32 t)
|
||||
|
@ -969,15 +970,10 @@ STATIC_INLINE void compemu_raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
|
|||
|
||||
/*
 * compemu_raw_maybe_cachemiss: emit a conditional load of address t into
 * RPC_INDEX (presumably the program counter, i.e. a conditional jump to t —
 * confirm).
 *
 * ARMV6T2 build: emits BEQ_i(2), then raw_pop_preserved_regs(), then an LDR
 * of RPC_INDEX from [RPC_INDEX, #-4], and finally t itself as a literal word
 * via emit_long().
 * NOTE(review): presumably BEQ_i(2) skips this sequence when the condition
 * is EQ, and the LDR picks up the literal emitted right after it — confirm
 * against the BEQ_i / LDR_rRI encodings.
 *
 * Other builds: emits a single LDR of RPC_INDEX that is conditional on
 * NATIVE_CC_NE, using the offset returned by data_long_offs(t).
 * NOTE(review): this path does not call raw_pop_preserved_regs() — confirm
 * that the difference between the two paths is intended.
 */
STATIC_INLINE void compemu_raw_maybe_cachemiss(uae_u32 t)
|
||||
{
|
||||
#ifdef ARMV6T2
|
||||
BEQ_i(2);
|
||||
raw_pop_preserved_regs();
|
||||
LDR_rRI(RPC_INDEX, RPC_INDEX, -4);
|
||||
emit_long(t);
|
||||
#else
|
||||
uae_s32 offs = data_long_offs(t);
|
||||
CC_LDR_rRI(NATIVE_CC_NE, RPC_INDEX, RPC_INDEX, offs);
|
||||
#endif
|
||||
}
|
||||
|
||||
STATIC_INLINE void compemu_raw_jz_b_oponly(void)
|
||||
|
@ -1101,6 +1097,8 @@ LENDFUNC(NONE,READ,2,compemu_raw_tag_pc,(W4 d, MEMR s))
|
|||
* FPU stuff *
|
||||
*************************************************************************/
|
||||
|
||||
#ifdef USE_JIT_FPU
|
||||
|
||||
LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
|
||||
{
|
||||
VMOV64_dd(d, s);
|
||||
|
@ -1112,13 +1110,8 @@ LOWFUNC(NONE,WRITE,2,compemu_raw_fmov_mr_drop,(MEMW mem, FR s))
|
|||
if(mem >= (uae_u32) &regs && mem < (uae_u32) &regs + 1020 && ((mem - (uae_u32) &regs) & 0x3) == 0) {
|
||||
VSTR64_dRi(s, R_REGSTRUCT, (mem - (uae_u32) &regs));
|
||||
} else {
|
||||
#ifdef ARMV6T2
|
||||
MOVW_ri16(REG_WORK1, mem);
|
||||
MOVT_ri16(REG_WORK1, mem >> 16);
|
||||
#else
|
||||
auto offs = data_long_offs(mem);
|
||||
LDR_rRI(REG_WORK1, RPC_INDEX, offs);
|
||||
#endif
|
||||
VSTR64_dRi(s, REG_WORK1, 0);
|
||||
}
|
||||
}
|
||||
|
@ -1130,13 +1123,8 @@ LOWFUNC(NONE,READ,2,compemu_raw_fmov_rm,(FW d, MEMR mem))
|
|||
if(mem >= (uae_u32) &regs && mem < (uae_u32) &regs + 1020 && ((mem - (uae_u32) &regs) & 0x3) == 0) {
|
||||
VLDR64_dRi(d, R_REGSTRUCT, (mem - (uae_u32) &regs));
|
||||
} else {
|
||||
#ifdef ARMV6T2
|
||||
MOVW_ri16(REG_WORK1, mem);
|
||||
MOVT_ri16(REG_WORK1, mem >> 16);
|
||||
#else
|
||||
auto offs = data_long_offs(mem);
|
||||
LDR_rRI(REG_WORK1, RPC_INDEX, offs);
|
||||
#endif
|
||||
VLDR64_dRi(d, REG_WORK1, 0);
|
||||
}
|
||||
}
|
||||
|
@ -1235,26 +1223,16 @@ LENDFUNC(NONE,NONE,1,raw_fmov_d_ri_10,(FW r))
|
|||
|
||||
/*
 * raw_fmov_d_rm: emit code that loads a 64-bit value from the absolute
 * address m into FP register r.
 *
 * The address is first placed in REG_WORK1:
 *  - ARMV6T2 build: with a MOVW/MOVT pair (m, then m >> 16).
 *    NOTE(review): MOVW_ri16 is handed the full 32-bit m; presumably the
 *    macro keeps only the low 16 bits — confirm.
 *  - other builds: with an LDR relative to RPC_INDEX, using the offset
 *    returned by data_long_offs(m).
 * The value is then loaded with VLDR64_dRi from [REG_WORK1, #0].
 */
LOWFUNC(NONE,READ,2,raw_fmov_d_rm,(FW r, MEMR m))
|
||||
{
|
||||
#ifdef ARMV6T2
|
||||
MOVW_ri16(REG_WORK1, m);
|
||||
MOVT_ri16(REG_WORK1, m >> 16);
|
||||
#else
|
||||
auto offs = data_long_offs(m);
|
||||
LDR_rRI(REG_WORK1, RPC_INDEX, offs);
|
||||
#endif
|
||||
VLDR64_dRi(r, REG_WORK1, 0);
|
||||
}
|
||||
LENDFUNC(NONE,READ,2,raw_fmov_d_rm,(FW r, MEMR m))
|
||||
|
||||
LOWFUNC(NONE,READ,2,raw_fmovs_rm,(FW r, MEMR m))
|
||||
{
|
||||
#ifdef ARMV6T2
|
||||
MOVW_ri16(REG_WORK1, m);
|
||||
MOVT_ri16(REG_WORK1, m >> 16);
|
||||
#else
|
||||
auto offs = data_long_offs(m);
|
||||
LDR_rRI(REG_WORK1, RPC_INDEX, offs);
|
||||
#endif
|
||||
MOVW_ri16(REG_WORK1, m);
|
||||
MOVT_ri16(REG_WORK1, m >> 16);
|
||||
VLDR32_sRi(SCRATCH_F32_1, REG_WORK1, 0);
|
||||
VCVT32to64_ds(r, SCRATCH_F32_1);
|
||||
}
|
||||
|
@ -1386,13 +1364,10 @@ LENDFUNC(NONE,NONE,2,raw_fmovs_rr,(FW d, FR s))
|
|||
LOWFUNC(NONE,NONE,3,raw_ffunc_rr,(double (*func)(double), FW d, FR s))
|
||||
{
|
||||
VMOV64_dd(0, s);
|
||||
#ifdef ARMV6T2
|
||||
|
||||
MOVW_ri16(REG_WORK1, (uae_u32)func);
|
||||
MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16);
|
||||
#else
|
||||
auto offs = data_long_offs(uae_u32(func));
|
||||
LDR_rRI(REG_WORK1, RPC_INDEX, offs);
|
||||
#endif
|
||||
|
||||
PUSH(RLR_INDEX);
|
||||
BLX_r(REG_WORK1);
|
||||
POP(RLR_INDEX);
|
||||
|
@ -1413,13 +1388,8 @@ LOWFUNC(NONE,NONE,3,raw_fpowx_rr,(uae_u32 x, FW d, FR s))
|
|||
|
||||
VMOV64_dd(1, s);
|
||||
|
||||
#ifdef ARMV6T2
|
||||
MOVW_ri16(REG_WORK1, (uae_u32)func);
|
||||
MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16);
|
||||
#else
|
||||
auto offs = data_long_offs(uae_u32(func));
|
||||
LDR_rRI(REG_WORK1, RPC_INDEX, offs);
|
||||
#endif
|
||||
MOVW_ri16(REG_WORK1, (uae_u32)func);
|
||||
MOVT_ri16(REG_WORK1, ((uae_u32)func) >> 16);
|
||||
|
||||
PUSH(RLR_INDEX);
|
||||
BLX_r(REG_WORK1);
|
||||
|
@ -1436,12 +1406,20 @@ LOWFUNC(NONE,WRITE,2,raw_fp_from_exten_mr,(RR4 adr, FR s))
|
|||
VMOVi_to_ARM_rd(REG_WORK1, s, 1); // get high part of double
|
||||
VCMP64_d0(s);
|
||||
VMRS_CPSR();
|
||||
#ifdef ARMV6T2
|
||||
BEQ_i(20); // iszero
|
||||
#else
|
||||
BEQ_i(21);
|
||||
#endif
|
||||
|
||||
UBFX_rrii(REG_WORK2, REG_WORK1, 20, 11); // get exponent
|
||||
MOVW_ri16(REG_WORK3, 2047);
|
||||
CMP_rr(REG_WORK2, REG_WORK3);
|
||||
#ifdef ARMV6T2
|
||||
BEQ_i(13); // isnan
|
||||
#else
|
||||
BEQ_i(14);
|
||||
#endif
|
||||
|
||||
MOVW_ri16(REG_WORK3, 15360); // diff of bias between double and long double
|
||||
ADD_rrr(REG_WORK2, REG_WORK2, REG_WORK3); // exponent done
|
||||
|
@ -1454,12 +1432,18 @@ LOWFUNC(NONE,WRITE,2,raw_fp_from_exten_mr,(RR4 adr, FR s))
|
|||
REV_rr(REG_WORK2, REG_WORK2);
|
||||
STRH_rR(REG_WORK2, REG_WORK3); // write exponent
|
||||
|
||||
VSHL64_ddi(SCRATCH_F64_1, s, 11); // shift mantissa to correct position
|
||||
VSHL64_ddi(SCRATCH_F64_1, s, 11); // shift mantissa to correct position
|
||||
VREV64_8_dd(SCRATCH_F64_1, SCRATCH_F64_1);
|
||||
VMOV64_rrd(REG_WORK1, REG_WORK2, SCRATCH_F64_1);
|
||||
ORR_rri(REG_WORK1, REG_WORK1, 0x80); // insert explicit 1
|
||||
#ifdef ARMV6T2
|
||||
STRD_rRI(REG_WORK1, REG_WORK3, 4);
|
||||
B_i(9); // end_of_op
|
||||
#else
|
||||
STR_rRI(REG_WORK1, REG_WORK3, 4);
|
||||
STR_rRI(REG_WORK2, REG_WORK3, 8);
|
||||
B_i(10);
|
||||
#endif
|
||||
|
||||
// isnan
|
||||
MOVW_ri16(REG_WORK1, 0x7fff);
|
||||
|
@ -1474,7 +1458,12 @@ LOWFUNC(NONE,WRITE,2,raw_fp_from_exten_mr,(RR4 adr, FR s))
|
|||
ADD_rrr(REG_WORK3, adr, REG_WORK3);
|
||||
|
||||
REV_rr(REG_WORK1, REG_WORK1);
|
||||
#ifdef ARMV6T2
|
||||
STRD_rR(REG_WORK1, REG_WORK3);
|
||||
#else
|
||||
STR_rR(REG_WORK1, REG_WORK3);
|
||||
STR_rRI(REG_WORK2, REG_WORK3, 4);
|
||||
#endif
|
||||
STR_rRI(REG_WORK2, REG_WORK3, 8);
|
||||
|
||||
// end_of_op
|
||||
|
@ -1489,7 +1478,12 @@ LOWFUNC(NONE,READ,2,raw_fp_to_exten_rm,(FW d, RR4 adr))
|
|||
LDR_rRI(REG_WORK3, R_REGSTRUCT, offs);
|
||||
ADD_rrr(REG_WORK3, adr, REG_WORK3);
|
||||
|
||||
#ifdef ARMV6T2
|
||||
LDRD_rRI(REG_WORK1, REG_WORK3, 4);
|
||||
#else
|
||||
LDR_rRI(REG_WORK1, REG_WORK3, 4);
|
||||
LDR_rRI(REG_WORK2, REG_WORK3, 8);
|
||||
#endif
|
||||
BIC_rri(REG_WORK1, REG_WORK1, 0x80); // clear explicit 1
|
||||
VMOV64_drr(d, REG_WORK1, REG_WORK2);
|
||||
VREV64_8_dd(d, d);
|
||||
|
@ -1527,6 +1521,30 @@ LOWFUNC(NONE,READ,2,raw_fp_to_exten_rm,(FW d, RR4 adr))
|
|||
}
|
||||
LENDFUNC(NONE,READ,2,raw_fp_to_exten_rm,(FW d, RR4 adr))
|
||||
|
||||
/*
 * raw_fp_from_double_mr: emit code that stores FP register s as a 64-bit
 * value to emulated memory at the address held in register adr.
 *
 * Steps emitted:
 *  1. Load REG_WORK3 from the regs structure at the offset of
 *     NATMEM_OFFSETX (addressed through R_REGSTRUCT).
 *  2. Add adr to it, giving the target address in REG_WORK3.
 *  3. VREV64_8 s into SCRATCH_F64_1, so s itself is left unmodified.
 *     NOTE(review): presumably a byte reversal within the 64-bit value to
 *     convert between host and emulated byte order — confirm.
 *  4. Store SCRATCH_F64_1 to [REG_WORK3, #0].
 *
 * Clobbers REG_WORK3 and SCRATCH_F64_1.
 */
LOWFUNC(NONE,WRITE,2,raw_fp_from_double_mr,(RR4 adr, FR s))
|
||||
{
|
||||
// Offset of NATMEM_OFFSETX inside the regs structure.
uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs;
|
||||
|
||||
LDR_rRI(REG_WORK3, R_REGSTRUCT, offs);
|
||||
ADD_rrr(REG_WORK3, adr, REG_WORK3);
|
||||
|
||||
VREV64_8_dd(SCRATCH_F64_1, s);
|
||||
VSTR64_dRi(SCRATCH_F64_1, REG_WORK3, 0);
|
||||
}
|
||||
LENDFUNC(NONE,WRITE,2,raw_fp_from_double_mr,(RR4 adr, FR s))
|
||||
|
||||
/*
 * raw_fp_to_double_rm: emit code that loads a 64-bit value from emulated
 * memory at the address held in register adr into FP register d.
 *
 * Steps emitted:
 *  1. Load REG_WORK3 from the regs structure at the offset of
 *     NATMEM_OFFSETX (addressed through R_REGSTRUCT).
 *  2. Add adr to it, giving the source address in REG_WORK3.
 *  3. Load 64 bits from [REG_WORK3, #0] into d.
 *  4. VREV64_8 d in place.
 *     NOTE(review): presumably a byte reversal within the 64-bit value to
 *     convert between emulated and host byte order — confirm.
 *
 * Clobbers REG_WORK3.
 */
LOWFUNC(NONE,READ,2,raw_fp_to_double_rm,(FW d, RR4 adr))
|
||||
{
|
||||
// Offset of NATMEM_OFFSETX inside the regs structure.
uae_s32 offs = (uae_u32)&NATMEM_OFFSETX - (uae_u32) &regs;
|
||||
|
||||
LDR_rRI(REG_WORK3, R_REGSTRUCT, offs);
|
||||
ADD_rrr(REG_WORK3, adr, REG_WORK3);
|
||||
|
||||
VLDR64_dRi(d, REG_WORK3, 0);
|
||||
VREV64_8_dd(d, d);
|
||||
}
|
||||
LENDFUNC(NONE,READ,2,raw_fp_to_double_rm,(FW d, RR4 adr))
|
||||
|
||||
STATIC_INLINE void raw_fflags_into_flags(int r)
|
||||
{
|
||||
VCMP64_d0(r);
|
||||
|
@ -1652,3 +1670,4 @@ LOWFUNC(NONE,NONE,1,raw_roundingmode,(IMM mode))
|
|||
}
|
||||
LENDFUNC(NONE,NONE,1,raw_roundingmode,(IMM mode))
|
||||
|
||||
#endif // USE_JIT_FPU
|
||||
|
|
|
@ -1327,7 +1327,7 @@ enum {
|
|||
#define SMULxy_rrr(Rd,Rn,Rm,x,y) CC_SMULxy_rrr(NATIVE_CC_AL,Rd,Rn,Rm,x,y)
|
||||
|
||||
// ARMv6T2
|
||||
//#ifdef ARMV6T2
|
||||
#ifdef ARMV6T2
|
||||
|
||||
#define CC_BFI_rrii(cc,Rd,Rn,lsb,msb) _W(((cc) << 28) | (0x3e << 21) | ((msb) << 16) | (Rd << 12) | ((lsb) << 7) | (0x1 << 4) | (Rn))
|
||||
#define BFI_rrii(Rd,Rn,lsb,msb) CC_BFI_rrii(NATIVE_CC_AL,Rd,Rn,lsb,msb)
|
||||
|
@ -1353,7 +1353,7 @@ enum {
|
|||
#define CC_UDIV_rrr(cc,Rd,Rn,Rm) _W(((cc) << 28) | (0x7 << 24) | (0x3 << 20) | (Rd << 16) | (0xf << 12) | (Rm << 8) | (0x1 << 4) | (Rn))
|
||||
#define UDIV_rrr(Rd,Rn,Rm) CC_UDIV_rrr(NATIVE_CC_AL,Rd,Rn,Rm)
|
||||
|
||||
//#endif
|
||||
#endif
|
||||
|
||||
// Floatingpoint
|
||||
#define FADR_ADD(offs) ((1 << 23) | (offs) >> 2)
|
||||
|
|
|
@ -123,7 +123,7 @@ typedef union {
|
|||
#if defined(CPU_arm)
|
||||
//#define DEBUG_DATA_BUFFER
|
||||
#define ALIGN_NOT_NEEDED
|
||||
#define N_REGS 13 /* really 16, but 13 to 15 are SP, LR, PC */
|
||||
#define N_REGS 11 /* really 16, but 13 to 15 are SP, LR, PC; 12 is scratch reg and 11 holds regs-struct */
|
||||
#else
|
||||
#define N_REGS 8 /* really only 7, but they are numbered 0,1,2,3,5,6,7 */
|
||||
#endif
|
||||
|
@ -381,6 +381,12 @@ typedef struct blockinfo_t {
|
|||
#define BI_COMPILING 5
|
||||
#define BI_FINALIZING 6
|
||||
|
||||
/*
 * POPALLSPACE_SIZE is chosen at build time: 2048 for ARM builds without
 * ARMV6T2, 512 for every other configuration.
 * NOTE(review): presumably the size in bytes of the popallspace buffer, with
 * the larger value leaving room for the literal data the non-ARMV6T2 code
 * paths emit — confirm.
 */
#if defined(CPU_arm) && !defined(ARMV6T2)
|
||||
const int POPALLSPACE_SIZE = 2048; /* That should be enough space */
|
||||
#else
|
||||
const int POPALLSPACE_SIZE = 512; /* That should be enough space */
|
||||
#endif
|
||||
|
||||
void execute_normal(void);
|
||||
void exec_nostats(void);
|
||||
void do_nothing(void);
|
||||
|
|
|
@ -20,7 +20,7 @@
|
|||
#include "compemu.h"
|
||||
#include "flags_arm.h"
|
||||
|
||||
#if defined(JIT)
|
||||
#if defined(USE_JIT_FPU)
|
||||
|
||||
extern void fpp_to_exten(fpdata *fpd, uae_u32 wrd1, uae_u32 wrd2, uae_u32 wrd3);
|
||||
|
||||
|
@ -188,10 +188,7 @@ STATIC_INLINE int comp_fp_get (uae_u32 opcode, uae_u16 extra, int treg)
|
|||
fmov_w_rr (treg, S2);
|
||||
return 1;
|
||||
case 5: /* Double */
|
||||
readlong (S1, S2, S3);
|
||||
add_l_ri (S1, 4);
|
||||
readlong (S1, S4, S3);
|
||||
fmov_d_rrr (treg, S4, S2);
|
||||
fp_to_double_rm (treg, S1);
|
||||
return 2;
|
||||
case 6: /* Byte */
|
||||
readbyte (S1, S2, S3);
|
||||
|
@ -293,10 +290,7 @@ STATIC_INLINE int comp_fp_put (uae_u32 opcode, uae_u16 extra)
|
|||
writeword_clobber (S1, S2, S3);
|
||||
return 0;
|
||||
case 5: /* Double */
|
||||
fmov_to_d_rrr(S2, S3, sreg);
|
||||
writelong_clobber (S1, S3, S4);
|
||||
add_l_ri (S1, 4);
|
||||
writelong_clobber (S1, S2, S4);
|
||||
fp_from_double_mr(S1, sreg);
|
||||
return 0;
|
||||
case 6: /* Byte */
|
||||
fmov_to_b_rr(S2, sreg);
|
||||
|
@ -766,6 +760,11 @@ void comp_fpp_opp (uae_u32 opcode, uae_u16 extra)
|
|||
sreg = FS1;
|
||||
else /* one operand only, thus we can load the argument into dreg */
|
||||
sreg = dreg;
|
||||
if(opmode >= 0x30 && opmode <= 0x37) {
|
||||
// get out early for unsupported ops
|
||||
FAIL (1);
|
||||
return;
|
||||
}
|
||||
if ((prec = comp_fp_get (opcode, extra, sreg)) < 0) {
|
||||
FAIL (1);
|
||||
return;
|
||||
|
|
|
@ -488,6 +488,8 @@ STATIC_INLINE void write_jmp_target(uae_u32* jmpaddr, uintptr a) {
|
|||
* FPU stuff *
|
||||
*************************************************************************/
|
||||
|
||||
#ifdef USE_JIT_FPU
|
||||
|
||||
MIDFUNC(1,f_forget_about,(FW r))
|
||||
{
|
||||
if (f_isinreg(r))
|
||||
|
@ -935,6 +937,26 @@ MIDFUNC(2,fp_to_exten_rm,(FW d, RR4 adr))
|
|||
}
|
||||
MENDFUNC(2,fp_to_exten_rm,(FW d, RR4 adr))
|
||||
|
||||
/*
 * fp_from_double_mr: mid-layer wrapper around raw_fp_from_double_mr().
 *
 * Resolves adr through readreg(adr, 4) and s through f_readreg(s), passes
 * the results to raw_fp_from_double_mr(), and then releases both with
 * f_unlock() and unlock2().
 * NOTE(review): presumably readreg/f_readreg map virtual registers to locked
 * native registers, so the order of these calls matters — confirm.
 */
MIDFUNC(2,fp_from_double_mr,(RR4 adr, FR s))
|
||||
{
|
||||
adr = readreg(adr, 4);
|
||||
s = f_readreg(s);
|
||||
raw_fp_from_double_mr(adr, s);
|
||||
f_unlock(s);
|
||||
unlock2(adr);
|
||||
}
|
||||
MENDFUNC(2,fp_from_double_mr,(RR4 adr, FR s))
|
||||
|
||||
/*
 * fp_to_double_rm: mid-layer wrapper around raw_fp_to_double_rm().
 *
 * Resolves adr through readreg(adr, 4) and d through f_writereg(d), passes
 * the results to raw_fp_to_double_rm(), and then releases both with
 * unlock2() and f_unlock().
 * NOTE(review): presumably readreg/f_writereg map virtual registers to
 * locked native registers (d being write-only here) — confirm.
 */
MIDFUNC(2,fp_to_double_rm,(FW d, RR4 adr))
|
||||
{
|
||||
adr = readreg(adr, 4);
|
||||
d = f_writereg(d);
|
||||
raw_fp_to_double_rm(d, adr);
|
||||
unlock2(adr);
|
||||
f_unlock(d);
|
||||
}
|
||||
MENDFUNC(2,fp_to_double_rm,(FW d, RR4 adr))
|
||||
|
||||
MIDFUNC(2,fp_fscc_ri,(RW4 d, int cc))
|
||||
{
|
||||
d = rmw(d, 4, 4);
|
||||
|
@ -950,4 +972,4 @@ MIDFUNC(1,roundingmode,(IMM mode))
|
|||
MENDFUNC(1,roundingmode,(IMM mode))
|
||||
|
||||
|
||||
|
||||
#endif // USE_JIT_FPU
|
||||
|
|
|
@ -110,5 +110,7 @@ DECLARE_MIDFUNC(fpowx_rr(uae_u32 x, FW d, FR s));
|
|||
DECLARE_MIDFUNC(fflags_into_flags());
|
||||
DECLARE_MIDFUNC(fp_from_exten_mr(RR4 adr, FR s));
|
||||
DECLARE_MIDFUNC(fp_to_exten_rm(FW d, RR4 adr));
|
||||
DECLARE_MIDFUNC(fp_from_double_mr(RR4 adr, FR s));
|
||||
DECLARE_MIDFUNC(fp_to_double_rm(FW d, RR4 adr));
|
||||
DECLARE_MIDFUNC(fp_fscc_ri(RW4, int cc));
|
||||
DECLARE_MIDFUNC(roundingmode(IMM mode));
|
||||
|
|
|
@ -1835,6 +1835,8 @@ MENDFUNC(2,jff_DBCC,(RR2 d, IMM cc))
|
|||
* C Always cleared.
|
||||
*
|
||||
*/
|
||||
#ifdef ARMV6T2
|
||||
|
||||
MIDFUNC(3,jnf_DIVU,(W4 d, RR4 s1, RR4 s2))
|
||||
{
|
||||
s1 = readreg(s1, 4);
|
||||
|
@ -1847,13 +1849,8 @@ MIDFUNC(3,jnf_DIVU,(W4 d, RR4 s1, RR4 s2))
|
|||
|
||||
// Signal exception 5
|
||||
MOV_ri(REG_WORK1, 5);
|
||||
#ifdef ARMV6T2
|
||||
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
|
||||
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
|
||||
#else
|
||||
auto offs = data_long_offs((uae_u32)(&jit_exception));
|
||||
LDR_rRI(REG_WORK2, RPC_INDEX, offs);
|
||||
#endif
|
||||
STR_rR(REG_WORK1, REG_WORK2);
|
||||
#ifdef ARM_HAS_DIV
|
||||
B_i(4); // end_of_op
|
||||
|
@ -1899,13 +1896,8 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
|
|||
|
||||
// Signal exception 5
|
||||
MOV_ri(REG_WORK1, 5);
|
||||
#ifdef ARMV6T2
|
||||
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
|
||||
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
|
||||
#else
|
||||
auto offs = data_long_offs((uae_u32)(&jit_exception));
|
||||
LDR_rRI(REG_WORK2, RPC_INDEX, offs);
|
||||
#endif
|
||||
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
|
||||
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
|
||||
STR_rR(REG_WORK1, REG_WORK2);
|
||||
|
||||
// simplified flag handling for div0: set Z and V (for signed DIV: Z only)
|
||||
|
@ -1918,8 +1910,8 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
|
|||
UDIV_rrr(REG_WORK1, s1, REG_WORK3);
|
||||
#else
|
||||
B_i(17); // end_of_op
|
||||
|
||||
// src is not 0
|
||||
|
||||
// src is not 0
|
||||
VMOVi_from_ARM_dr(SCRATCH_F64_1, s1, 0);
|
||||
VMOVi_from_ARM_dr(SCRATCH_F64_2, REG_WORK3, 0);
|
||||
VCVTIuto64_ds(SCRATCH_F64_1, SCRATCH_F32_1);
|
||||
|
@ -1929,7 +1921,7 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
|
|||
VMOVi_to_ARM_rd(REG_WORK1, SCRATCH_F64_1, 0);
|
||||
#endif
|
||||
|
||||
LSRS_rri(REG_WORK2, REG_WORK1, 16); // if result of this is not 0, DIVU overflows
|
||||
LSRS_rri(REG_WORK2, REG_WORK1, 16); // if result of this is not 0, DIVU overflows
|
||||
BEQ_i(2);
|
||||
// Here we handle overflow
|
||||
MOV_ri(REG_WORK1, ARM_V_FLAG | ARM_N_FLAG);
|
||||
|
@ -1945,13 +1937,15 @@ MIDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
|
|||
MLS_rrrr(REG_WORK2, REG_WORK1, REG_WORK3, s1);
|
||||
PKHBT_rrrLSLi(d, REG_WORK1, REG_WORK2, 16);
|
||||
// end_of_op
|
||||
|
||||
|
||||
unlock2(d);
|
||||
unlock2(s1);
|
||||
unlock2(s2);
|
||||
}
|
||||
MENDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* DIVS
|
||||
*
|
||||
|
@ -1962,6 +1956,8 @@ MENDFUNC(3,jff_DIVU,(W4 d, RR4 s1, RR4 s2))
|
|||
* C Always cleared.
|
||||
*
|
||||
*/
|
||||
#ifdef ARMV6T2
|
||||
|
||||
MIDFUNC(3,jnf_DIVS,(W4 d, RR4 s1, RR4 s2))
|
||||
{
|
||||
s1 = readreg(s1, 4);
|
||||
|
@ -1974,23 +1970,18 @@ MIDFUNC(3,jnf_DIVS,(W4 d, RR4 s1, RR4 s2))
|
|||
|
||||
// Signal exception 5
|
||||
MOV_ri(REG_WORK1, 5);
|
||||
#ifdef ARMV6T2
|
||||
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
|
||||
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
|
||||
#else
|
||||
auto offs = data_long_offs((uae_u32)(&jit_exception));
|
||||
LDR_rRI(REG_WORK2, RPC_INDEX, offs);
|
||||
#endif
|
||||
STR_rR(REG_WORK1, REG_WORK2);
|
||||
#ifdef ARM_HAS_DIV
|
||||
B_i(12); // end_of_op
|
||||
|
||||
|
||||
// src is not 0
|
||||
SDIV_rrr(REG_WORK1, s1, REG_WORK3);
|
||||
#else
|
||||
B_i(18); // end_of_op
|
||||
|
||||
// src is not 0
|
||||
|
||||
// src is not 0
|
||||
VMOVi_from_ARM_dr(SCRATCH_F64_1, s1, 0);
|
||||
VMOVi_from_ARM_dr(SCRATCH_F64_2, REG_WORK3, 0);
|
||||
VCVTIto64_ds(SCRATCH_F64_1, SCRATCH_F32_1);
|
||||
|
@ -2037,13 +2028,8 @@ MIDFUNC(3,jff_DIVS,(W4 d, RR4 s1, RR4 s2))
|
|||
|
||||
// Signal exception 5
|
||||
MOV_ri(REG_WORK1, 5);
|
||||
#ifdef ARMV6T2
|
||||
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
|
||||
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
|
||||
#else
|
||||
auto offs = data_long_offs((uae_u32)(&jit_exception));
|
||||
LDR_rRI(REG_WORK2, RPC_INDEX, offs);
|
||||
#endif
|
||||
MOVW_ri16(REG_WORK2, (uae_u32)(&jit_exception));
|
||||
MOVT_ri16(REG_WORK2, ((uae_u32)(&jit_exception)) >> 16);
|
||||
STR_rR(REG_WORK1, REG_WORK2);
|
||||
|
||||
// simplified flag handling for div0: set Z and V (for signed DIV: Z only)
|
||||
|
@ -2057,7 +2043,7 @@ MIDFUNC(3,jff_DIVS,(W4 d, RR4 s1, RR4 s2))
|
|||
#else
|
||||
B_i(25); // end_of_op
|
||||
|
||||
// src is not 0
|
||||
// src is not 0
|
||||
VMOVi_from_ARM_dr(SCRATCH_F64_1, s1, 0);
|
||||
VMOVi_from_ARM_dr(SCRATCH_F64_2, REG_WORK3, 0);
|
||||
VCVTIto64_ds(SCRATCH_F64_1, SCRATCH_F32_1);
|
||||
|
@ -2299,6 +2285,8 @@ MIDFUNC(3,jff_DIVLS32,(RW4 d, RR4 s1, W4 rem))
|
|||
}
|
||||
MENDFUNC(3,jff_DIVLS32,(RW4 d, RR4 s1, W4 rem))
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* EOR
|
||||
* Operand Syntax: Dn, <ea>
|
||||
|
@ -3242,11 +3230,23 @@ MIDFUNC(2,jnf_MOVE16,(RR4 d, RR4 s))
|
|||
ADD_rrr(s, s, REG_WORK1);
|
||||
ADD_rrr(d, d, REG_WORK1);
|
||||
|
||||
#ifdef ARMV6T2
|
||||
LDRD_rR(REG_WORK1, s);
|
||||
STRD_rR(REG_WORK1, d);
|
||||
|
||||
LDRD_rRI(REG_WORK1, s, 8);
|
||||
STRD_rRI(REG_WORK1, d, 8);
|
||||
#else
|
||||
LDR_rR(REG_WORK1, s);
|
||||
LDR_rRI(REG_WORK2, s, 4);
|
||||
STR_rR(REG_WORK1, d);
|
||||
STR_rRI(REG_WORK2, d, 4);
|
||||
|
||||
LDR_rRI(REG_WORK1, s, 8);
|
||||
LDR_rRI(REG_WORK2, s, 12);
|
||||
STR_rRI(REG_WORK1, d, 8);
|
||||
STR_rRI(REG_WORK2, d, 12);
|
||||
#endif
|
||||
|
||||
POP_REGS((1 << s) | (1 << d));
|
||||
|
||||
|
@ -4499,7 +4499,12 @@ MIDFUNC(3,jff_ROXL_b,(W4 d, RR4 s, RR4 i))
|
|||
CC_MOV_ri(NATIVE_CC_CC, x, 0);
|
||||
|
||||
// Calc N and Z
|
||||
#ifdef ARMV6T2
|
||||
BFI_rrii(d, x, 8, 8); // Make sure to set carry (last bit shifted out)
|
||||
#else
|
||||
BIC_rri(d, d, 0x100);
|
||||
ORR_rrrLSLi(d, d, x, 8);
|
||||
#endif
|
||||
LSLS_rri(REG_WORK1, d, 24);
|
||||
|
||||
// end of op
|
||||
|
@ -4549,7 +4554,12 @@ MIDFUNC(3,jff_ROXL_w,(W4 d, RR4 s, RR4 i))
|
|||
CC_MOV_ri(NATIVE_CC_CC, x, 0);
|
||||
|
||||
// Calc N and Z
|
||||
#ifdef ARMV6T2
|
||||
BFI_rrii(d, x, 16, 16); // Make sure to set carry (last bit shifted out)
|
||||
#else
|
||||
BIC_rri(d, d, 0x10000);
|
||||
ORR_rrrLSLi(d, d, x, 16);
|
||||
#endif
|
||||
LSLS_rri(REG_WORK1, d, 16);
|
||||
|
||||
// end of op
|
||||
|
|
|
@ -44,7 +44,6 @@
|
|||
#include "compemu.h"
|
||||
#include <SDL.h>
|
||||
|
||||
#define DEBUG 0
|
||||
|
||||
#if DEBUG
|
||||
#define PROFILE_COMPILE_TIME 1
|
||||
|
@ -134,7 +133,6 @@ static int redo_current_block;
|
|||
uae_u8* current_compile_p = NULL;
|
||||
static uae_u8* max_compile_start;
|
||||
uae_u8* compiled_code = NULL;
|
||||
const int POPALLSPACE_SIZE = 512; /* That should be enough space */
|
||||
uae_u8 *popallspace = NULL;
|
||||
|
||||
void* pushall_call_handler = NULL;
|
||||
|
@ -926,7 +924,7 @@ static int alloc_reg_hinted(int r, int size, int willclobber, int hint)
|
|||
bestreg = -1;
|
||||
when = 2000000000;
|
||||
|
||||
for (i=0; i<N_REGS; i++) {
|
||||
for (i = N_REGS - 1; i >= 0; i--) {
|
||||
badness = live.nat[i].touched;
|
||||
if (live.nat[i].nholds == 0)
|
||||
badness = 0;
|
||||
|
@ -1248,6 +1246,7 @@ static int rmw(int r, int wsize, int rsize)
|
|||
/********************************************************************
|
||||
* FPU register status handling. EMIT TIME! *
|
||||
********************************************************************/
|
||||
#ifdef USE_JIT_FPU
|
||||
|
||||
STATIC_INLINE void f_tomem_drop(int r)
|
||||
{
|
||||
|
@ -1382,6 +1381,7 @@ static void fflags_into_flags_internal(void)
|
|||
live_flags();
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(CPU_arm)
|
||||
#include "compemu_midfunc_arm.cpp"
|
||||
|
@ -2599,7 +2599,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
|
|||
compemu_raw_mov_l_rm(0, (uintptr)specflags);
|
||||
compemu_raw_test_l_rr(0, 0);
|
||||
#if defined(CPU_arm) && !defined(ARMV6T2)
|
||||
data_check_end(8, 56);
|
||||
data_check_end(8, 64);
|
||||
#endif
|
||||
compemu_raw_jz_b_oponly();
|
||||
branchadd = (uae_s8 *)get_target();
|
||||
|
@ -2609,6 +2609,9 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
|
|||
*(branchadd - 4) = (((uintptr)get_target() - (uintptr)branchadd) - 4) >> 2;
|
||||
}
|
||||
} else if(may_raise_exception) {
|
||||
#if defined(CPU_arm) && !defined(ARMV6T2)
|
||||
data_check_end(8, 64);
|
||||
#endif
|
||||
compemu_raw_handle_except(scaled_cycles(totcycles));
|
||||
may_raise_exception = false;
|
||||
}
|
||||
|
@ -2637,6 +2640,9 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
|
|||
}
|
||||
|
||||
tmp = live; /* ouch! This is big... */
|
||||
#if defined(CPU_arm) && !defined(ARMV6T2)
|
||||
data_check_end(8, 128);
|
||||
#endif
|
||||
compemu_raw_jcc_l_oponly(cc); // Last emitted opcode is branch to target
|
||||
branchadd = (uae_u32*)get_target() - 1;
|
||||
|
||||
|
@ -2644,9 +2650,6 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
|
|||
tbi = get_blockinfo_addr_new((void*)t1);
|
||||
match_states(tbi);
|
||||
|
||||
#if defined(CPU_arm) && !defined(ARMV6T2)
|
||||
data_check_end(4, 56);
|
||||
#endif
|
||||
tba = compemu_raw_endblock_pc_isconst(scaled_cycles(totcycles), t1);
|
||||
write_jmp_target(tba, get_handler(t1));
|
||||
create_jmpdep(bi, 0, tba, t1);
|
||||
|
@ -2658,9 +2661,6 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
|
|||
match_states(tbi);
|
||||
|
||||
//flush(1); /* Can only get here if was_comp==1 */
|
||||
#if defined(CPU_arm) && !defined(ARMV6T2)
|
||||
data_check_end(4, 56);
|
||||
#endif
|
||||
tba = compemu_raw_endblock_pc_isconst(scaled_cycles(totcycles), t2);
|
||||
write_jmp_target(tba, get_handler(t2));
|
||||
create_jmpdep(bi, 1, tba, t2);
|
||||
|
@ -2674,7 +2674,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
|
|||
/* Let's find out where next_handler is... */
|
||||
if (was_comp && isinreg(PC_P)) {
|
||||
#if defined(CPU_arm) && !defined(ARMV6T2)
|
||||
data_check_end(4, 52);
|
||||
data_check_end(4, 64);
|
||||
#endif
|
||||
r = live.state[PC_P].realreg;
|
||||
compemu_raw_endblock_pc_inreg(r, scaled_cycles(totcycles));
|
||||
|
@ -2688,7 +2688,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
|
|||
match_states(tbi);
|
||||
|
||||
#if defined(CPU_arm) && !defined(ARMV6T2)
|
||||
data_check_end(4, 56);
|
||||
data_check_end(4, 64);
|
||||
#endif
|
||||
tba = compemu_raw_endblock_pc_isconst(scaled_cycles(totcycles), v);
|
||||
write_jmp_target(tba, get_handler(v));
|
||||
|
@ -2698,7 +2698,7 @@ void compile_block(cpu_history* pc_hist, int blocklen, int totcycles)
|
|||
r = REG_PC_TMP;
|
||||
compemu_raw_mov_l_rm(r, (uintptr)&regs.pc_p);
|
||||
#if defined(CPU_arm) && !defined(ARMV6T2)
|
||||
data_check_end(4, 52);
|
||||
data_check_end(4, 64);
|
||||
#endif
|
||||
compemu_raw_endblock_pc_inreg(r, scaled_cycles(totcycles));
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue