/*
 * compiler/codegen_arm.cpp - ARM code generator
 *
 * Copyright (c) 2013 Jens Heitmann of ARAnyM dev team (see AUTHORS)
 *
 * Inspired by Christian Bauer's Basilisk II
 *
 * This file is part of the ARAnyM project which builds a new and powerful
 * TOS/FreeMiNT compatible virtual machine running on almost any hardware.
 *
 * JIT compiler m68k -> ARM
 *
 * Original 68040 JIT compiler for UAE, copyright 2000-2002 Bernd Meyer
 * Adaptation for Basilisk II and improvements, copyright 2000-2004 Gwenole Beauchesne
 * Portions related to CPU detection come from linux/arch/i386/kernel/setup.c
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Current state:
 * - Experimental
 * - Still optimizable
 * - Not clock cycle optimized
 * - As a first step, this compiler emulates x86 instructions to stay
 *   compatible with gencomp. A specialized version of gencomp compiling
 *   68k instructions directly to ARM instructions would be better; that
 *   is a step for the future.
 */

#include "flags_arm.h"

// Declare the built-in __clear_cache function.
extern void __clear_cache (char*, char*);

/*************************************************************************
 * Some basic information about the target CPU                          *
 *************************************************************************/

#define R0_INDEX 0
#define R1_INDEX 1
#define R2_INDEX 2
#define R3_INDEX 3
#define R4_INDEX 4
#define R5_INDEX 5
#define R6_INDEX 6
#define R7_INDEX 7
#define R8_INDEX 8
#define R9_INDEX 9
#define R10_INDEX 10
#define R11_INDEX 11
#define R12_INDEX 12
#define R13_INDEX 13
#define R14_INDEX 14
#define R15_INDEX 15

#define RSP_INDEX 13
#define RLR_INDEX 14
#define RPC_INDEX 15

/* The register in which subroutines return an integer return value */
#define REG_RESULT R0_INDEX

/* The registers subroutines take their first and second argument in */
#define REG_PAR1 R0_INDEX
#define REG_PAR2 R1_INDEX

#define REG_WORK1 R2_INDEX
#define REG_WORK2 R3_INDEX

//#define REG_DATAPTR R10_INDEX

#define REG_PC_PRE R0_INDEX /* The register we use for preloading regs.pc_p */
#define REG_PC_TMP R1_INDEX /* Another register that is not the above */

#define MUL_NREG1 R0_INDEX /* Holds the low 32 bits after a 32x32 mul */
#define MUL_NREG2 R1_INDEX /* Holds the high 32 bits */

#define STACK_ALIGN 4
#define STACK_OFFSET sizeof(void *)

#define R_REGSTRUCT 11
uae_s8 always_used[]={2,3,R_REGSTRUCT,12,-1}; // r12 is a scratch register in C function calls; I don't think it's safe to use it here...

uae_u8 call_saved[]={0,0,0,0, 1,1,1,1, 1,1,1,1, 0,1,1,1};

/* This *should* be the same as call_saved. But:
   - We might not really know which registers are saved, and which aren't,
     so we need to preserve some, but don't want to rely on everyone else
     also saving those registers
   - Special registers (such as the stack pointer) should not be "preserved"
     by pushing, even though they are "saved" across function calls
*/
static const uae_u8 need_to_preserve[]={0,0,0,0, 1,1,1,1, 1,1,1,1, 1,0,0,0};
static const uae_u32 PRESERVE_MASK = ((1<<R4_INDEX)|(1<<R5_INDEX)|(1<<R6_INDEX)|(1<<R7_INDEX)|(1<<R8_INDEX)|(1<<R9_INDEX)
    |(1<<R10_INDEX)|(1<<R11_INDEX)|(1<<R12_INDEX));

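// PRESERVE_MASK mirrors need_to_preserve: one bit per register, r4-r12.
// raw_push_regs_to_preserve() and raw_pop_preserved_regs() below push and
// pop exactly this set around calls out of translated code.
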
#include "codegen_arm.h"

static inline void UNSIGNED8_IMM_2_REG(W4 r, IMM v) {
  MOV_ri8(r, (uae_u8) v);
}

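// For negative 8-bit values, MVN loads the bitwise complement of its
// operand, so the sign-extended 32-bit result can still be materialized
// in a single instruction.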
static inline void SIGNED8_IMM_2_REG(W4 r, IMM v) {
  if (v & 0x80) {
    MVN_ri8(r, (uae_u8) ~v);
  } else {
    MOV_ri8(r, (uae_u8) v);
  }
}

static inline void UNSIGNED16_IMM_2_REG(W4 r, IMM v) {
  MOV_ri8(r, (uae_u8) v);
  ORR_rri8RORi(r, r, (uae_u8)(v >> 8), 24);
}

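// A 16-bit constant cannot be encoded in one ARM immediate, so the
// unsigned case above builds it from two bytes (ROR #24 is equivalent to
// LSL #8 for the high byte), while the signed case below loads the
// sign-extended value from the per-block literal pool via a PC-relative LDR.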
static inline void SIGNED16_IMM_2_REG(W4 r, IMM v) {
  uae_s32 offs = data_long_offs((uae_s32)(uae_s16) v);
  LDR_rRI(r, RPC_INDEX, offs);
}

static inline void UNSIGNED8_REG_2_REG(W4 d, RR4 s) {
  UXTB_rr(d, s);
}

static inline void SIGNED8_REG_2_REG(W4 d, RR4 s) {
  SXTB_rr(d, s);
}

static inline void UNSIGNED16_REG_2_REG(W4 d, RR4 s) {
  UXTH_rr(d, s);
}

static inline void SIGNED16_REG_2_REG(W4 d, RR4 s) {
  SXTH_rr(d, s);
}

#define ZERO_EXTEND_8_REG_2_REG(d,s) UNSIGNED8_REG_2_REG(d,s)
#define ZERO_EXTEND_16_REG_2_REG(d,s) UNSIGNED16_REG_2_REG(d,s)
#define SIGN_EXTEND_8_REG_2_REG(d,s) SIGNED8_REG_2_REG(d,s)
#define SIGN_EXTEND_16_REG_2_REG(d,s) SIGNED16_REG_2_REG(d,s)

#define jit_unimplemented(fmt, ...) do{ panicbug("**** Unimplemented ****\n"); panicbug(fmt, ## __VA_ARGS__); abort(); }while (0)

LOWFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, RR4 s))
{
  ADD_rrr(d, d, s);
}
LENDFUNC(WRITE,NONE,2,raw_add_l,(RW4 d, RR4 s))

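// ARM data-processing immediates encode an 8-bit value with an optional
// rotation. The helpers below only use the plain 0..255 range as the fast
// path; any larger constant is placed in the per-block literal pool and
// loaded into a work register with a PC-relative LDR first.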
LOWFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))
{
  if(i >= 0 && i < 256) {
    ADD_rri(d, d, i);
  } else {
    uae_s32 offs = data_long_offs(i);
    LDR_rRI(REG_WORK1, RPC_INDEX, offs);
    ADD_rrr(d, d, REG_WORK1);
  }
}
LENDFUNC(WRITE,NONE,2,raw_add_l_ri,(RW4 d, IMM i))

LOWFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, RR4 s, IMM offset))
{
  uae_s32 offs = data_long_offs(offset);
  LDR_rRI(REG_WORK1, RPC_INDEX, offs);   // ldr r2, [pc, #offs]
  ADD_rrr(d, s, REG_WORK1);              // add r7, r6, r2
}
LENDFUNC(NONE,NONE,3,raw_lea_l_brr,(W4 d, RR4 s, IMM offset))

LOWFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, IMM offset))
{
  int shft;
  switch(factor) {
    case 1: shft=0; break;
    case 2: shft=1; break;
    case 4: shft=2; break;
    case 8: shft=3; break;
    default: abort();
  }

  SIGNED8_IMM_2_REG(REG_WORK1, offset);

  ADD_rrr(REG_WORK1, s, REG_WORK1);
  ADD_rrrLSLi(d, REG_WORK1, index, shft);
}
LENDFUNC(NONE,NONE,5,raw_lea_l_brr_indexed,(W4 d, RR4 s, RR4 index, IMM factor, IMM offset))

LOWFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM factor))
{
  int shft;
  switch(factor) {
    case 1: shft=0; break;
    case 2: shft=1; break;
    case 4: shft=2; break;
    case 8: shft=3; break;
    default: abort();
  }

  ADD_rrrLSLi(d, s, index, shft);
}
LENDFUNC(NONE,NONE,4,raw_lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM factor))

LOWFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))
{
  BIC_rri(d, d, 0xff);        // bic %[d], %[d], #0xff
  ORR_rri(d, d, (s & 0xff));  // orr %[d], %[d], #%[s]
}
LENDFUNC(NONE,NONE,2,raw_mov_b_ri,(W1 d, IMM s))

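// BFI (ARMv6T2 and later) inserts bits from s into d starting at bit 0,
// leaving the remaining high bits of d untouched - a byte-sized
// move-to-register in one instruction.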
LOWFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, RR1 s))
{
  BFI_rrii(d, s, 0, 7);  // bfi %[d], %[s], 0, 7
}
LENDFUNC(NONE,NONE,2,raw_mov_b_rr,(W1 d, RR1 s))

LOWFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))
{
  if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
    uae_s32 offs = data_long_offs(s);
    LDR_rRI(REG_WORK2, RPC_INDEX, offs);
    uae_s32 idx = d - (uae_u32) &regs;
    STR_rRI(REG_WORK2, R_REGSTRUCT, idx);
  } else {
    data_check_end(8, 12);
    uae_s32 offs = data_long_offs(d);

    LDR_rRI(REG_WORK1, RPC_INDEX, offs);  // ldr r2, [pc, #offs] ; d

    offs = data_long_offs(s);
    LDR_rRI(REG_WORK2, RPC_INDEX, offs);  // ldr r3, [pc, #offs] ; s

    STR_rR(REG_WORK2, REG_WORK1);         // str r3, [r2]
  }
}
LENDFUNC(NONE,WRITE,2,raw_mov_l_mi,(MEMW d, IMM s))

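// PKHBT packs the low halfword of its first source operand with the high
// halfword of its second, so the word-sized move helpers below change only
// the low 16 bits of d and preserve its upper half.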
LOWFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))
{
  uae_s32 offs = data_word_offs(s);
  LDR_rRI(REG_WORK2, RPC_INDEX, offs);

  PKHBT_rrr(d, REG_WORK2, d);
}
LENDFUNC(NONE,NONE,2,raw_mov_w_ri,(W2 d, IMM s))

LOWFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, RR4 s))
{
  if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
    uae_s32 idx = d - (uae_u32) &regs;
    STR_rRI(s, R_REGSTRUCT, idx);
  } else {
    uae_s32 offs = data_long_offs(d);
    LDR_rRI(REG_WORK1, RPC_INDEX, offs);  // ldr r2, [pc, #offs]
    STR_rR(s, REG_WORK1);                 // str r3, [r2]
  }
}
LENDFUNC(NONE,WRITE,2,raw_mov_l_mr,(IMM d, RR4 s))

LOWFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, RR2 s))
{
  PKHBT_rrr(d, s, d);
}
LENDFUNC(NONE,NONE,2,raw_mov_w_rr,(W2 d, RR2 s))

LOWFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))
{
  LSL_rri(r, r, i & 0x1f);
}
LENDFUNC(WRITE,NONE,2,raw_shll_l_ri,(RW4 r, IMM i))

LOWFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))
{
  if(i >= 0 && i < 256) {
    SUB_rri(d, d, i);
  } else {
    uae_s32 offs = data_long_offs(i);
    LDR_rRI(REG_WORK1, RPC_INDEX, offs);

    SUB_rrr(d, d, REG_WORK1);
  }
}
LENDFUNC(WRITE,NONE,2,raw_sub_l_ri,(RW4 d, IMM i))

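// ARM sets the carry flag to the *inverse* of the borrow after a
// subtraction, while the generated code expects x86-style borrow
// semantics. raw_sub_w_ri therefore flips C in the CPSR with an EOR
// after the SUBS.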
LOWFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))
{
  // This function is only called with i = 1
  // Caller needs flags...

  LSL_rri(REG_WORK2, d, 16);

  SUBS_rri(REG_WORK2, REG_WORK2, (i & 0xff) << 16);
  PKHTB_rrrASRi(d, d, REG_WORK2, 16);

  MRS_CPSR(REG_WORK1);                       // mrs r2, CPSR
  EOR_rri(REG_WORK1, REG_WORK1, ARM_C_FLAG); // eor r2, r2, #0x20000000
  MSR_CPSR_r(REG_WORK1);                     // msr CPSR_fc, r2
}
LENDFUNC(WRITE,NONE,2,raw_sub_w_ri,(RW2 d, IMM i))

LOWFUNC(WRITE,NONE,2,raw_test_eq_w_rr,(RR2 d, RR2 s))
{
  // Only EQ/NE is relevant for the caller -> compare without handling carry
  LSL_rri(REG_WORK2, d, 16);
  TST_rrLSLi(REG_WORK2, s, 16);
}
LENDFUNC(WRITE,NONE,2,raw_test_eq_w_rr,(RR2 d, RR2 s))

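// In ARM state the PC reads as the address of the current instruction
// plus 8. The stack-adjustment helpers below exploit this: the LDR fetches
// a constant emitted inline right after the branch, and B_i(0) then skips
// over that literal word.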
static inline void raw_dec_sp(int off)
{
  if (off) {
    if(off >= 0 && off < 256) {
      SUB_rri(RSP_INDEX, RSP_INDEX, off);
    } else {
      LDR_rRI(REG_WORK1, RPC_INDEX, 4);
      SUB_rrr(RSP_INDEX, RSP_INDEX, REG_WORK1);
      B_i(0);
      //<value>:
      emit_long(off);
    }
  }
}

static inline void raw_inc_sp(int off)
{
  if (off) {
    if(off >= 0 && off < 256) {
      ADD_rri(RSP_INDEX, RSP_INDEX, off);
    } else {
      LDR_rRI(REG_WORK1, RPC_INDEX, 4);
      ADD_rrr(RSP_INDEX, RSP_INDEX, REG_WORK1);
      B_i(0);
      //<value>:
      emit_long(off);
    }
  }
}

static inline void raw_push_regs_to_preserve(void) {
  PUSH_REGS(PRESERVE_MASK);
}

static inline void raw_pop_preserved_regs(void) {
  POP_REGS(PRESERVE_MASK);
}

static inline void raw_load_flagx(uae_u32 t, uae_u32 r)
{
  LDR_rRI(t, R_REGSTRUCT, 17 * 4); // The X flag is stored after the 8 Dregs, 8 Aregs and CPSR in struct regstruct
}

static inline void raw_flags_evicted(int r)
{
  live.state[FLAGTMP].status = INMEM;
  live.state[FLAGTMP].realreg = -1;
  /* We just "evicted" FLAGTMP. */
  if (live.nat[r].nholds != 1) {
    /* Huh? */
    abort();
  }
  live.nat[r].nholds = 0;
}

static inline void raw_flags_init(void) {
}

static inline void raw_flags_to_reg(int r)
{
  MRS_CPSR(r);
  STR_rRI(r, R_REGSTRUCT, 16 * 4);
  raw_flags_evicted(r);
}

static inline void raw_reg_to_flags(int r)
{
  MSR_CPSR_r(r);
}

static inline void raw_load_flagreg(uae_u32 t, uae_u32 r)
{
  LDR_rRI(t, R_REGSTRUCT, 16 * 4); // Flags are stored after the 8 Dregs and 8 Aregs in struct regstruct
}

/* %eax register is clobbered if target processor doesn't support fucomi */
#define FFLAG_NREG_CLOBBER_CONDITION 0
#define FFLAG_NREG R0_INDEX
#define FLAG_NREG2 -1
#define FLAG_NREG1 -1
#define FLAG_NREG3 -1

static inline void raw_fflags_into_flags(int r)
{
  jit_unimplemented("raw_fflags_into_flags %x", r);
}

static inline void raw_fp_init(void)
{
#ifdef USE_JIT_FPU
  int i;

  for (i=0;i<N_FREGS;i++)
    live.spos[i]=-2;
  live.tos=-1; /* Stack is empty */
#endif
}

// Verify
static inline void raw_fp_cleanup_drop(void)
{
#ifdef USE_JIT_FPU
  D(panicbug("raw_fp_cleanup_drop"));

  while (live.tos>=1) {
//  emit_byte(0xde);
//  emit_byte(0xd9);
    live.tos-=2;
  }
  while (live.tos>=0) {
//  emit_byte(0xdd);
//  emit_byte(0xd8);
    live.tos--;
  }
  raw_fp_init();
#endif
}

LOWFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))
{
  jit_unimplemented("raw_fmov_mr_drop %x %x", m, r);
}
LENDFUNC(NONE,WRITE,2,raw_fmov_mr_drop,(MEMW m, FR r))

LOWFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))
{
  jit_unimplemented("raw_fmov_mr %x %x", m, r);
}
LENDFUNC(NONE,WRITE,2,raw_fmov_mr,(MEMW m, FR r))

LOWFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))
{
  jit_unimplemented("raw_fmov_rm %x %x", r, m);
}
LENDFUNC(NONE,READ,2,raw_fmov_rm,(FW r, MEMR m))

LOWFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))
{
  jit_unimplemented("raw_fmov_rr %x %x", d, s);
}
LENDFUNC(NONE,NONE,2,raw_fmov_rr,(FW d, FR s))

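// Every ARM instruction is 4 bytes, so nop filler is emitted in whole
// instruction slots (nbytes is converted from bytes to words below).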
static inline void raw_emit_nop_filler(int nbytes)
{
  nbytes >>= 2;
  while(nbytes--) { NOP(); }
}

static inline void raw_emit_nop(void)
{
  NOP();
}

static void raw_init_cpu(void)
{
  align_loops = 0;
  align_jumps = 0;

  raw_flags_init();
}

//
// Arm instructions
//
LOWFUNC(WRITE,NONE,2,raw_ADD_l_rr,(RW4 d, RR4 s))
{
  ADD_rrr(d, d, s);
}
LENDFUNC(WRITE,NONE,2,raw_ADD_l_rr,(RW4 d, RR4 s))

LOWFUNC(WRITE,NONE,2,raw_ADD_l_rri,(RW4 d, RR4 s, IMM i))
{
  ADD_rri(d, s, i);
}
LENDFUNC(WRITE,NONE,2,raw_ADD_l_rri,(RW4 d, RR4 s, IMM i))

LOWFUNC(WRITE,NONE,2,raw_SUB_l_rri,(RW4 d, RR4 s, IMM i))
{
  SUB_rri(d, s, i);
}
LENDFUNC(WRITE,NONE,2,raw_SUB_l_rri,(RW4 d, RR4 s, IMM i))

LOWFUNC(WRITE,NONE,2,raw_LDR_l_ri,(RW4 d, IMM i))
{
  uae_s32 offs = data_long_offs(i);
  LDR_rRI(d, RPC_INDEX, offs);
}
LENDFUNC(WRITE,NONE,2,raw_LDR_l_ri,(RW4 d, IMM i))

//
// compemu_support used raw calls
//
LOWFUNC(WRITE,RMW,2,compemu_raw_add_l_mi,(IMM d, IMM s))
{
  data_check_end(8, 24);
  uae_s32 target = data_long(d, 24);
  uae_s32 offs = get_data_offset(target);

  LDR_rRI(REG_WORK1, RPC_INDEX, offs);      // ldr r2, [pc, #offs] ; d
  LDR_rR(REG_WORK2, REG_WORK1);             // ldr r3, [r2]

  offs = data_long_offs(s);
  LDR_rRI(REG_WORK1, RPC_INDEX, offs);      // ldr r2, [pc, #offs] ; s

  ADD_rrr(REG_WORK2, REG_WORK2, REG_WORK1); // adds r3, r3, r2

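  // The PC-relative displacement of the pooled constant changes with each
  // instruction emitted above, so the offset to the same pool entry has to
  // be recomputed before it is used again.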
  offs = get_data_offset(target);
  LDR_rRI(REG_WORK1, RPC_INDEX, offs);      // ldr r2, [pc, #offs] ; d
  STR_rR(REG_WORK2, REG_WORK1);             // str r3, [r2]
}
LENDFUNC(WRITE,RMW,2,compemu_raw_add_l_mi,(IMM d, IMM s))

LOWFUNC(WRITE,NONE,2,compemu_raw_and_TAGMASK,(RW4 d))
{
  // TAGMASK is 0x0000ffff
  BFC_rii(d, 16, 31);
}
LENDFUNC(WRITE,NONE,2,compemu_raw_and_TAGMASK,(RW4 d))

LOWFUNC(NONE,NONE,1,compemu_raw_bswap_32,(RW4 r))
{
  REV_rr(r,r); // rev %[r],%[r]
}
LENDFUNC(NONE,NONE,1,compemu_raw_bswap_32,(RW4 r))

LOWFUNC(WRITE,READ,2,compemu_raw_cmp_l_mi,(MEMR d, IMM s))
{
  clobber_flags();

  if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
    uae_s32 offs = data_long_offs(s);
    LDR_rRI(REG_WORK2, RPC_INDEX, offs);
    uae_s32 idx = d - (uae_u32) &regs;
    LDR_rRI(REG_WORK1, R_REGSTRUCT, idx);
  } else {
    data_check_end(8, 16);
    uae_s32 offs = data_long_offs(d);
    LDR_rRI(REG_WORK1, RPC_INDEX, offs);
    LDR_rR(REG_WORK1, REG_WORK1);

    offs = data_long_offs(s);
    LDR_rRI(REG_WORK2, RPC_INDEX, offs);
  }
  CMP_rr(REG_WORK1, REG_WORK2);
}
LENDFUNC(WRITE,READ,2,compemu_raw_cmp_l_mi,(MEMR d, IMM s))

LOWFUNC(NONE,NONE,3,compemu_raw_lea_l_brr,(W4 d, RR4 s, IMM offset))
{
  if(offset >= 0 && offset < 256) {
    ADD_rri(d, s, offset);
  } else {
    uae_s32 offs = data_long_offs(offset);
    LDR_rRI(REG_WORK1, RPC_INDEX, offs);
    ADD_rrr(d, s, REG_WORK1);
  }
}
LENDFUNC(NONE,NONE,3,compemu_raw_lea_l_brr,(W4 d, RR4 s, IMM offset))

LOWFUNC(NONE,NONE,4,compemu_raw_lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM factor))
{
  int shft;
  switch(factor) {
    case 1: shft=0; break;
    case 2: shft=1; break;
    case 4: shft=2; break;
    case 8: shft=3; break;
    default: abort();
  }

  ADD_rrrLSLi(d, s, index, shft);
}
LENDFUNC(NONE,NONE,4,compemu_raw_lea_l_rr_indexed,(W4 d, RR4 s, RR4 index, IMM factor))

LOWFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s))
{
  if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
    uae_s32 idx = d - (uae_u32) &regs;
    STRB_rRI(s, R_REGSTRUCT, idx);
  } else {
    uae_s32 offs = data_long_offs(d);
    LDR_rRI(REG_WORK1, RPC_INDEX, offs);
    STRB_rR(s, REG_WORK1);
  }
}
LENDFUNC(NONE,WRITE,2,compemu_raw_mov_b_mr,(IMM d, RR1 s))

LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s))
{
  if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
    uae_s32 offs = data_long_offs(s);
    LDR_rRI(REG_WORK2, RPC_INDEX, offs);
    uae_s32 idx = d - (uae_u32) &regs;
    STR_rRI(REG_WORK2, R_REGSTRUCT, idx);
  } else {
    data_check_end(8, 12);
    uae_s32 offs = data_long_offs(d);
    LDR_rRI(REG_WORK1, RPC_INDEX, offs);
    offs = data_long_offs(s);
    LDR_rRI(REG_WORK2, RPC_INDEX, offs);
    STR_rR(REG_WORK2, REG_WORK1);
  }
}
LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mi,(MEMW d, IMM s))

LOWFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s))
{
  if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
    uae_s32 idx = d - (uae_u32) &regs;
    STR_rRI(s, R_REGSTRUCT, idx);
  } else {
    uae_s32 offs = data_long_offs(d);
    LDR_rRI(REG_WORK1, RPC_INDEX, offs);
    STR_rR(s, REG_WORK1);
  }
}
LENDFUNC(NONE,WRITE,2,compemu_raw_mov_l_mr,(IMM d, RR4 s))

LOWFUNC(NONE,NONE,2,compemu_raw_mov_l_ri,(W4 d, IMM s))
{
  uae_s32 offs = data_long_offs(s);
  LDR_rRI(d, RPC_INDEX, offs);
}
LENDFUNC(NONE,NONE,2,compemu_raw_mov_l_ri,(W4 d, IMM s))

LOWFUNC(NONE,READ,2,compemu_raw_mov_l_rm,(W4 d, MEMR s))
{
  if(s >= (uae_u32) &regs && s < ((uae_u32) &regs) + sizeof(struct regstruct)) {
    uae_s32 idx = s - (uae_u32) &regs;
    LDR_rRI(d, R_REGSTRUCT, idx);
  } else {
    uae_s32 offs = data_long_offs(s);
    LDR_rRI(REG_WORK1, RPC_INDEX, offs);
    LDR_rR(d, REG_WORK1);
  }
}
LENDFUNC(NONE,READ,2,compemu_raw_mov_l_rm,(W4 d, MEMR s))

LOWFUNC(NONE,NONE,2,compemu_raw_mov_l_rr,(W4 d, RR4 s))
{
  MOV_rr(d, s);
}
LENDFUNC(NONE,NONE,2,compemu_raw_mov_l_rr,(W4 d, RR4 s))

LOWFUNC(NONE,WRITE,2,compemu_raw_mov_w_mr,(IMM d, RR2 s))
{
  if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
    uae_s32 idx = d - (uae_u32) &regs;
    STRH_rRI(s, R_REGSTRUCT, idx);
  } else {
    uae_s32 offs = data_long_offs(d);
    LDR_rRI(REG_WORK1, RPC_INDEX, offs);
    STRH_rR(s, REG_WORK1);
  }
}
LENDFUNC(NONE,WRITE,2,compemu_raw_mov_w_mr,(IMM d, RR2 s))

LOWFUNC(WRITE,RMW,2,compemu_raw_sub_l_mi,(MEMRW d, IMM s))
{
  clobber_flags();

  if(s >= 0 && s < 254) {
    if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
      uae_s32 idx = d - (uae_u32) &regs;
      LDR_rRI(REG_WORK2, R_REGSTRUCT, idx);
      SUBS_rri(REG_WORK2, REG_WORK2, s);
      STR_rRI(REG_WORK2, R_REGSTRUCT, idx);
    } else {
      uae_s32 offs = data_long_offs(d);
      LDR_rRI(REG_WORK1, RPC_INDEX, offs);
      LDR_rR(REG_WORK2, REG_WORK1);
      SUBS_rri(REG_WORK2, REG_WORK2, s);
      STR_rR(REG_WORK2, REG_WORK1);
    }
  } else {
    if(d >= (uae_u32) &regs && d < ((uae_u32) &regs) + sizeof(struct regstruct)) {
      uae_s32 idx = d - (uae_u32) &regs;
      LDR_rRI(REG_WORK2, R_REGSTRUCT, idx);

      uae_s32 offs = data_long_offs(s);
      LDR_rRI(REG_WORK1, RPC_INDEX, offs);
      SUBS_rrr(REG_WORK2, REG_WORK2, REG_WORK1);

      STR_rRI(REG_WORK2, R_REGSTRUCT, idx);
    } else {
      data_check_end(8, 24);
      uae_s32 target = data_long(d, 24);
      uae_s32 offs = get_data_offset(target);
      LDR_rRI(REG_WORK1, RPC_INDEX, offs);
      LDR_rR(REG_WORK2, REG_WORK1);

      offs = data_long_offs(s);
      LDR_rRI(REG_WORK1, RPC_INDEX, offs);

      SUBS_rrr(REG_WORK2, REG_WORK2, REG_WORK1);

      offs = get_data_offset(target);
      LDR_rRI(REG_WORK1, RPC_INDEX, offs);
      STR_rR(REG_WORK2, REG_WORK1);
    }
  }
}
LENDFUNC(WRITE,RMW,2,compemu_raw_sub_l_mi,(MEMRW d, IMM s))

LOWFUNC(WRITE,NONE,2,compemu_raw_test_l_rr,(RR4 d, RR4 s))
{
  clobber_flags();
  TST_rr(d, s);
}
LENDFUNC(WRITE,NONE,2,compemu_raw_test_l_rr,(RR4 d, RR4 s))

LOWFUNC(NONE,NONE,2,compemu_raw_zero_extend_16_rr,(W4 d, RR2 s))
{
  UXTH_rr(d, s);
}
LENDFUNC(NONE,NONE,2,compemu_raw_zero_extend_16_rr,(W4 d, RR2 s))

static inline void compemu_raw_call(uae_u32 t)
{
  uae_s32 offs = data_long_offs(t);
  LDR_rRI(REG_WORK1, RPC_INDEX, offs);
  PUSH(RLR_INDEX);
  BLX_r(REG_WORK1);
  POP(RLR_INDEX);
}

static inline void compemu_raw_call_r(RR4 r)
{
  PUSH(RLR_INDEX);
  BLX_r(r);
  POP(RLR_INDEX);
}

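// The conditions LS and HI are synthesized from two short branches: the
// carry flag is kept with x86 borrow semantics (see raw_sub_w_ri above),
// so ARM's native LS/HI condition codes cannot be used directly. All other
// conditions map onto a single conditional load of the jump target. In
// every case the 32-bit target word is emitted by the caller right after
// this sequence.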
static inline void compemu_raw_jcc_l_oponly(int cc)
{
  switch (cc) {
    case 9: // LS
      BEQ_i(0);                     // beq <dojmp>
      BCC_i(2);                     // bcc <jp>

      //<dojmp>:
      LDR_rR(REG_WORK1, RPC_INDEX); // ldr r2, [pc] ; <value>
      BX_r(REG_WORK1);              // bx r2
      break;

    case 8: // HI
      BEQ_i(3);                     // beq <jp>
      BCS_i(2);                     // bcs <jp>

      //<dojmp>:
      LDR_rR(REG_WORK1, RPC_INDEX); // ldr r2, [pc] ; <value>
      BX_r(REG_WORK1);              // bx r2
      break;

    default:
      CC_LDR_rRI(cc, REG_WORK1, RPC_INDEX, 4); // ldrlt r2, [pc, #4] ; <value>
      CC_BX_r(cc, REG_WORK1);                  // bxlt r2
      B_i(0);                                  // b <jp>
      break;
  }
  // emit of target will be done by caller
}

static inline void compemu_raw_jl(uae_u32 t)
{
  uae_s32 offs = data_long_offs(t);
  CC_LDR_rRI(NATIVE_CC_LT, RPC_INDEX, RPC_INDEX, offs);
}

static inline void compemu_raw_jmp(uae_u32 t)
{
  LDR_rR(REG_WORK1, RPC_INDEX);
  BX_r(REG_WORK1);
  emit_long(t);
}

static inline void compemu_raw_jmp_m_indexed(uae_u32 base, uae_u32 r, uae_u32 m)
{
  int shft;
  switch(m) {
    case 1: shft=0; break;
    case 2: shft=1; break;
    case 4: shft=2; break;
    case 8: shft=3; break;
    default: abort();
  }

  LDR_rR(REG_WORK1, RPC_INDEX);
  LDR_rRR_LSLi(RPC_INDEX, REG_WORK1, r, shft);
  emit_long(base);
}

static inline void compemu_raw_jmp_r(RR4 r)
{
  BX_r(r);
}

static inline void compemu_raw_jnz(uae_u32 t)
{
  uae_s32 offs = data_long_offs(t);
  CC_LDR_rRI(NATIVE_CC_NE, RPC_INDEX, RPC_INDEX, offs); // ldrne pc, [pc, offs]
}

static inline void compemu_raw_jz_b_oponly(void)
{
  BNE_i(2);                                 // bne <jp>
  LDRSB_rRI(REG_WORK1, RPC_INDEX, 3);       // ldrsb r2,[pc,#3]
  ADD_rrr(RPC_INDEX, RPC_INDEX, REG_WORK1); // add pc,pc,r2

  emit_byte(0);
  emit_byte(0);
  emit_byte(0);

  // <jp:>
}

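// B_i takes a word offset relative to the branch address plus 8, while d
// arrives here as a byte displacement measured from the end of the branch
// instruction, hence the (d >> 2) - 1 conversion below.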
static inline void compemu_raw_branch(IMM d)
{
  B_i((d >> 2) - 1);
}

// Optimize access to struct regstruct with r11

LOWFUNC(NONE,NONE,1,compemu_raw_init_r_regstruct,(IMM s))
{
  uae_s32 offs = data_long_offs(s);
  LDR_rRI(R_REGSTRUCT, RPC_INDEX, offs);
}
LENDFUNC(NONE,NONE,1,compemu_raw_init_r_regstruct,(IMM s))

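// Both endblock variants below subtract the block's cycle count from
// countdown; when it drops below zero (MI) execution leaves translated
// code through popall_do_nothing, otherwise control transfers directly to
// the next block (via the cache_tags table, or a caller-emitted target).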
// Handle end of compiled block
LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_inreg,(RR4 rr_pc, IMM cycles))
{
  clobber_flags();

  // countdown -= scaled_cycles(totcycles);
  uae_s32 offs = (uae_u32)&countdown - (uae_u32)&regs;
  LDR_rRI(REG_WORK1, R_REGSTRUCT, offs);
  if(cycles >= 0 && cycles < 256) {
    SUBS_rri(REG_WORK1, REG_WORK1, cycles);
  } else {
    int offs2 = data_long_offs(cycles);
    LDR_rRI(REG_WORK2, RPC_INDEX, offs2);
    SUBS_rrr(REG_WORK1, REG_WORK1, REG_WORK2);
  }
  STR_rRI(REG_WORK1, R_REGSTRUCT, offs);
  CC_B_i(NATIVE_CC_MI, 3);

  BFC_rii(rr_pc, 16, 31); // apply TAGMASK
  offs = data_long_offs((uintptr)cache_tags);
  LDR_rRI(REG_WORK1, RPC_INDEX, offs);
  LDR_rRR_LSLi(REG_WORK1, REG_WORK1, rr_pc, 2);
  BX_r(REG_WORK1);

  offs = data_long_offs((uintptr)popall_do_nothing);
  LDR_rRI(REG_WORK1, RPC_INDEX, offs);
  BX_r(REG_WORK1);
}
LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_inreg,(RR4 rr_pc, IMM cycles))

LOWFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v))
{
  clobber_flags();

  // countdown -= scaled_cycles(totcycles);
  uae_s32 offs = (uae_u32)&countdown - (uae_u32)&regs;
  LDR_rRI(REG_WORK1, R_REGSTRUCT, offs);
  if(cycles >= 0 && cycles < 256) {
    SUBS_rri(REG_WORK1, REG_WORK1, cycles);
  } else {
    int offs2 = data_long_offs(cycles);
    LDR_rRI(REG_WORK2, RPC_INDEX, offs2);
    SUBS_rrr(REG_WORK1, REG_WORK1, REG_WORK2);
  }
  STR_rRI(REG_WORK1, R_REGSTRUCT, offs);
  CC_B_i(NATIVE_CC_MI, 1);

  LDR_rRI(REG_WORK1, RPC_INDEX, 16); // get target
  BX_r(REG_WORK1);

  offs = data_long_offs(v);
  LDR_rRI(REG_WORK1, RPC_INDEX, offs);
  offs = (uae_u32)&regs.pc_p - (uae_u32)&regs;
  STR_rRI(REG_WORK1, R_REGSTRUCT, offs);
  offs = data_long_offs((uintptr)popall_do_nothing);
  LDR_rRI(REG_WORK1, RPC_INDEX, offs);
  BX_r(REG_WORK1);

  // <target emitted by caller>
}
LENDFUNC(NONE,NONE,2,compemu_raw_endblock_pc_isconst,(IMM cycles, IMM v))