Compare commits

7 commits

Author      SHA1        Date                        Message
Flyinghead  187edde155  2019-11-05 23:18:36 +01:00  upgrade libretro savestate to v8. spg clean up
Flyinghead  8dc35a3916  2019-11-05 16:07:56 +01:00  use doubles to emulate FIPR on x86
                                                    fixes Sonic Adventure falling off the track in Windy Valley
Flyinghead  8766195f75  2019-11-04 22:38:47 +01:00  Merge remote-tracking branch 'origin/master' into fh/rec-doublefp
Flyinghead  e2c590c8a3  2019-11-02 20:28:08 +01:00  regalloc: convert 64-bit regs to 32-bit as needed
                                                    add size() method to shil_opcode
Flyinghead  06f61ef9a0  2019-11-02 16:03:55 +01:00  regalloc: allocate 64-bit registers for x64 and arm64 arch
Flyinghead  87c1840010  2019-11-02 12:02:39 +01:00  optimize read and write area7 handler. Simplify mem handlers template
                                                    Fix likely/unlikely macros. Add some to mmu and blockmanager
                                                    Fix verify macro
                                                    inline sh4_sched_now() and sh4_sched_now64()
                                                    shil: get rid of unused V2 and V3
Flyinghead  8de28dbe74  2019-11-02 11:55:17 +01:00  clean up unnecessary dtors, volatile. Add const. Missing init
36 changed files with 841 additions and 805 deletions

View file

@ -1627,20 +1627,20 @@ bool channel_unserialize(void **data, unsigned int *total_size, serialize_versio
REICAST_US(Chans[i].CA) ;
REICAST_US(Chans[i].step) ;
if (ver < V7)
if (ver != V8_LIBRETRO && ver < V7)
REICAST_US(dum); // Chans[i].update_rate
Chans[i].UpdatePitch();
REICAST_US(Chans[i].s0) ;
REICAST_US(Chans[i].s1) ;
REICAST_US(Chans[i].loop.looped);
if (ver < V7)
if (ver != V8_LIBRETRO && ver < V7)
{
REICAST_US(dum); // Chans[i].loop.LSA
REICAST_US(dum); // Chans[i].loop.LEA
}
Chans[i].UpdateLoop();
REICAST_US(Chans[i].adpcm.last_quant) ;
if (ver >= V7)
if (ver == V8_LIBRETRO || ver >= V7)
{
REICAST_US(Chans[i].adpcm.loopstart_quant);
REICAST_US(Chans[i].adpcm.loopstart_prev_sample);
@ -1653,21 +1653,21 @@ bool channel_unserialize(void **data, unsigned int *total_size, serialize_versio
Chans[i].adpcm.loopstart_prev_sample = 0;
}
REICAST_US(Chans[i].noise_state) ;
if (ver < V7)
if (ver != V8_LIBRETRO && ver < V7)
{
REICAST_US(dum); // Chans[i].VolMix.DLAtt
REICAST_US(dum); // Chans[i].VolMix.DRAtt
REICAST_US(dum); // Chans[i].VolMix.DSPAtt
}
Chans[i].UpdateAtts();
if (ver < V7)
if (ver != V8_LIBRETRO && ver < V7)
REICAST_US(dum); // Chans[i].VolMix.DSPOut
Chans[i].UpdateDSPMIX();
REICAST_US(Chans[i].AEG.val) ;
REICAST_US(Chans[i].AEG.state) ;
Chans[i].SetAegState(Chans[i].AEG.state);
if (ver < V7)
if (ver != V8_LIBRETRO && ver < V7)
{
REICAST_US(dum); // Chans[i].AEG.AttackRate
REICAST_US(dum); // Chans[i].AEG.Decay1Rate
@ -1678,7 +1678,7 @@ bool channel_unserialize(void **data, unsigned int *total_size, serialize_versio
Chans[i].UpdateAEG();
REICAST_US(Chans[i].FEG.value);
REICAST_US(Chans[i].FEG.state);
if (ver >= V7)
if (ver == V8_LIBRETRO || ver >= V7)
{
REICAST_US(Chans[i].FEG.prev1);
REICAST_US(Chans[i].FEG.prev2);
@ -1690,7 +1690,7 @@ bool channel_unserialize(void **data, unsigned int *total_size, serialize_versio
}
Chans[i].SetFegState(Chans[i].FEG.state);
Chans[i].UpdateFEG();
if (ver < V7)
if (ver != V8_LIBRETRO && ver < V7)
{
u8 dumu8;
REICAST_US(dumu8); // Chans[i].step_stream_lut1
@ -1700,10 +1700,10 @@ bool channel_unserialize(void **data, unsigned int *total_size, serialize_versio
Chans[i].UpdateStreamStep();
REICAST_US(Chans[i].lfo.counter) ;
if (ver < V7)
if (ver != V8_LIBRETRO && ver < V7)
REICAST_US(dum); // Chans[i].lfo.start_value
REICAST_US(Chans[i].lfo.state) ;
if (ver < V7)
if (ver != V8_LIBRETRO && ver < V7)
{
u8 dumu8;
REICAST_US(dumu8); // Chans[i].lfo.alfo
@ -1715,7 +1715,7 @@ bool channel_unserialize(void **data, unsigned int *total_size, serialize_versio
}
Chans[i].UpdateLFO();
REICAST_US(Chans[i].enabled) ;
if (ver < V7)
if (ver != V8_LIBRETRO && ver < V7)
REICAST_US(dum); // Chans[i].ChannelNumber
}
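
Every guard in this hunk follows the same shape because the libretro savestate versions occupy a numeric range separate from the mainline ones. A minimal sketch of the pattern, with assumed enum values (the project's real constants may differ):

// Illustrative only: libretro savestate versions are assumed to sit in a
// lower numeric range than the mainline V5..V7 versions, so an ordering
// test against V7 alone would misclassify a V8 libretro state.
enum serialize_version_enum {
    V5_LIBRETRO = 5, V6_LIBRETRO, V7_LIBRETRO, V8_LIBRETRO,
    V5 = 800, V6, V7
};

// Pre-V7 states still carry dummy legacy fields; V8_LIBRETRO states do
// not, even though V8_LIBRETRO < V7 numerically. Hence each guard reads:
static inline bool has_legacy_fields(serialize_version_enum ver)
{
    return ver != V8_LIBRETRO && ver < V7;
}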

View file

@ -258,9 +258,10 @@ static void WriteBios(u32 addr,u32 data,u32 sz)
//use unified size handler for registers
//it really makes no sense to use different size handlers on em -> especially when we can use templates :p
template<u32 sz, class T>
template<typename T>
T DYNACALL ReadMem_area0(u32 addr)
{
const u32 sz = (u32)sizeof(T);
addr &= 0x01FFFFFF;//to get rid of non needed bits
const u32 base=(addr>>16);
//map 0x0000 to 0x01FF to Default handler
@ -343,9 +344,10 @@ T DYNACALL ReadMem_area0(u32 addr)
return 0;
}
template<u32 sz, class T>
template<class T>
void DYNACALL WriteMem_area0(u32 addr,T data)
{
const u32 sz = (u32)sizeof(T);
addr &= 0x01FFFFFF;//to get rid of non needed bits
const u32 base=(addr>>16);
@ -494,7 +496,7 @@ static _vmem_handler area0_handler;
void map_area0_init()
{
area0_handler = _vmem_register_handler_Template(ReadMem_area0,WriteMem_area0);
area0_handler = _vmem_register_handler_Template(ReadMem_area0, WriteMem_area0);
}
void map_area0(u32 base)
{
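
The template change above drops a redundant non-type parameter: the access width is already implied by T, so deriving it with sizeof(T) makes a mismatched instantiation such as ReadMem_area0<2, u32> impossible, and the registration macro (next diff) shrinks to plain read<u8>/read<u16>/read<u32> instantiations. A sketch of the idiom, not project code:

// The handler's width now comes from its type parameter alone.
template <typename T>
T read_handler_sketch(u32 addr)
{
    const u32 sz = (u32)sizeof(T); // 1, 2 or 4 for u8/u16/u32
    // ... dispatch on (addr, sz) exactly as the real handler does ...
    (void)addr;
    return T();
}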

View file

@ -117,8 +117,8 @@ bool IsOnSh4Ram(u32 addr)
static void maple_DoDma()
{
verify(SB_MDEN &1)
verify(SB_MDST &1)
verify(SB_MDEN &1);
verify(SB_MDST &1);
DEBUG_LOG(MAPLE, "Maple: DoMapleDma SB_MDSTAR=%x", SB_MDSTAR);
u32 addr = SB_MDSTAR;

View file

@ -60,17 +60,8 @@ void _vmem_init_mappings();
//functions to register and map handlers/memory
_vmem_handler _vmem_register_handler(_vmem_ReadMem8FP* read8,_vmem_ReadMem16FP* read16,_vmem_ReadMem32FP* read32, _vmem_WriteMem8FP* write8,_vmem_WriteMem16FP* write16,_vmem_WriteMem32FP* write32);
#define _vmem_register_handler_Template(read,write) _vmem_register_handler \
(read<1,u8>,read<2,u16>,read<4,u32>, \
write<1,u8>,write<2,u16>,write<4,u32>)
#define _vmem_register_handler_Template1(read,write,extra_Tparam) _vmem_register_handler \
(read<1,u8,extra_Tparam>,read<2,u16,extra_Tparam>,read<4,u32,extra_Tparam>, \
write<1,u8,extra_Tparam>,write<2,u16,extra_Tparam>,write<4,u32,extra_Tparam>)
#define _vmem_register_handler_Template2(read,write,etp1,etp2) _vmem_register_handler \
(read<1,u8,etp1,etp2>,read<2,u16,etp1,etp2>,read<4,u32,etp1,etp2>, \
write<1,u8,etp1,etp2>,write<2,u16,etp1,etp2>,write<4,u32,etp1,etp2>)
#define _vmem_register_handler_Template(read, write) _vmem_register_handler(read<u8>, read<u16>, read<u32>, \
write<u8>, write<u16>, write<u32>)
void _vmem_map_handler(_vmem_handler Handler,u32 start,u32 end);
void _vmem_map_block(void* base,u32 start,u32 end,u32 mask);

View file

@ -268,7 +268,7 @@ static u32 vmem32_map_mmu(u32 address, bool write)
const vector<vram_lock>& blocks = vram_blocks[start / VRAM_PROT_SEGMENT];
vramlist_lock.Lock();
for (int i = blocks.size() - 1; i >= 0; i--)
for (int i = (int)blocks.size() - 1; i >= 0; i--)
{
if (blocks[i].start < end && blocks[i].end >= start)
{
@ -344,7 +344,7 @@ bool vmem32_handle_signal(void *fault_addr, bool write, u32 exception_pc)
if (!vmem32_inited || (u8*)fault_addr < virt_ram_base || (u8*)fault_addr >= virt_ram_base + VMEM32_SIZE)
return false;
//vmem32_page_faults++;
u32 guest_addr = (u8*)fault_addr - virt_ram_base;
u32 guest_addr = (u32)((u8*)fault_addr - virt_ram_base);
u32 rv = vmem32_map_address(guest_addr, write);
DEBUG_LOG(VMEM, "vmem32_handle_signal handled signal %s @ %p -> %08x rv=%d", write ? "W" : "R", fault_addr, guest_addr, rv);
if (rv == MMU_ERROR_NONE)
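
Both casts in this file make narrowing conversions explicit. On a 64-bit host, blocks.size() is a size_t and fault_addr - virt_ram_base is a ptrdiff_t, so assigning them to int and u32 would warn; the casts are safe because the loop index fits in an int and fault_addr was range-checked against virt_ram_base + VMEM32_SIZE just above. A sketch of the second case:

#include <cstdint>

// Illustrative helper: the pointer subtraction yields a 64-bit ptrdiff_t,
// but the caller has already verified fault_addr lies inside the guest
// mapping, so truncating to 32 bits loses nothing.
static inline uint32_t to_guest_addr(const uint8_t* fault_addr,
                                     const uint8_t* virt_ram_base)
{
    return (uint32_t)(fault_addr - virt_ram_base);
}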

View file

@ -51,7 +51,7 @@ protected:
bool RomPioAutoIncrement;
u32 DmaOffset;
u32 DmaCount;
u32 key;
u32 key =0;
// Naomi 840-0001E communication board
u16 comm_ctrl = 0xC000;
u16 comm_offset = 0;

View file

@ -1,3 +1,7 @@
//SPG emulation; Scanline/Raster beam registers & interrupts
//Time to emulate that stuff correctly ;)
//
//
#include "spg.h"
#include "Renderer_if.h"
#include "pvr_regs.h"
@ -6,38 +10,31 @@
#include "hw/sh4/sh4_sched.h"
#include "input/gamepad_device.h"
//SPG emulation; Scanline/Raster beam registers & interrupts
//Time to emulate that stuff correctly ;)
u32 in_vblank=0;
u32 in_vblank;
u32 clc_pvr_scanline;
u32 pvr_numscanlines=512;
u32 prv_cur_scanline=-1;
u32 vblk_cnt=0;
static u32 pvr_numscanlines = 512;
static u32 prv_cur_scanline = -1;
static u32 vblk_cnt;
float last_fps=0;
//54 mhz pixel clock :)
#define PIXEL_CLOCK (54*1000*1000/2)
u32 Line_Cycles=0;
u32 Frame_Cycles=0;
static u32 Line_Cycles;
static u32 Frame_Cycles;
int render_end_schid;
int vblank_schid;
void CalculateSync()
{
u32 pixel_clock;
float scale_x=1,scale_y=1;
u32 pixel_clock = PIXEL_CLOCK / (FB_R_CTRL.vclk_div ? 1 : 2);
pixel_clock=PIXEL_CLOCK / (FB_R_CTRL.vclk_div?1:2);
//We need to calculate the pixel clock
u32 sync_cycles=(SPG_LOAD.hcount+1)*(SPG_LOAD.vcount+1);
pvr_numscanlines=SPG_LOAD.vcount+1;
Line_Cycles=(u32)((u64)SH4_MAIN_CLOCK*(u64)(SPG_LOAD.hcount+1)/(u64)pixel_clock);
float scale_x = 1;
float scale_y = 1;
if (SPG_CONTROL.interlace)
{
//this is a temp hack
@ -59,19 +56,15 @@ void CalculateSync()
rend_set_fb_scale(scale_x,scale_y);
//Frame_Cycles=(u64)DCclock*(u64)sync_cycles/(u64)pixel_clock;
Frame_Cycles=pvr_numscanlines*Line_Cycles;
prv_cur_scanline=0;
sh4_sched_request(vblank_schid,Line_Cycles);
}
double speed_load_mspdf;
int mips_counter;
double full_rps;
static double full_rps;
static u32 lightgun_line = 0xffff;
static u32 lightgun_hpos;

View file

@ -57,54 +57,52 @@ DynarecCodeEntryPtr DYNACALL bm_GetCodeByVAddr(u32 addr)
if (!mmu_enabled())
#endif
return bm_GetCode(addr);
#ifndef NO_MMU
else
{
if (addr & 1)
{
switch (addr)
{
#ifdef USE_WINCE_HACK
case 0xfffffde7: // GetTickCount
// This should make this syscall faster
r[0] = sh4_sched_now64() * 1000 / SH4_MAIN_CLOCK;
next_pc = pr;
break;
case 0xfffffd05: // QueryPerformanceCounter(u64 *)
#ifndef NO_MMU
if (unlikely(addr & 1))
{
switch (addr)
{
#ifdef USE_WINCE_HACK
case 0xfffffde7: // GetTickCount
// This should make this syscall faster
r[0] = sh4_sched_now64() * 1000 / SH4_MAIN_CLOCK;
next_pc = pr;
break;
case 0xfffffd05: // QueryPerformanceCounter(u64 *)
{
u32 paddr;
if (mmu_data_translation<MMU_TT_DWRITE, u64>(r[4], paddr) == MMU_ERROR_NONE)
{
u32 paddr;
if (mmu_data_translation<MMU_TT_DWRITE, u64>(r[4], paddr) == MMU_ERROR_NONE)
{
_vmem_WriteMem64(paddr, sh4_sched_now64() >> 4);
r[0] = 1;
next_pc = pr;
}
else
{
Do_Exception(addr, 0xE0, 0x100);
}
_vmem_WriteMem64(paddr, sh4_sched_now64() >> 4);
r[0] = 1;
next_pc = pr;
}
break;
else
{
Do_Exception(addr, 0xE0, 0x100);
}
}
break;
#endif
default:
Do_Exception(addr, 0xE0, 0x100);
break;
}
addr = next_pc;
default:
Do_Exception(addr, 0xE0, 0x100);
break;
}
u32 paddr;
u32 rv = mmu_instruction_translation(addr, paddr);
if (rv != MMU_ERROR_NONE)
{
DoMMUException(addr, rv, MMU_TT_IREAD);
mmu_instruction_translation(next_pc, paddr);
}
return bm_GetCode(paddr);
addr = next_pc;
}
u32 paddr;
u32 rv = mmu_instruction_translation(addr, paddr);
if (unlikely(rv != MMU_ERROR_NONE))
{
DoMMUException(addr, rv, MMU_TT_IREAD);
mmu_instruction_translation(next_pc, paddr);
}
return bm_GetCode(paddr);
#endif
}
@ -115,7 +113,7 @@ RuntimeBlockInfoPtr DYNACALL bm_GetBlock(u32 addr)
DynarecCodeEntryPtr cde = bm_GetCode(addr); // Returns RX ptr
if (cde == ngen_FailedToFindBlock)
return NULL;
return nullptr;
else
return bm_GetBlock((void*)cde); // Returns RX pointer
}
@ -124,18 +122,18 @@ RuntimeBlockInfoPtr DYNACALL bm_GetBlock(u32 addr)
RuntimeBlockInfoPtr bm_GetBlock(void* dynarec_code)
{
if (blkmap.empty())
return NULL;
return nullptr;
void *dynarecrw = CC_RX2RW(dynarec_code);
// Returns a block who's code addr is bigger than dynarec_code (or end)
auto iter = blkmap.upper_bound(dynarecrw);
if (iter == blkmap.begin())
return NULL;
return nullptr;
iter--; // Need to go back to find the potential candidate
// However it might be out of bounds, check for that
if ((u8*)iter->second->code + iter->second->host_code_size < (u8*)dynarec_code)
return NULL;
return nullptr;
verify(iter->second->contains_code((u8*)dynarecrw));
return iter->second;
@ -151,7 +149,7 @@ RuntimeBlockInfoPtr bm_GetStaleBlock(void* dynarec_code)
{
void *dynarecrw = CC_RX2RW(dynarec_code);
if (del_blocks.empty())
return NULL;
return nullptr;
// Start from the end to get the youngest one
auto it = del_blocks.end();
do
@ -161,7 +159,7 @@ RuntimeBlockInfoPtr bm_GetStaleBlock(void* dynarec_code)
return *it;
} while (it != del_blocks.begin());
return NULL;
return nullptr;
}
void bm_AddBlock(RuntimeBlockInfo* blk)
@ -587,8 +585,7 @@ void bm_RamWriteAccess(u32 addr)
unprotected_pages[addr / PAGE_SIZE] = true;
bm_UnlockPage(addr);
set<RuntimeBlockInfo*>& block_list = blocks_per_page[addr / PAGE_SIZE];
vector<RuntimeBlockInfo*> list_copy;
list_copy.insert(list_copy.begin(), block_list.begin(), block_list.end());
vector<RuntimeBlockInfo*> list_copy(block_list.begin(), block_list.end());
if (!list_copy.empty())
DEBUG_LOG(DYNAREC, "bm_RamWriteAccess write access to %08x pc %08x", addr, next_pc);
for (auto& block : list_copy)

View file

@ -393,7 +393,7 @@ struct RegAlloc
}
else
{
verify(regs.type==FMT_V4 || regs.type==FMT_V2 || regs.type==FMT_F64);
verify(regs.type==FMT_V4 || regs.type==FMT_F64);
for (u32 i=0; i<regs.count(); i++)
{

View file

@ -14,16 +14,12 @@ enum shil_param_type
FMT_F32,
FMT_F64,
FMT_V2,
FMT_V3,
FMT_V4,
FMT_V8,
FMT_V16,
FMT_REG_BASE=FMT_I32,
FMT_VECTOR_BASE=FMT_V2,
FMT_MASK=0xFFFF,
FMT_REG_BASE = FMT_I32,
FMT_VECTOR_BASE = FMT_V4,
};
/*
@ -123,11 +119,12 @@ struct shil_param
u32 imm_value() const { verify(is_imm()); return _imm; }
bool is_vector() const { return type>=FMT_VECTOR_BASE; }
bool is_vector() const { return type >= FMT_VECTOR_BASE; }
u32 count() const { return type==FMT_F64?2:type==FMT_V2?2:
type==FMT_V3?3:type==FMT_V4?4:type==FMT_V8?8:
type==FMT_V16?16:1; } //count of hardware regs
u32 count() const { return type == FMT_F64 ? 2
: type == FMT_V4 ? 4
: type == FMT_V8 ? 8
: type == FMT_V16 ? 16 : 1; } //count of hardware regs
/*
Imms:
@ -145,9 +142,7 @@ struct shil_param
struct shil_opcode
{
shilop op;
u32 Flow;
u32 flags;
u32 flags2;
shil_param rd,rd2;
shil_param rs1,rs2,rs3;
@ -157,6 +152,7 @@ struct shil_opcode
bool delay_slot;
string dissasm() const;
u32 size() const { return flags & 0x7f; }
};
const char* shil_opcode_name(int op);
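
The new size() accessor names an encoding that call sites previously spelled out by hand; only the 0x7f mask is taken from this diff, the rest of the flags layout is not shown here. A usage sketch:

// Low 7 bits of flags = memory-access size in bytes (1, 2, 4 or 8).
struct shil_opcode_sketch {
    u32 flags;
    u32 size() const { return flags & 0x7f; }
};

// Call sites below (ssa pass, rec_arm, rec_arm64, rec_x64) migrate from
// the raw mask to the accessor:
//   switch (op.flags & 0x7f)   ->   switch (op.size())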

View file

@ -914,18 +914,32 @@ shil_opc_end()
//shop_fipr
shil_opc(fipr)
#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
shil_canonical
(
f32,f1,(float* fn, float* fm),
float idp;
idp=fn[0]*fm[0];
idp+=fn[1]*fm[1];
idp+=fn[2]*fm[2];
idp+=fn[3]*fm[3];
// Using double for better precision on x86 (Sonic Adventure 2)
double idp = (double)fn[0] * fm[0];
idp += (double)fn[1] * fm[1];
idp += (double)fn[2] * fm[2];
idp += (double)fn[3] * fm[3];
return fixNaN((float)idp);
)
#else
shil_canonical
(
f32,f1,(float* fn, float* fm),
float idp = fn[0] * fm[0];
idp += fn[1] * fm[1];
idp += fn[2] * fm[2];
idp += fn[3] * fm[3];
return fixNaN(idp);
)
#endif
shil_compile
(
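
The precision argument behind this change (and the matching interpreter change to the fipr opcode further down): accumulating in float absorbs small terms that sit next to large ones, while double keeps them. A standalone illustration, not project code:

#include <cstdio>

int main()
{
    float fn[4] = { 1e8f, 1.0f, -1e8f, 0.5f };
    float fm[4] = { 1.0f, 1.0f,  1.0f, 1.0f };

    float  f = 0.0f;
    double d = 0.0;
    for (int i = 0; i < 4; i++) {
        f += fn[i] * fm[i];          // 1.0f is absorbed into 1e8f
        d += (double)fn[i] * fm[i];  // 1.0 survives in double
    }
    printf("float: %g  double: %g\n", f, (float)d); // float: 0.5  double: 1.5
    return 0;
}

Real SH4 hardware computes FIPR with extra internal precision, which is presumably why the float chain diverged far enough to break Sonic Adventure's collision in Windy Valley.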

View file

@ -223,7 +223,7 @@ private:
if (op.rs1.is_imm() && op.op == shop_readm && block->read_only
&& (op.rs1._imm >> 12) >= (block->vaddr >> 12)
&& (op.rs1._imm >> 12) <= ((block->vaddr + block->sh4_code_size - 1) >> 12)
&& (op.flags & 0x7f) <= 4)
&& op.size() <= 4)
{
bool doit = false;
if (mmu_enabled())
@ -240,7 +240,7 @@ private:
if (doit)
{
u32 v;
switch (op.flags & 0x7f)
switch (op.size())
{
case 1:
v = (s32)(::s8)ReadMem8(op.rs1._imm);
@ -566,7 +566,7 @@ private:
}
// Attempt to eliminate them
for (auto& alias : aliases)
for (const auto& alias : aliases)
{
if (writeback_values.count(alias.first) > 0)
continue;

View file

@ -28,7 +28,7 @@
#define ssa_printf(...) DEBUG_LOG(DYNAREC, __VA_ARGS__)
template<typename nreg_t, typename nregf_t, bool explode_spans = true>
template<typename nreg_t, typename nregf_t, bool _64bits = true>
class RegAlloc
{
public:
@ -52,6 +52,7 @@ public:
void OpBegin(shil_opcode* op, int opid)
{
// TODO dup code with NeedsWriteBack
opnum = opid;
if (op->op == shop_ifb)
{
@ -79,17 +80,17 @@ public:
FlushReg((Sh4RegType)i, true);
}
// Flush regs used by vector ops
if (op->rs1.is_reg() && op->rs1.count() > 1)
if (IsVector(op->rs1))
{
for (int i = 0; i < op->rs1.count(); i++)
FlushReg((Sh4RegType)(op->rs1._reg + i), false);
}
if (op->rs2.is_reg() && op->rs2.count() > 1)
if (IsVector(op->rs2))
{
for (int i = 0; i < op->rs2.count(); i++)
FlushReg((Sh4RegType)(op->rs2._reg + i), false);
}
if (op->rs3.is_reg() && op->rs3.count() > 1)
if (IsVector(op->rs3))
{
for (int i = 0; i < op->rs3.count(); i++)
FlushReg((Sh4RegType)(op->rs3._reg + i), false);
@ -101,7 +102,7 @@ public:
AllocSourceReg(op->rs3);
// Hard flush vector ops destination regs
// Note that this is incorrect if a reg is both src (scalar) and dest (vec). However such an op doesn't exist.
if (op->rd.is_reg() && op->rd.count() > 1)
if (IsVector(op->rd))
{
for (int i = 0; i < op->rd.count(); i++)
{
@ -109,7 +110,7 @@ public:
FlushReg((Sh4RegType)(op->rd._reg + i), true);
}
}
if (op->rd2.is_reg() && op->rd2.count() > 1)
if (IsVector(op->rd2))
{
for (int i = 0; i < op->rd2.count(); i++)
{
@ -171,41 +172,26 @@ public:
bool IsAllocAny(const shil_param& prm)
{
if (prm.is_reg())
{
bool rv = IsAllocAny(prm._reg);
if (prm.count() != 1)
{
for (u32 i = 1;i < prm.count(); i++)
verify(IsAllocAny((Sh4RegType)(prm._reg + i)) == rv);
}
return rv;
}
else
{
return false;
}
return IsAllocg(prm) || IsAllocf(prm);
}
bool IsAllocg(const shil_param& prm)
{
if (prm.is_reg())
if (prm.is_reg() && IsAllocg(prm._reg))
{
verify(prm.count() == 1);
return IsAllocg(prm._reg);
}
else
{
return false;
return true;
}
return false;
}
bool IsAllocf(const shil_param& prm)
{
if (prm.is_reg())
{
verify(prm.count() == 1);
return IsAllocf(prm._reg);
if (!_64bits && prm.is_r64f())
return false;
return IsAllocf(prm._reg, prm.count());
}
else
{
@ -223,7 +209,10 @@ public:
nregf_t mapf(const shil_param& prm)
{
verify(IsAllocf(prm));
verify(prm.count() == 1);
if (_64bits)
verify(prm.count() <= 2);
else
verify(prm.count() == 1);
return mapf(prm._reg);
}
@ -257,15 +246,20 @@ public:
virtual void Preload(u32 reg, nreg_t nreg) = 0;
virtual void Writeback(u32 reg, nreg_t nreg) = 0;
virtual void Preload_FPU(u32 reg, nregf_t nreg) = 0;
virtual void Writeback_FPU(u32 reg, nregf_t nreg) = 0;
virtual void Preload_FPU(u32 reg, nregf_t nreg, bool _64bit) = 0;
virtual void Writeback_FPU(u32 reg, nregf_t nreg, bool _64bit) = 0;
// merge reg1 (least significant 32 bits) and reg2 (most significant 32 bits) into reg1 (64-bit result)
virtual void Merge_FPU(nregf_t reg1, nregf_t reg2) { die("not implemented"); }
// shift given 64-bit reg right by 32 bits
virtual void Shift_FPU(nregf_t reg) { die("not implemented"); }
private:
struct reg_alloc {
u32 host_reg;
u16 version;
u16 version[2];
bool write_back;
bool dirty;
bool _64bit;
};
bool IsFloat(Sh4RegType reg)
@ -285,11 +279,15 @@ private:
return (nregf_t)reg_alloced[reg].host_reg;
}
bool IsAllocf(Sh4RegType reg)
bool IsAllocf(Sh4RegType reg, int size)
{
if (!IsFloat(reg))
return false;
return reg_alloced.find(reg) != reg_alloced.end();
auto it = reg_alloced.find(reg);
if (it == reg_alloced.end())
return false;
verify(it->second._64bit == (size == 2));
return true;
}
bool IsAllocg(Sh4RegType reg)
@ -299,9 +297,14 @@ private:
return reg_alloced.find(reg) != reg_alloced.end();
}
bool IsAllocAny(Sh4RegType reg)
bool IsVector(const shil_param& param)
{
return IsAllocg(reg) || IsAllocf(reg);
return param.is_reg() && param.count() > (_64bits ? 2 : 1);
}
bool ContainsReg(const shil_param& param, Sh4RegType reg)
{
return param.is_reg() && reg >= param._reg && reg < (Sh4RegType)(param._reg + param.count());
}
void WriteBackReg(Sh4RegType reg_num, struct reg_alloc& reg_alloc)
@ -310,9 +313,9 @@ private:
{
if (!fast_forwarding)
{
ssa_printf("WB %s.%d <- %cx", name_reg(reg_num).c_str(), reg_alloc.version, 'a' + reg_alloc.host_reg);
ssa_printf("WB %s.%d <- %cx", name_reg(reg_num).c_str(), reg_alloc.version[0], 'a' + reg_alloc.host_reg);
if (IsFloat(reg_num))
Writeback_FPU(reg_num, (nregf_t)reg_alloc.host_reg);
Writeback_FPU(reg_num, (nregf_t)reg_alloc.host_reg, reg_alloc._64bit);
else
Writeback(reg_num, (nreg_t)reg_alloc.host_reg);
}
@ -320,12 +323,14 @@ private:
reg_alloc.dirty = false;
}
}
void FlushReg(Sh4RegType reg_num, bool hard)
protected:
void FlushReg(Sh4RegType reg_num, bool hard, bool write_if_dirty = false)
{
auto reg = reg_alloced.find(reg_num);
if (reg != reg_alloced.end())
{
if (write_if_dirty && reg->second.dirty)
reg->second.write_back = true;
WriteBackReg(reg->first, reg->second);
if (hard)
{
@ -339,6 +344,7 @@ private:
}
}
private:
void FlushAllRegs(bool hard)
{
if (hard)
@ -355,8 +361,11 @@ private:
void AllocSourceReg(const shil_param& param)
{
if (param.is_reg() && param.count() == 1) // TODO EXPLODE_SPANS?
if (param.is_reg()
&& ((_64bits && param.count() <= 2) || (!_64bits && param.count() == 1)))
{
Handle64bitRegisters(param, true);
auto it = reg_alloced.find(param._reg);
if (it == reg_alloced.end())
{
@ -381,16 +390,24 @@ private:
host_reg = host_fregs.back();
host_fregs.pop_back();
}
reg_alloced[param._reg] = { host_reg, param.version[0], false, false };
if (param.is_r64f())
reg_alloced[param._reg] = { host_reg, { param.version[0], param.version[1] }, false, false, true };
else
reg_alloced[param._reg] = { host_reg, { param.version[0] }, false, false, false };
if (!fast_forwarding)
{
ssa_printf("PL %s.%d -> %cx", name_reg(param._reg).c_str(), param.version[0], 'a' + host_reg);
if (IsFloat(param._reg))
Preload_FPU(param._reg, (nregf_t)host_reg);
Preload_FPU(param._reg, (nregf_t)host_reg, param.count() == 2);
else
Preload(param._reg, (nreg_t)host_reg);
}
}
else
{
verify(it->second._64bit == (param.count() == 2));
}
verify(param.count() == 1 || reg_alloced.find((Sh4RegType)(param._reg + 1)) == reg_alloced.end());
}
}
@ -400,14 +417,29 @@ private:
{
shil_opcode* op = &block->oplist[i];
// if a subsequent op needs all or some regs flushed to mem
switch (op->op)
{
// TODO we could look at the ifb op to optimize what to flush
if (op->op == shop_ifb || (mmu_enabled() && (op->op == shop_readm || op->op == shop_writem || op->op == shop_pref)))
return true;
if (op->op == shop_sync_sr && (/*reg == reg_sr_T ||*/ reg == reg_sr_status || reg == reg_old_sr_status || (reg >= reg_r0 && reg <= reg_r7)
|| (reg >= reg_r0_Bank && reg <= reg_r7_Bank)))
return true;
if (op->op == shop_sync_fpscr && (reg == reg_fpscr || reg == reg_old_fpscr || (reg >= reg_fr_0 && reg <= reg_xf_15)))
case shop_ifb:
return true;
case shop_readm:
case shop_writem:
case shop_pref:
if (mmu_enabled())
return true;
break;
case shop_sync_sr:
if (/*reg == reg_sr_T ||*/ reg == reg_sr_status || reg == reg_old_sr_status || (reg >= reg_r0 && reg <= reg_r7)
|| (reg >= reg_r0_Bank && reg <= reg_r7_Bank))
return true;
break;
case shop_sync_fpscr:
if (reg == reg_fpscr || reg == reg_old_fpscr || (reg >= reg_fr_0 && reg <= reg_xf_15))
return true;
break;
default:
break;
}
// if reg is used by a subsequent vector op that doesn't use reg allocation
if (UsesReg(op, reg, version, true))
return true;
@ -423,8 +455,11 @@ private:
void AllocDestReg(const shil_param& param)
{
if (param.is_reg() && param.count() == 1) // TODO EXPLODE_SPANS?
if (param.is_reg()
&& ((_64bits && param.count() <= 2) || (!_64bits && param.count() == 1)))
{
Handle64bitRegisters(param, false);
auto it = reg_alloced.find(param._reg);
if (it == reg_alloced.end())
{
@ -449,7 +484,21 @@ private:
host_reg = host_fregs.back();
host_fregs.pop_back();
}
reg_alloced[param._reg] = { host_reg, param.version[0], NeedsWriteBack(param._reg, param.version[0]), true };
if (param.is_r64f())
reg_alloced[param._reg] = {
host_reg,
{ param.version[0], param.version[1] },
NeedsWriteBack(param._reg, param.version[0])
|| NeedsWriteBack((Sh4RegType)(param._reg + 1), param.version[1]),
true,
true };
else
reg_alloced[param._reg] = {
host_reg,
{ param.version[0] },
NeedsWriteBack(param._reg, param.version[0]),
true,
false };
ssa_printf(" %s.%d -> %cx %s", name_reg(param._reg).c_str(), param.version[0], 'a' + host_reg, reg_alloced[param._reg].write_back ? "(wb)" : "");
}
else
@ -458,9 +507,17 @@ private:
verify(!reg.write_back);
reg.write_back = NeedsWriteBack(param._reg, param.version[0]);
reg.dirty = true;
reg.version = param.version[0];
reg.version[0] = param.version[0];
verify(reg._64bit == param.is_r64f());
if (param.is_r64f())
{
reg.version[1] = param.version[1];
// TODO this is handled by Handle64BitsRegisters()
reg.write_back = reg.write_back || NeedsWriteBack((Sh4RegType)(param._reg + 1), param.version[1]);
}
}
verify(reg_alloced[param._reg].dirty);
verify(param.count() == 1 || reg_alloced.find((Sh4RegType)(param._reg + 1)) == reg_alloced.end());
}
}
@ -495,7 +552,8 @@ private:
{
op = &block->oplist[i];
// Vector ops don't use reg alloc
if (UsesReg(op, reg.first, reg.second.version, false))
if (UsesReg(op, reg.first, reg.second.version[0], false)
|| (reg.second._64bit && UsesReg(op, (Sh4RegType)(reg.first + 1), reg.second.version[1], false)))
{
first_use = i;
break;
@ -531,8 +589,9 @@ private:
// It's possible that the same host reg is allocated to a source operand
// and to the (future) dest operand. In this case we want to keep both mappings
// until the current op is done.
WriteBackReg(spilled_reg, reg_alloced[spilled_reg]);
u32 host_reg = reg_alloced[spilled_reg].host_reg;
reg_alloc& alloc = reg_alloced[spilled_reg];
WriteBackReg(spilled_reg, alloc);
u32 host_reg = alloc.host_reg;
if (IsFloat(spilled_reg))
host_fregs.push_front((nregf_t)host_reg);
else
@ -541,24 +600,19 @@ private:
}
}
bool IsVectorOp(shil_opcode* op)
{
return op->rs1.count() > 1 || op->rs2.count() > 1 || op->rs3.count() > 1 || op->rd.count() > 1 || op->rd2.count() > 1;
}
bool UsesReg(shil_opcode* op, Sh4RegType reg, u32 version, bool vector)
{
if (op->rs1.is_reg() && reg >= op->rs1._reg && reg < (Sh4RegType)(op->rs1._reg + op->rs1.count())
if (ContainsReg(op->rs1, reg)
&& version == op->rs1.version[reg - op->rs1._reg]
&& vector == (op->rs1.count() > 1))
&& vector == IsVector(op->rs1))
return true;
if (op->rs2.is_reg() && reg >= op->rs2._reg && reg < (Sh4RegType)(op->rs2._reg + op->rs2.count())
if (ContainsReg(op->rs2, reg)
&& version == op->rs2.version[reg - op->rs2._reg]
&& vector == (op->rs2.count() > 1))
&& vector == IsVector(op->rs2))
return true;
if (op->rs3.is_reg() && reg >= op->rs3._reg && reg < (Sh4RegType)(op->rs3._reg + op->rs3.count())
if (ContainsReg(op->rs3, reg)
&& version == op->rs3.version[reg - op->rs3._reg]
&& vector == (op->rs3.count() > 1))
&& vector == IsVector(op->rs3))
return true;
return false;
@ -566,14 +620,68 @@ private:
bool DefsReg(shil_opcode* op, Sh4RegType reg, bool vector)
{
if (op->rd.is_reg() && reg >= op->rd._reg && reg < (Sh4RegType)(op->rd._reg + op->rd.count())
&& vector == (op->rd.count() > 1))
if (ContainsReg(op->rd, reg) && vector == IsVector(op->rd))
return true;
if (op->rd2.is_reg() && reg >= op->rd2._reg && reg < (Sh4RegType)(op->rd2._reg + op->rd2.count())
&& vector == (op->rd2.count() > 1))
if (ContainsReg(op->rd2, reg) && vector == IsVector(op->rd2))
return true;
return false;
}
void Handle64bitRegisters(const shil_param& param, bool source)
{
if (!(_64bits && (param.is_r32f() || param.is_r64f())))
return;
auto it = reg_alloced.find(param._reg);
if (it != reg_alloced.end() && it->second._64bit != param.is_r64f())
{
if (param.is_r64f())
{
// Try to merge existing halves
auto it2 = reg_alloced.find((Sh4RegType)(param._reg + 1));
if (it2 != reg_alloced.end())
{
if (source)
it->second.dirty = it->second.dirty || it2->second.dirty;
else
it->second.dirty = false;
it->second._64bit = true;
nregf_t host_reg2 = (nregf_t)it2->second.host_reg;
reg_alloced.erase(it2);
Merge_FPU((nregf_t)it->second.host_reg, host_reg2);
return;
}
}
// Write back the 64-bit register even if used as destination because the other half needs to be saved
FlushReg(it->first, param.is_r64f(), source || it->second._64bit);
if (!param.is_r64f())
{
// Reuse existing reg
it->second._64bit = false;
}
}
if (param.is_r64f())
{
auto it2 = reg_alloced.find((Sh4RegType)(param._reg + 1));
if (it2 != reg_alloced.end())
FlushReg(it2->first, true, source);
}
else if (param._reg & 1)
{
auto it2 = reg_alloced.find((Sh4RegType)(param._reg - 1));
if (it2 != reg_alloced.end() && it2->second._64bit)
{
// Write back even when used as destination because the other half needs to be saved
FlushReg(it2->first, false, true);
reg_alloc alloc = it2->second;
Shift_FPU((nregf_t)alloc.host_reg);
alloc._64bit = false;
alloc.version[0] = alloc.version[1];
reg_alloced.erase(it2);
reg_alloced[param._reg] = alloc;
}
}
}
#if 0
// Currently unused. Doesn't seem to help much
bool DefsReg(int from, int to, Sh4RegType reg)

View file

@ -73,7 +73,7 @@ INLINE void Denorm32(float &value)
#define CHECK_FPU_32(v) v = fixNaN(v)
#define CHECK_FPU_64(v)
#define CHECK_FPU_64(v) v = fixNaN64(v)
//fadd <FREG_M>,<FREG_N>
@ -116,7 +116,7 @@ sh4op(i1111_nnnn_mmmm_0001)
double drn=GetDR(n), drm=GetDR(m);
drn-=drm;
//dr[n] -= dr[m];
CHECK_FPU_64(drn);
SetDR(n,drn);
}
}
@ -137,7 +137,7 @@ sh4op(i1111_nnnn_mmmm_0010)
double drn=GetDR(n), drm=GetDR(m);
drn*=drm;
//dr[n] *= dr[m];
CHECK_FPU_64(drn);
SetDR(n,drn);
}
}
@ -160,6 +160,7 @@ sh4op(i1111_nnnn_mmmm_0011)
double drn=GetDR(n), drm=GetDR(m);
drn/=drm;
CHECK_FPU_64(drn);
SetDR(n,drn);
}
}
@ -506,14 +507,20 @@ sh4op(i1111_nnmm_1110_1101)
int m=(GetN(op)&0x3)<<2;
if(fpscr.PR ==0)
{
float idp;
idp=fr[n+0]*fr[m+0];
idp+=fr[n+1]*fr[m+1];
idp+=fr[n+2]*fr[m+2];
idp+=fr[n+3]*fr[m+3];
CHECK_FPU_32(idp);
fr[n+3]=idp;
#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
double idp = (double)fr[n + 0] * fr[m + 0];
idp += (double)fr[n + 1] * fr[m + 1];
idp += (double)fr[n + 2] * fr[m + 2];
idp += (double)fr[n + 3] * fr[m + 3];
float rv = (float)idp;
#else
float rv = fr[n + 0] * fr[m + 0];
rv += fr[n + 1] * fr[m + 1];
rv += fr[n + 2] * fr[m + 2];
rv += fr[n + 3] * fr[m + 3];
#endif
CHECK_FPU_32(rv);
fr[n + 3] = rv;
}
else
{
@ -598,7 +605,6 @@ sh4op(i1111_1011_1111_1101)
//fschg
sh4op(i1111_0011_1111_1101)
{
//iNimp("fschg");
fpscr.SZ = 1 - fpscr.SZ;
}
@ -616,8 +622,9 @@ sh4op(i1111_nnnn_0110_1101)
{
//Operation _can_ be done on sh4
u32 n = GetN(op)>>1;
SetDR(n,sqrt(GetDR(n)));
f64 v = sqrt(GetDR(n));
CHECK_FPU_64(v);
SetDR(n, v);
}
}
@ -656,7 +663,6 @@ sh4op(i1111_nnnn_0011_1101)
//fmac <FREG_0>,<FREG_M>,<FREG_N>
sh4op(i1111_nnnn_mmmm_1110)
{
//iNimp("fmac <FREG_0>,<FREG_M>,<FREG_N>");
if (fpscr.PR==0)
{
u32 n = GetN(op);
@ -675,8 +681,6 @@ sh4op(i1111_nnnn_mmmm_1110)
//ftrv xmtrx,<FV_N>
sh4op(i1111_nn01_1111_1101)
{
//iNimp("ftrv xmtrx,<FV_N>");
/*
XF[0] XF[4] XF[8] XF[12] FR[n] FR[n]
XF[1] XF[5] XF[9] XF[13] * FR[n+1] -> FR[n+1]

View file

@ -62,7 +62,7 @@ u32 mmu_full_lookup(u32 va, const TLB_Entry **entry, u32& rv);
#ifdef FAST_MMU
static INLINE u32 mmu_instruction_translation(u32 va, u32& rv)
{
if (va & 1)
if (unlikely(va & 1))
return MMU_ERROR_BADADDR;
if (fast_reg_lut[va >> 29] != 0)
{
@ -100,7 +100,7 @@ void DoMMUException(u32 addr, u32 error_code, u32 access_type);
{
u32 addr;
u32 rv = mmu_data_translation<MMU_TT_DREAD, T>(adr, addr);
if (rv != MMU_ERROR_NONE)
if (unlikely(rv != MMU_ERROR_NONE))
{
DoMMUException(adr, rv, MMU_TT_DREAD);
*exception_occurred = 1;
@ -118,7 +118,7 @@ void DoMMUException(u32 addr, u32 error_code, u32 access_type);
{
u32 addr;
u32 rv = mmu_data_translation<MMU_TT_DWRITE, T>(adr, addr);
if (rv != MMU_ERROR_NONE)
if (unlikely(rv != MMU_ERROR_NONE))
{
DoMMUException(adr, rv, MMU_TT_DWRITE);
return 1;
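
The unlikely() hints added throughout this header (and in bm_GetCodeByVAddr above) correspond to the commit message's "Fix likely/unlikely macros". A sketch of the usual definitions, assuming GCC/Clang builtins; the project's own macros may differ in detail:

#if defined(__GNUC__) || defined(__clang__)
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x)   (x)
#define unlikely(x) (x)
#endif

Marking the MMU-miss branch unlikely lets the compiler emit the translation-hit path as straight-line fall-through code, which matters in helpers inlined into every guest memory access.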

View file

@ -33,6 +33,7 @@ void ubc_term();
void tmu_init();
void tmu_reset(bool hard);
void tmu_term();
u32 read_TMU_TCNTch(u32 ch);
void ccn_init();
void ccn_reset();

View file

@ -276,7 +276,7 @@ struct Sh4Context
sr_status_t old_sr;
fpscr_t old_fpscr;
volatile u32 CpuRunning;
u32 CpuRunning;
int sh4_sched_next;
u32 interrupt_pend;

View file

@ -84,23 +84,23 @@ static void map_area4(u32 base)
//AREA 5 -- Ext. Device
//Read Ext.Device
template <u32 sz,class T>
template <typename T>
T DYNACALL ReadMem_extdev_T(u32 addr)
{
return (T)libExtDevice_ReadMem_A5(addr,sz);
return (T)libExtDevice_ReadMem_A5(addr, sizeof(T));
}
//Write Ext.Device
template <u32 sz,class T>
void DYNACALL WriteMem_extdev_T(u32 addr,T data)
template <typename T>
void DYNACALL WriteMem_extdev_T(u32 addr, T data)
{
libExtDevice_WriteMem_A5(addr,data,sz);
libExtDevice_WriteMem_A5(addr, data, sizeof(T));
}
_vmem_handler area5_handler;
static void map_area5_init()
{
area5_handler = _vmem_register_handler_Template(ReadMem_extdev_T,WriteMem_extdev_T);
area5_handler = _vmem_register_handler_Template(ReadMem_extdev_T, WriteMem_extdev_T);
}
static void map_area5(u32 base)

View file

@ -155,7 +155,7 @@ offset>>=2;
//Region P4
//Read P4
template <u32 sz,class T>
template <class T>
T DYNACALL ReadMem_P4(u32 addr)
{
switch((addr>>24)&0xFF)
@ -228,7 +228,7 @@ T DYNACALL ReadMem_P4(u32 addr)
}
//Write P4
template <u32 sz,class T>
template <class T>
void DYNACALL WriteMem_P4(u32 addr,T data)
{
/*if (((addr>>26)&0x7)==7)
@ -406,37 +406,40 @@ void DYNACALL WriteMem_sq(u32 addr,T data)
#define OUT_OF_RANGE(reg) INFO_LOG(SH4, "Out of range on register %s index %x", reg, addr)
//Read Area7
template <u32 sz,class T>
template <typename T>
T DYNACALL ReadMem_area7(u32 addr)
{
/*
if (likely(addr==0xffd80024))
// TMU TCNT0 is by far the most frequently read register (x100 the second most read)
if (likely(addr == 0xFFD8000C))
{
return TMU_TCNT(2);
//return (T)sh4_rio_read<sizeof(T)>(TMU, 0xC);
return (T)read_TMU_TCNTch(0);
}
else if (likely(addr==0xFFD8000C))
{
return TMU_TCNT(0);
}
else */if (likely(addr==0xFF000028))
else if (likely(addr == 0xFF000028))
{
return CCN_INTEVT;
}
else if (likely(addr==0xFFA0002C))
{
return DMAC_CHCR(2).full;
}
//else if (addr==)
//printf("%08X\n",addr);
addr&=0x1FFFFFFF;
u32 map_base=addr>>16;
u32 map_base = addr >> 16;
addr &= 0xFF;
switch (map_base & 0x1FFF)
{
case A7_REG_HASH(CCN_BASE_addr):
if (addr<=0x1F000044)
case A7_REG_HASH(TMU_BASE_addr):
if (addr <= 0x2C)
{
return (T)sh4_rio_read<sz>(CCN,addr & 0xFF);
return (T)sh4_rio_read<sizeof(T)>(TMU, addr);
}
else
{
OUT_OF_RANGE("TMU");
return 0;
}
break;
case A7_REG_HASH(CCN_BASE_addr):
if (addr <= 0x44)
{
return (T)sh4_rio_read<sizeof(T)>(CCN, addr);
}
else
{
@ -445,10 +448,46 @@ T DYNACALL ReadMem_area7(u32 addr)
}
break;
case A7_REG_HASH(UBC_BASE_addr):
if (addr<=0x1F200020)
case A7_REG_HASH(DMAC_BASE_addr):
if (addr <= 0x40)
{
return (T)sh4_rio_read<sz>(UBC,addr & 0xFF);
return (T)sh4_rio_read<sizeof(T)>(DMAC, addr);
}
else
{
OUT_OF_RANGE("DMAC");
return 0;
}
break;
case A7_REG_HASH(INTC_BASE_addr):
if (addr <= 0x10)
{
return (T)sh4_rio_read<sizeof(T)>(INTC, addr);
}
else
{
OUT_OF_RANGE("INTC");
return 0;
}
break;
case A7_REG_HASH(RTC_BASE_addr):
if (addr <= 0x3C)
{
return (T)sh4_rio_read<sizeof(T)>(RTC, addr);
}
else
{
OUT_OF_RANGE("RTC");
return 0;
}
break;
case A7_REG_HASH(UBC_BASE_addr):
if (addr <= 0x20)
{
return (T)sh4_rio_read<sizeof(T)>(UBC, addr);
}
else
{
@ -458,9 +497,9 @@ T DYNACALL ReadMem_area7(u32 addr)
break;
case A7_REG_HASH(BSC_BASE_addr):
if (addr<=0x1F800048)
if (addr <= 0x48)
{
return (T)sh4_rio_read<sz>(BSC,addr & 0xFF);
return (T)sh4_rio_read<sizeof(T)>(BSC, addr);
}
else
{
@ -477,24 +516,10 @@ T DYNACALL ReadMem_area7(u32 addr)
INFO_LOG(SH4, "Read from write-only registers [dram settings 3]");
return 0;
case A7_REG_HASH(DMAC_BASE_addr):
if (addr<=0x1FA00040)
{
return (T)sh4_rio_read<sz>(DMAC,addr & 0xFF);
}
else
{
OUT_OF_RANGE("DMAC");
return 0;
}
break;
case A7_REG_HASH(CPG_BASE_addr):
if (addr<=0x1FC00010)
if (addr <= 0x10)
{
return (T)sh4_rio_read<sz>(CPG,addr & 0xFF);
return (T)sh4_rio_read<sizeof(T)>(CPG, addr);
}
else
{
@ -503,46 +528,10 @@ T DYNACALL ReadMem_area7(u32 addr)
}
break;
case A7_REG_HASH(RTC_BASE_addr):
if (addr<=0x1FC8003C)
{
return (T)sh4_rio_read<sz>(RTC,addr & 0xFF);
}
else
{
OUT_OF_RANGE("RTC");
return 0;
}
break;
case A7_REG_HASH(INTC_BASE_addr):
if (addr<=0x1FD00010)
{
return (T)sh4_rio_read<sz>(INTC,addr & 0xFF);
}
else
{
OUT_OF_RANGE("INTC");
return 0;
}
break;
case A7_REG_HASH(TMU_BASE_addr):
if (addr<=0x1FD8002C)
{
return (T)sh4_rio_read<sz>(TMU,addr & 0xFF);
}
else
{
OUT_OF_RANGE("TMU");
return 0;
}
break;
case A7_REG_HASH(SCI_BASE_addr):
if (addr<=0x1FE0001C)
if (addr <= 0x1C)
{
return (T)sh4_rio_read<sz>(SCI,addr & 0xFF);
return (T)sh4_rio_read<sizeof(T)>(SCI, addr);
}
else
{
@ -552,9 +541,9 @@ T DYNACALL ReadMem_area7(u32 addr)
break;
case A7_REG_HASH(SCIF_BASE_addr):
if (addr<=0x1FE80024)
if (addr <= 0x24)
{
return (T)sh4_rio_read<sz>(SCIF,addr & 0xFF);
return (T)sh4_rio_read<sizeof(T)>(SCIF, addr);
}
else
{
@ -568,48 +557,66 @@ T DYNACALL ReadMem_area7(u32 addr)
switch(addr)
{
//UDI SDIR 0x1FF00000 0x1FF00000 16 0xFFFF Held Held Held Pclk
case UDI_SDIR_addr :
case (UDI_SDIR_addr & 0xff):
break;
//UDI SDDR 0x1FF00008 0x1FF00008 32 Held Held Held Held Pclk
case UDI_SDDR_addr :
case (UDI_SDDR_addr & 0xff):
break;
}
break;
}
INFO_LOG(SH4, "Unknown Read from Area7 - addr=%x", addr);
INFO_LOG(SH4, "Unknown Read from Area7 - addr=%x", (map_base << 16) | addr);
return 0;
}
//Write Area7
template <u32 sz,class T>
void DYNACALL WriteMem_area7(u32 addr,T data)
template <typename T>
void DYNACALL WriteMem_area7(u32 addr, T data)
{
if (likely(addr==0xFF000038))
if (likely(addr == 0xFF000038))
{
CCN_QACR_write<0>(addr,data);
CCN_QACR_write<0>(addr, data);
return;
}
else if (likely(addr==0xFF00003C))
else if (likely(addr == 0xFF00003C))
{
CCN_QACR_write<1>(addr,data);
CCN_QACR_write<1>(addr, data);
return;
}
//printf("%08X\n",addr);
addr&=0x1FFFFFFF;
u32 map_base=addr>>16;
u32 map_base = addr >> 16;
addr &= 0xFF;
switch (map_base & 0x1FFF)
{
case A7_REG_HASH(DMAC_BASE_addr):
if (addr <= 0x40)
{
sh4_rio_write<sizeof(T)>(DMAC, addr, data);
}
else
{
OUT_OF_RANGE("DMAC");
}
return;
case A7_REG_HASH(TMU_BASE_addr):
if (addr <= 0x2C)
{
sh4_rio_write<sizeof(T)>(TMU, addr, data);
}
else
{
OUT_OF_RANGE("TMU");
}
return;
case A7_REG_HASH(CCN_BASE_addr):
if (addr<=0x1F00003C)
if (addr <= 0x3C)
{
sh4_rio_write<sz>(CCN,addr & 0xFF,data);
sh4_rio_write<sizeof(T)>(CCN, addr, data);
}
else
{
@ -617,10 +624,21 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
}
return;
case A7_REG_HASH(UBC_BASE_addr):
if (addr<=0x1F200020)
case A7_REG_HASH(INTC_BASE_addr):
if (addr <= 0x0C)
{
sh4_rio_write<sz>(UBC,addr & 0xFF,data);
sh4_rio_write<sizeof(T)>(INTC, addr, data);
}
else
{
OUT_OF_RANGE("INTC");
}
return;
case A7_REG_HASH(UBC_BASE_addr):
if (addr <= 0x20)
{
sh4_rio_write<sizeof(T)>(UBC, addr, data);
}
else
{
@ -629,9 +647,9 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
return;
case A7_REG_HASH(BSC_BASE_addr):
if (addr<=0x1F800048)
if (addr <= 0x48)
{
sh4_rio_write<sz>(BSC,addr & 0xFF,data);
sh4_rio_write<sizeof(T)>(BSC, addr, data);
}
else
{
@ -646,21 +664,10 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
//dram settings 3 / write only
return;
case A7_REG_HASH(DMAC_BASE_addr):
if (addr<=0x1FA00040)
{
sh4_rio_write<sz>(DMAC,addr & 0xFF,data);
}
else
{
OUT_OF_RANGE("DMAC");
}
return;
case A7_REG_HASH(CPG_BASE_addr):
if (addr<=0x1FC00010)
if (addr <= 0x10)
{
sh4_rio_write<sz>(CPG,addr & 0xFF,data);
sh4_rio_write<sizeof(T)>(CPG, addr, data);
}
else
{
@ -669,9 +676,9 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
return;
case A7_REG_HASH(RTC_BASE_addr):
if (addr<=0x1FC8003C)
if (addr <= 0x3C)
{
sh4_rio_write<sz>(RTC,addr & 0xFF,data);
sh4_rio_write<sizeof(T)>(RTC, addr, data);
}
else
{
@ -679,32 +686,10 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
}
return;
case A7_REG_HASH(INTC_BASE_addr):
if (addr<=0x1FD0000C)
{
sh4_rio_write<sz>(INTC,addr & 0xFF,data);
}
else
{
OUT_OF_RANGE("INTC");
}
return;
case A7_REG_HASH(TMU_BASE_addr):
if (addr<=0x1FD8002C)
{
sh4_rio_write<sz>(TMU,addr & 0xFF,data);
}
else
{
OUT_OF_RANGE("TMU");
}
return;
case A7_REG_HASH(SCI_BASE_addr):
if (addr<=0x1FE0001C)
if (addr <= 0x1C)
{
sh4_rio_write<sz>(SCI,addr & 0xFF,data);
sh4_rio_write<sizeof(T)>(SCI, addr, data);
}
else
{
@ -713,9 +698,9 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
return;
case A7_REG_HASH(SCIF_BASE_addr):
if (addr<=0x1FE80024)
if (addr <= 0x24)
{
sh4_rio_write<sz>(SCIF,addr & 0xFF,data);
sh4_rio_write<sizeof(T)>(SCIF, addr, data);
}
else
{
@ -728,18 +713,18 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
switch(addr)
{
//UDI SDIR 0xFFF00000 0x1FF00000 16 0xFFFF Held Held Held Pclk
case UDI_SDIR_addr :
case (UDI_SDIR_addr & 0xff):
break;
//UDI SDDR 0xFFF00008 0x1FF00008 32 Held Held Held Held Pclk
case UDI_SDDR_addr :
case (UDI_SDDR_addr & 0xff):
break;
}
break;
}
INFO_LOG(SH4, "Write to Area7 not implemented, addr=%x, data=%x", addr, data);
INFO_LOG(SH4, "Write to Area7 not implemented, addr=%x, data=%x", (map_base << 16) | addr, data);
}
@ -747,22 +732,12 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
//On Chip Ram
//***********
//Read OCR
template <u32 sz,class T>
template <typename T>
T DYNACALL ReadMem_area7_OCR_T(u32 addr)
{
if (CCN_CCR.ORA)
{
if (sz==1)
return (T)OnChipRAM[addr&OnChipRAM_MASK];
else if (sz==2)
return (T)*(u16*)&OnChipRAM[addr&OnChipRAM_MASK];
else if (sz==4)
return (T)*(u32*)&OnChipRAM[addr&OnChipRAM_MASK];
else
{
ERROR_LOG(SH4, "ReadMem_area7_OCR_T: template SZ is wrong = %d", sz);
return 0xDE;
}
return *(T*)&OnChipRAM[addr & OnChipRAM_MASK];
}
else
{
@ -772,21 +747,12 @@ T DYNACALL ReadMem_area7_OCR_T(u32 addr)
}
//Write OCR
template <u32 sz,class T>
void DYNACALL WriteMem_area7_OCR_T(u32 addr,T data)
template <typename T>
void DYNACALL WriteMem_area7_OCR_T(u32 addr, T data)
{
if (CCN_CCR.ORA)
{
if (sz==1)
OnChipRAM[addr&OnChipRAM_MASK]=(u8)data;
else if (sz==2)
*(u16*)&OnChipRAM[addr&OnChipRAM_MASK]=(u16)data;
else if (sz==4)
*(u32*)&OnChipRAM[addr&OnChipRAM_MASK]=data;
else
{
ERROR_LOG(SH4, "WriteMem_area7_OCR_T: template SZ is wrong = %d", sz);
}
*(T*)&OnChipRAM[addr & OnChipRAM_MASK] = data;
}
else
{
@ -874,9 +840,9 @@ void map_area7_init()
// WriteMem8_area7,WriteMem16_area7,WriteMem32_area7);
//default area7 handler
area7_handler= _vmem_register_handler_Template(ReadMem_area7,WriteMem_area7);
area7_handler= _vmem_register_handler_Template(ReadMem_area7, WriteMem_area7);
area7_orc_handler= _vmem_register_handler_Template(ReadMem_area7_OCR_T,WriteMem_area7_OCR_T);
area7_orc_handler= _vmem_register_handler_Template(ReadMem_area7_OCR_T, WriteMem_area7_OCR_T);
}
void map_area7(u32 base)
{
@ -894,7 +860,7 @@ void map_area7(u32 base)
void map_p4()
{
//P4 Region :
_vmem_handler p4_handler = _vmem_register_handler_Template(ReadMem_P4,WriteMem_P4);
_vmem_handler p4_handler = _vmem_register_handler_Template(ReadMem_P4, WriteMem_P4);
//register this before area7 and SQ , so they overwrite it and handle em :)
//default P4 handler
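
Two things change throughout ReadMem_area7 and WriteMem_area7 above: the hottest registers (TMU TCNT0 for reads, DMAC and TMU for writes) move to the front of the dispatch, and every bounds check compares an 8-bit register offset instead of a full physical address. The address split that makes the short comparisons valid, restated as a sketch:

// Mirrors the decomposition in the handlers above: an area-7 address is
// reduced to a 64 KB module index plus an 8-bit register offset, so each
// module's range check becomes e.g. "addr <= 0x2C" rather than
// "addr <= 0x1FD8002C".
static inline void split_area7_addr(u32 full, u32& map_base, u32& offset)
{
    full &= 0x1FFFFFFF;    // keep physical address bits
    map_base = full >> 16; // selects CCN, TMU, DMAC, ... via A7_REG_HASH
    offset = full & 0xFF;  // register offset inside the module
}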

View file

@ -75,21 +75,6 @@ int sh4_sched_register(int tag, sh4_sched_callback* ssc)
return sch_list.size()-1;
}
/*
Return current cycle count, in 32 bits (wraps after 21 dreamcast seconds)
*/
u32 sh4_sched_now()
{
return sh4_sched_ffb-Sh4cntx.sh4_sched_next;
}
/*
Return current cycle count, in 64 bits (effectively never wraps)
*/
u64 sh4_sched_now64()
{
return sh4_sched_ffb-Sh4cntx.sh4_sched_next;
}
void sh4_sched_request(int id, int cycles)
{
verify(cycles== -1 || (cycles >= 0 && cycles <= SH4_MAIN_CLOCK));

View file

@ -3,6 +3,8 @@
#include "types.h"
extern u64 sh4_sched_ffb;
/*
tag, as passed on sh4_sched_register
sch_cycles, the cycle duration that the callback requested (sh4_sched_request)
@ -17,16 +19,20 @@ typedef int sh4_sched_callback(int tag, int sch_cycl, int jitter);
int sh4_sched_register(int tag, sh4_sched_callback* ssc);
/*
current time in SH4 cycles, referenced to boot.
Wraps every ~21 secs
Return current cycle count, in 32 bits (wraps after 21 dreamcast seconds)
*/
u32 sh4_sched_now();
static inline u32 sh4_sched_now()
{
return sh4_sched_ffb - Sh4cntx.sh4_sched_next;
}
/*
current time, in SH4 cycles, referenced to boot.
Does not wrap, 64 bits.
Return current cycle count, in 64 bits (effectively never wraps)
*/
u64 sh4_sched_now64();
static inline u64 sh4_sched_now64()
{
return sh4_sched_ffb - Sh4cntx.sh4_sched_next;
}
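
These definitions replace the out-of-line versions deleted from the scheduler source above; each is a single subtraction, so making them static inline removes a cross-translation-unit call from hot paths. A hypothetical caller, to show what the compiler can now collapse:

// Sketch: with the definition visible, this inlines to a subtract and a
// compare; both values wrap together modulo 2^32, so the difference stays
// correct across the ~21 s wraparound.
static inline bool cycles_elapsed(u32 start, u32 budget)
{
    return sh4_sched_now() - start >= budget;
}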
/*
Schedule a callback to be called sh4 *cycles* after the

View file

@ -88,7 +88,7 @@ public:
static std::shared_ptr<EvdevGamepadDevice> GetControllerForPort(int port)
{
for (auto& pair : evdev_gamepads)
for (const auto& pair : evdev_gamepads)
if (pair.second->maple_port() == port)
return pair.second;
return NULL;
@ -104,7 +104,7 @@ public:
static void PollDevices()
{
for (auto& pair : evdev_gamepads)
for (const auto& pair : evdev_gamepads)
pair.second->read_input();
}

View file

@ -204,7 +204,7 @@ static void elf_syms(FILE* out,const char* libfile)
}
}
static volatile bool prof_run;
static bool prof_run;
// This is not used:
static int str_ends_with(const char * str, const char * suffix)

View file

@ -10,8 +10,8 @@ SoundFrame RingBuffer[SAMPLE_COUNT];
const u32 RingBufferByteSize = sizeof(RingBuffer);
const u32 RingBufferSampleCount = SAMPLE_COUNT;
volatile u32 WritePtr; //last WRITEN sample
volatile u32 ReadPtr; //next sample to read
u32 WritePtr; //last WRITEN sample
u32 ReadPtr; //next sample to read
u32 gen_samples=0;
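
Dropping volatile here (and on CpuRunning and prof_run above) matches what the qualifier actually provides: it only forbids the compiler from caching the value in a register, and gives neither atomicity nor ordering. For variables touched by one thread, plain u32 is correct; if cross-thread visibility were required, the fix would be std::atomic rather than volatile. A contrast sketch, not what the commit does:

#include <atomic>
#include <cstdint>

std::atomic<uint32_t> write_ptr{0}; // producer: write_ptr.store(i, std::memory_order_release)
std::atomic<uint32_t> read_ptr{0};  // consumer: read_ptr.load(std::memory_order_acquire)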

View file

@ -234,12 +234,12 @@ eFSReg alloc_fpu[]={f16,f17,f18,f19,f20,f21,f22,f23,
struct arm_reg_alloc: RegAlloc<eReg,eFSReg,false>
{
virtual void Preload(u32 reg,eReg nreg)
virtual void Preload(u32 reg,eReg nreg) override
{
verify(reg!=reg_pc_dyn);
LoadSh4Reg_mem(nreg,reg);
}
virtual void Writeback(u32 reg,eReg nreg)
virtual void Writeback(u32 reg,eReg nreg) override
{
if (reg==reg_pc_dyn)
// reg_pc_dyn has been stored in r4 by the jdyn op implementation
@ -249,13 +249,13 @@ struct arm_reg_alloc: RegAlloc<eReg,eFSReg,false>
StoreSh4Reg_mem(nreg,reg);
}
virtual void Preload_FPU(u32 reg,eFSReg nreg)
virtual void Preload_FPU(u32 reg, eFSReg nreg, bool _64bits) override
{
const s32 shRegOffs = (u8*)GetRegPtr(reg)-sh4_dyna_rcb ;
VLDR((nreg),r8,shRegOffs/4);
}
virtual void Writeback_FPU(u32 reg,eFSReg nreg)
virtual void Writeback_FPU(u32 reg, eFSReg nreg, bool _64bits) override
{
const s32 shRegOffs = (u8*)GetRegPtr(reg)-sh4_dyna_rcb ;
@ -716,7 +716,7 @@ mem_op_type memop_type(shil_opcode* op)
{
int Lsz=-1;
int sz=op->flags&0x7f;
int sz = op->size();
bool fp32=op->rs2.is_r32f() || op->rd.is_r32f();

View file

@ -16,15 +16,8 @@
You should have received a copy of the GNU General Public License
along with reicast. If not, see <https://www.gnu.org/licenses/>.
*/
#ifndef CORE_REC_ARM64_ARM64_REGALLOC_H_
#define CORE_REC_ARM64_ARM64_REGALLOC_H_
#ifdef OLD_REGALLOC
#include "hw/sh4/dyna/regalloc.h"
#else
#pragma once
#include "hw/sh4/dyna/ssa_regalloc.h"
#endif
#include "deps/vixl/aarch64/macro-assembler-aarch64.h"
using namespace vixl::aarch64;
@ -42,11 +35,7 @@ static eFReg alloc_fregs[] = { S8, S9, S10, S11, S12, S13, S14, S15, (eFReg)-1 }
class Arm64Assembler;
struct Arm64RegAlloc : RegAlloc<eReg, eFReg
#ifndef EXPLODE_SPANS
, false
#endif
>
struct Arm64RegAlloc : RegAlloc<eReg, eFReg, true>
{
Arm64RegAlloc(Arm64Assembler *assembler) : assembler(assembler) {}
@ -57,8 +46,10 @@ struct Arm64RegAlloc : RegAlloc<eReg, eFReg
virtual void Preload(u32 reg, eReg nreg) override;
virtual void Writeback(u32 reg, eReg nreg) override;
virtual void Preload_FPU(u32 reg, eFReg nreg) override;
virtual void Writeback_FPU(u32 reg, eFReg nreg) override;
virtual void Preload_FPU(u32 reg, eFReg nreg, bool _64bit) override;
virtual void Writeback_FPU(u32 reg, eFReg nreg, bool _64bit) override;
virtual void Merge_FPU(eFReg reg1, eFReg reg2) override;
virtual void Shift_FPU(eFReg reg) override;
const Register& MapRegister(const shil_param& param)
{
@ -70,21 +61,15 @@ struct Arm64RegAlloc : RegAlloc<eReg, eFReg
const VRegister& MapVRegister(const shil_param& param, u32 index = 0)
{
#ifdef OLD_REGALLOC
eFReg ereg = mapfv(param, index);
#else
#ifdef EXPLODE_SPANS
#error EXPLODE_SPANS not supported with ssa regalloc
#endif
verify(index == 0);
eFReg ereg = mapf(param);
#endif
if (ereg == (eFReg)-1)
die("VRegister not allocated");
return VRegister::GetSRegFromCode(ereg);
if (param.is_r64f())
return VRegister::GetDRegFromCode(ereg);
else
return VRegister::GetSRegFromCode(ereg);
}
Arm64Assembler *assembler;
};
#endif /* CORE_REC_ARM64_ARM64_REGALLOC_H_ */

View file

@ -22,13 +22,10 @@
#if FEAT_SHREC == DYNAREC_JIT
#include <unistd.h>
#include <map>
#include <setjmp.h>
#include "deps/vixl/aarch64/macro-assembler-aarch64.h"
using namespace vixl::aarch64;
//#define EXPLODE_SPANS
//#define NO_BLOCK_LINKING
#include "hw/sh4/sh4_opcode_list.h"
@ -42,6 +39,8 @@ using namespace vixl::aarch64;
#include "hw/mem/vmem32.h"
#include "arm64_regalloc.h"
using namespace vixl::aarch64;
#undef do_sqw_nommu
extern "C" void ngen_blockcheckfail(u32 pc);
@ -483,13 +482,15 @@ public:
verify(op.rd.is_reg());
verify(op.rs1.is_reg() || op.rs1.is_imm());
#ifdef EXPLODE_SPANS
Fmov(regalloc.MapVRegister(op.rd, 0), regalloc.MapVRegister(op.rs1, 0));
Fmov(regalloc.MapVRegister(op.rd, 1), regalloc.MapVRegister(op.rs1, 1));
#else
shil_param_to_host_reg(op.rs1, x15);
host_reg_to_shil_param(op.rd, x15);
#endif
if (op.rs1.is_reg() && regalloc.IsAllocf(op.rs1))
{
Fmov(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1));
}
else
{
shil_param_to_host_reg(op.rs1, x15);
host_reg_to_shil_param(op.rd, x15);
}
break;
case shop_readm:
@ -935,7 +936,7 @@ public:
case shop_xtrct:
{
const Register rd = regalloc.MapRegister(op.rd);
const Register& rd = regalloc.MapRegister(op.rd);
Lsr(rd, regalloc.MapRegister(op.rs1), 16);
Lsl(w0, regalloc.MapRegister(op.rs2), 16);
Orr(rd, rd, w0);
@ -990,14 +991,17 @@ public:
if (op.rs1.is_reg())
Add(x1, x1, Operand(regalloc.MapRegister(op.rs1), UXTH, 3));
else
{
// TODO get rid of this Add if rs1 is imm. Use MemOperand with offset when !imm
Add(x1, x1, Operand(op.rs1.imm_value() << 3));
#ifdef EXPLODE_SPANS
Ldr(regalloc.MapVRegister(op.rd, 0), MemOperand(x1, 4, PostIndex));
Ldr(regalloc.MapVRegister(op.rd, 1), MemOperand(x1));
#else
Ldr(x2, MemOperand(x1));
Str(x2, sh4_context_mem_operand(op.rd.reg_ptr()));
#endif
}
if (regalloc.IsAllocf(op.rd))
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1));
else
{
Ldr(x2, MemOperand(x1));
Str(x2, sh4_context_mem_operand(op.rd.reg_ptr()));
}
break;
case shop_fipr:
@ -1605,23 +1609,10 @@ private:
if (mmu_enabled())
Mov(*call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0)); // pc
u32 size = op.flags & 0x7f;
if (!optimise || !GenReadMemoryFast(op, opid))
GenReadMemorySlow(size);
GenReadMemorySlow(op.size());
if (size < 8)
host_reg_to_shil_param(op.rd, w0);
else
{
#ifdef EXPLODE_SPANS
verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1));
Fmov(regalloc.MapVRegister(op.rd, 0), w0);
Lsr(x0, x0, 32);
Fmov(regalloc.MapVRegister(op.rd, 1), w0);
#else
Str(x0, sh4_context_mem_operand(op.rd.reg_ptr()));
#endif
}
host_reg_to_shil_param(op.rd, x0);
}
bool GenReadMemoryImmediate(const shil_opcode& op)
@ -1629,11 +1620,12 @@ private:
if (!op.rs1.is_imm())
return false;
u32 size = op.flags & 0x7f;
const u32 size = op.size();
u32 addr = op.rs1._imm;
if (mmu_enabled())
{
if ((addr >> 12) != (block->vaddr >> 12))
if ((addr >> 12) < (block->vaddr >> 12)
|| ((addr + size - 1) >> 12) > (block->vaddr + block->sh4_code_size - 1) >> 12)
// When full mmu is on, only consider addresses in the same 4k page
return false;
u32 paddr;
@ -1647,9 +1639,11 @@ private:
rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
break;
case 4:
case 8:
rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
break;
case 8:
rv = mmu_data_translation<MMU_TT_DREAD, u64>(addr, paddr);
break;
default:
die("Invalid immediate size");
break;
@ -1659,7 +1653,7 @@ private:
addr = paddr;
}
bool isram = false;
void* ptr = _vmem_read_const(addr, isram, size > 4 ? 4 : size);
void* ptr = _vmem_read_const(addr, isram, size);
if (isram)
{
@ -1683,6 +1677,10 @@ private:
Ldr(regalloc.MapRegister(op.rd), MemOperand(x1));
break;
case 8:
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1));
break;
default:
die("Invalid size");
break;
@ -1792,8 +1790,7 @@ private:
Add(x1, *call_regs64[0], sizeof(Sh4Context), LeaveFlags);
}
u32 size = op.flags & 0x7f;
switch(size)
switch(op.size())
{
case 1:
Ldrsb(w0, MemOperand(x28, x1));
@ -1825,25 +1822,15 @@ private:
if (mmu_enabled())
Mov(*call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0)); // pc
u32 size = op.flags & 0x7f;
if (size != 8)
if (op.size() != 8)
shil_param_to_host_reg(op.rs2, *call_regs[1]);
else
{
#ifdef EXPLODE_SPANS
verify(op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1));
Fmov(*call_regs[1], regalloc.MapVRegister(op.rs2, 1));
Lsl(*call_regs64[1], *call_regs64[1], 32);
Fmov(w2, regalloc.MapVRegister(op.rs2, 0));
Orr(*call_regs64[1], *call_regs64[1], x2);
#else
shil_param_to_host_reg(op.rs2, *call_regs64[1]);
#endif
}
if (optimise && GenWriteMemoryFast(op, opid))
return;
GenWriteMemorySlow(size);
GenWriteMemorySlow(op.size());
}
bool GenWriteMemoryImmediate(const shil_opcode& op)
@ -1851,11 +1838,12 @@ private:
if (!op.rs1.is_imm())
return false;
u32 size = op.flags & 0x7f;
const u32 size = op.size();
u32 addr = op.rs1._imm;
if (mmu_enabled())
{
if ((addr >> 12) != (block->vaddr >> 12) && ((addr >> 12) != ((block->vaddr + block->guest_opcodes * 2 - 1) >> 12)))
if ((addr >> 12) < (block->vaddr >> 12)
|| ((addr + size - 1) >> 12) > (block->vaddr + block->sh4_code_size - 1) >> 12)
// When full mmu is on, only consider addresses in the same 4k page
return false;
u32 paddr;
@ -1869,9 +1857,11 @@ private:
rv = mmu_data_translation<MMU_TT_DWRITE, u16>(addr, paddr);
break;
case 4:
case 8:
rv = mmu_data_translation<MMU_TT_DWRITE, u32>(addr, paddr);
break;
case 8:
rv = mmu_data_translation<MMU_TT_DWRITE, u64>(addr, paddr);
break;
default:
die("Invalid immediate size");
break;
@ -1881,28 +1871,34 @@ private:
addr = paddr;
}
bool isram = false;
void* ptr = _vmem_write_const(addr, isram, size > 4 ? 4 : size);
void* ptr = _vmem_write_const(addr, isram, size);
Register reg2;
if (size != 8)
if (op.rs2.is_imm())
{
if (op.rs2.is_imm())
Mov(w1, op.rs2._imm);
reg2 = w1;
}
else if (regalloc.IsAllocg(op.rs2))
{
reg2 = regalloc.MapRegister(op.rs2);
}
else if (regalloc.IsAllocf(op.rs2))
{
if (op.rs2.is_r64f())
{
Mov(w1, op.rs2._imm);
reg2 = w1;
Fmov(x1, VRegister::GetDRegFromCode(regalloc.MapVRegister(op.rs2).GetCode()));
reg2 = x1;
}
else if (regalloc.IsAllocg(op.rs2))
{
reg2 = regalloc.MapRegister(op.rs2);
}
else if (regalloc.IsAllocf(op.rs2))
else
{
Fmov(w1, regalloc.MapVRegister(op.rs2));
reg2 = w1;
}
else
die("Invalid rs2 param");
}
else
die("Invalid rs2 param");
if (isram)
{
Ldr(x0, reinterpret_cast<uintptr_t>(ptr));
@ -1921,14 +1917,7 @@ private:
break;
case 8:
#ifdef EXPLODE_SPANS
verify(op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1));
Str(regalloc.MapVRegister(op.rs2, 0), MemOperand(x1));
Str(regalloc.MapVRegister(op.rs2, 1), MemOperand(x1, 4));
#else
shil_param_to_host_reg(op.rs2, x1);
Str(x1, MemOperand(x0));
#endif
Str(reg2, MemOperand(x0));
break;
default:
@ -2000,8 +1989,7 @@ private:
Add(x7, *call_regs64[0], sizeof(Sh4Context), LeaveFlags);
}
u32 size = op.flags & 0x7f;
switch(size)
switch(op.size())
{
case 1:
Strb(w1, MemOperand(x28, x7));
@ -2117,9 +2105,8 @@ private:
else if (param.is_reg())
{
if (param.is_r64f())
Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
else if (param.is_r32f())
{
verify(reg.Is64Bits());
if (regalloc.IsAllocf(param))
Fmov(reg, regalloc.MapVRegister(param));
else
@ -2127,10 +2114,21 @@ private:
}
else
{
if (regalloc.IsAllocg(param))
Mov(reg, regalloc.MapRegister(param));
const Register& reg32 = reg.Is32Bits() ? (const Register&)reg : Register::GetWRegFromCode(reg.GetCode());
if (param.is_r32f())
{
if (regalloc.IsAllocf(param))
Fmov(reg32, regalloc.MapVRegister(param));
else
Ldr(reg32, sh4_context_mem_operand(param.reg_ptr()));
}
else
Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
{
if (regalloc.IsAllocg(param))
Mov(reg32, regalloc.MapRegister(param));
else
Ldr(reg32, sh4_context_mem_operand(param.reg_ptr()));
}
}
}
else
@ -2141,23 +2139,46 @@ private:
void host_reg_to_shil_param(const shil_param& param, const CPURegister& reg)
{
if (reg.Is64Bits())
if (param.is_r64f())
{
Str((const Register&)reg, sh4_context_mem_operand(param.reg_ptr()));
verify(reg.Is64Bits());
if (regalloc.IsAllocf(param))
{
if (reg.IsVRegister())
Fmov(regalloc.MapVRegister(param), (const VRegister&)reg);
else
Fmov(regalloc.MapVRegister(param), (const Register&)reg);
}
else
{
Str((const Register&)reg, sh4_context_mem_operand(param.reg_ptr()));
}
}
else if (regalloc.IsAllocg(param))
{
if (reg.IsRegister())
Mov(regalloc.MapRegister(param), (const Register&)reg);
{
const Register& reg32 = reg.Is32Bits() ? (const Register&)reg : Register::GetWRegFromCode(reg.GetCode());
Mov(regalloc.MapRegister(param), reg32);
}
else
Fmov(regalloc.MapRegister(param), (const VRegister&)reg);
{
const VRegister& reg32 = reg.Is32Bits() ? (const VRegister&)reg : VRegister::GetSRegFromCode(reg.GetCode());
Fmov(regalloc.MapRegister(param), reg32);
}
}
else if (regalloc.IsAllocf(param))
{
if (reg.IsVRegister())
Fmov(regalloc.MapVRegister(param), (const VRegister&)reg);
{
const VRegister& reg32 = reg.Is32Bits() ? (const VRegister&)reg : VRegister::GetSRegFromCode(reg.GetCode());
Fmov(regalloc.MapVRegister(param), reg32);
}
else
Fmov(regalloc.MapVRegister(param), (const Register&)reg);
{
const Register& reg32 = reg.Is32Bits() ? (const Register&)reg : Register::GetWRegFromCode(reg.GetCode());
Fmov(regalloc.MapVRegister(param), reg32);
}
}
else
{
@ -2334,15 +2355,22 @@ void Arm64RegAlloc::Writeback(u32 reg, eReg nreg)
{
assembler->Str(Register(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));
}
void Arm64RegAlloc::Preload_FPU(u32 reg, eFReg nreg)
void Arm64RegAlloc::Preload_FPU(u32 reg, eFReg nreg, bool _64bit)
{
assembler->Ldr(VRegister(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));
assembler->Ldr(VRegister(nreg, _64bit ? 64 : 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));
}
void Arm64RegAlloc::Writeback_FPU(u32 reg, eFReg nreg)
void Arm64RegAlloc::Writeback_FPU(u32 reg, eFReg nreg, bool _64bit)
{
assembler->Str(VRegister(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));
assembler->Str(VRegister(nreg, _64bit ? 64 : 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));
}
void Arm64RegAlloc::Merge_FPU(eFReg reg1, eFReg reg2)
{
assembler->Sli(VRegister(reg1, 64), VRegister(reg2, 64), 32);
}
void Arm64RegAlloc::Shift_FPU(eFReg reg)
{
assembler->Urshr(VRegister(reg, 64), VRegister(reg, 64), 32);
}
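// A sketch of the pair-register layout these helpers maintain (merge_pair is
// illustrative only, not code from this change): a 64-bit fp pair occupies a
// single D register as [high 32 bits = second single, low 32 bits = first
// single]. Merge_FPU packs reg2 above reg1's low word (Sli shifts reg2 left
// by 32 and inserts it into reg1), and Shift_FPU moves the high single back
// down to lane 0.
static u64 merge_pair(u32 first, u32 second) // raw float bit patterns
{
	return ((u64)second << 32) | first; // what Sli(reg1, reg2, 32) produces
}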
extern "C" naked void do_sqw_nommu_area_3(u32 dst, u8* sqb)
{

View file

@ -1677,7 +1677,7 @@ public:
case shop_readm:
{
u32 size = op.flags & 0x7f;
u32 size = op.size();
if (op.rs1.is_imm()) {
verify(op.rs2.is_null() && op.rs3.is_null());
@ -1760,7 +1760,7 @@ public:
case shop_writem:
{
u32 size = op.flags & 0x7f;
u32 size = op.size();
if (op.rs1.is_imm()) {
verify(op.rs3.is_null());

View file

@ -3,7 +3,6 @@
#if FEAT_SHREC == DYNAREC_JIT && HOST_CPU == CPU_X64
#include <setjmp.h>
//#define PROFILING
//#define CANONICAL_TEST
@ -115,7 +114,7 @@ void ngen_mainloop(void* v_cntx)
#endif
"pushq %rbx \n\t"
WIN32_ONLY( ".seh_pushreg %rbx \n\t")
#ifndef __MACH__ // rbp is pushed in the standard function prologue
#if !defined(__MACH__) && !defined(NO_OMIT_FRAME_POINTER) // rbp is pushed in the standard function prologue
"pushq %rbp \n\t"
#endif
#ifdef _WIN32
@ -195,7 +194,7 @@ WIN32_ONLY( ".seh_pushreg %r14 \n\t")
"popq %rsi \n\t"
"popq %rdi \n\t"
#endif
#ifndef __MACH__
#if !defined(__MACH__) && !defined(NO_OMIT_FRAME_POINTER)
"popq %rbp \n\t"
#endif
"popq %rbx \n\t"
@ -389,6 +388,7 @@ public:
shil_opcode& op = block->oplist[current_opid];
regalloc.OpBegin(&op, current_opid);
flushXmmRegisters = false;
switch (op.op)
{
@ -458,15 +458,20 @@ public:
verify(op.rd.is_r64());
verify(op.rs1.is_r64());
if (regalloc.IsAllocf(op.rd))
{
const Xbyak::Xmm& destReg = regalloc.MapXRegister(op.rd);
const Xbyak::Xmm& srcReg = regalloc.MapXRegister(op.rs1);
if (destReg != srcReg)
movq(destReg, srcReg);
}
else
{
mov(rax, (uintptr_t)op.rs1.reg_ptr());
mov(rax, qword[rax]);
mov(rcx, (uintptr_t)op.rd.reg_ptr());
mov(qword[rcx], rax);
}
}
break;
@ -490,24 +495,7 @@ public:
if (!optimise || !GenReadMemoryFast(op, block))
GenReadMemorySlow(op, block);
host_reg_to_shil_param(op.rd, rax);
}
break;
@ -528,26 +516,8 @@ public:
add(call_regs[0], dword[rax]);
}
}
shil_param_to_host_reg(op.rs2, call_regs64[1]);
if (!optimise || !GenWriteMemoryFast(op, block))
GenWriteMemorySlow(op, block);
}
@ -1077,37 +1047,46 @@ public:
else
movzx(rax, regalloc.MapRegister(op.rs1).cvt16());
mov(rcx, (uintptr_t)&sin_table);
if (regalloc.IsAllocf(op.rd))
movq(regalloc.MapXRegister(op.rd), qword[rcx + rax * 8]);
else
{
mov(rcx, qword[rcx + rax * 8]);
mov(rdx, (uintptr_t)op.rd.reg_ptr());
mov(qword[rdx], rcx);
}
break;
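// The rax * 8 scaling works because each sin_table entry is an 8-byte record
// holding the sine/cosine pair that fsca produces, so when rd is mapped as a
// 64-bit fp pair a single movq replaces the two movss loads used previously.
// Roughly (a sketch of the access pattern, not code from this change):
//   u64 pair = *(u64 *)&sin_table[idx]; // both components in one load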
case shop_fipr:
{
// Using doubles for better precision
const Xbyak::Xmm &rd = regalloc.MapXRegister(op.rd);
mov(rax, (size_t)op.rs1.reg_ptr());
mov(rcx, (size_t)op.rs2.reg_ptr());
pxor(xmm1, xmm1);
pxor(xmm0, xmm0);
pxor(xmm2, xmm2);
cvtss2sd(xmm1, dword[rax]);
cvtss2sd(xmm0, dword[rcx]);
mulsd(xmm0, xmm1);
pxor(xmm1, xmm1);
cvtss2sd(xmm2, dword[rax + 4]);
cvtss2sd(xmm1, dword[rcx + 4]);
mulsd(xmm1, xmm2);
pxor(xmm2, xmm2);
cvtss2sd(xmm2, dword[rax + 8]);
addsd(xmm1, xmm0);
pxor(xmm0, xmm0);
cvtss2sd(xmm0, dword[rcx + 8]);
mulsd(xmm0, xmm2);
pxor(xmm2, xmm2);
cvtss2sd(xmm2, dword[rax + 12]);
addsd(xmm1, xmm0);
pxor(xmm0, xmm0);
cvtss2sd(xmm0, dword[rcx + 12]);
mulsd(xmm0, xmm2);
addsd(xmm0, xmm1);
cvtsd2ss(rd, xmm0);
}
break;
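// Reference semantics of the sequence above (a minimal sketch for clarity;
// fipr_reference is not a function in this repo): each SH4 single is widened
// with cvtss2sd, the four products and partial sums are kept in double
// precision, and cvtsd2ss rounds exactly once at the end, unlike the packed
// single-precision version this replaces, which rounded after every step.
static float fipr_reference(const float *fn, const float *fm)
{
	double sum = 0.0;
	for (int i = 0; i < 4; i++)
		sum += (double)fn[i] * (double)fm[i];
	return (float)sum; // single final rounding
}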
@ -1217,6 +1196,8 @@ public:
break;
}
regalloc.OpEnd(&op);
if (flushXmmRegisters)
regalloc.FlushXmmRegisters(&op);
}
regalloc.Cleanup();
current_opid = -1;
@ -1305,8 +1286,7 @@ public:
if (mmu_enabled())
mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
u32 size = op.flags & 0x7f;
switch (size) {
switch (op.size()) {
case 1:
if (!mmu_enabled())
GenCall(ReadMem8);
@ -1356,8 +1336,7 @@ public:
if (mmu_enabled())
mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
u32 size = op.flags & 0x7f;
switch (size) {
switch (op.size()) {
case 1:
if (!mmu_enabled())
GenCall(WriteMem8);
@ -1441,11 +1420,6 @@ public:
// store from xmm0
case CPT_f32rv:
host_reg_to_shil_param(prm, xmm0);
break;
}
}
@ -1457,23 +1431,24 @@ public:
for (int i = CC_pars.size(); i-- > 0;)
{
verify(xmmused < 4 && regused < 4);
const shil_param& prm = *CC_pars[i].prm;
switch (CC_pars[i].type) {
//push the contents
case CPT_u32:
verify(regused < call_regs.size());
shil_param_to_host_reg(prm, call_regs[regused++]);
break;
case CPT_f32:
verify(xmmused < call_regsxmm.size());
shil_param_to_host_reg(prm, call_regsxmm[xmmused++]);
break;
//push the ptr itself
case CPT_ptr:
verify(prm.is_reg());
verify(regused < call_regs64.size());
mov(call_regs64[regused++], (size_t)prm.reg_ptr());
break;
@ -1495,15 +1470,32 @@ public:
mov(rax, (size_t)GetRegPtr(reg));
mov(dword[rax], Xbyak::Reg32(nreg));
}
void RegPreload_FPU(u32 reg, s8 nreg)
void RegPreload_FPU(u32 reg, s8 nreg, bool _64bit)
{
mov(rax, (size_t)GetRegPtr(reg));
movss(Xbyak::Xmm(nreg), dword[rax]);
if (_64bit)
movq(Xbyak::Xmm(nreg), qword[rax]);
else
movss(Xbyak::Xmm(nreg), dword[rax]);
}
void RegWriteback_FPU(u32 reg, s8 nreg)
void RegWriteback_FPU(u32 reg, s8 nreg, bool _64bit)
{
mov(rax, (size_t)GetRegPtr(reg));
movss(dword[rax], Xbyak::Xmm(nreg));
if (_64bit)
movq(qword[rax], Xbyak::Xmm(nreg));
else
movss(dword[rax], Xbyak::Xmm(nreg));
}
void RegMerge_FPU(s8 reg1, s8 reg2)
{
psllq(Xbyak::Xmm(reg2), 32);
por(Xbyak::Xmm(reg1), Xbyak::Xmm(reg2));
}
void RegShift_FPU(s8 reg)
{
psrlq(Xbyak::Xmm(reg), 32);
}
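// The same packing in portable terms (pack_pair and unpack_high are assumed
// helpers for illustration, not part of this change): RegMerge_FPU builds
// [high = reg2, low = reg1] in one XMM register, and RegShift_FPU exposes
// the high single again.
static inline u64 pack_pair(u32 lo, u32 hi)
{
	return ((u64)hi << 32) | lo; // psllq(reg2, 32) + por(reg1, reg2)
}
static inline u32 unpack_high(u64 pair)
{
	return (u32)(pair >> 32); // psrlq(reg, 32)
}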
private:
@ -1514,11 +1506,12 @@ private:
{
if (!op.rs1.is_imm())
return false;
u32 size = op.flags & 0x7f;
u32 size = op.size();
u32 addr = op.rs1._imm;
if (mmu_enabled())
{
if ((addr >> 12) != (block->vaddr >> 12))
if ((addr >> 12) < (block->vaddr >> 12)
|| ((addr + size - 1) >> 12) > (block->vaddr + block->sh4_code_size - 1) >> 12)
// When full mmu is on, only consider addresses within the pages covered by this block
return false;
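// That is, the constant address (through its last byte) must lie within the
// pages the block's code already spans, from block->vaddr >> 12 up to
// (block->vaddr + sh4_code_size - 1) >> 12, rather than only the block's
// first 4k page; anything outside falls back to the generic memory path.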
@ -1533,9 +1526,11 @@ private:
rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
break;
case 4:
case 8:
rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
break;
case 8:
rv = mmu_data_translation<MMU_TT_DREAD, u64>(addr, paddr);
break;
default:
die("Invalid immediate size");
break;
@ -1546,7 +1541,7 @@ private:
addr = paddr;
}
bool isram = false;
void* ptr = _vmem_read_const(addr, isram, size > 4 ? 4 : size);
void* ptr = _vmem_read_const(addr, isram, size);
if (isram)
{
@ -1590,17 +1585,11 @@ private:
break;
case 8:
if (regalloc.IsAllocf(op.rd))
movq(regalloc.MapXRegister(op.rd), qword[rax]);
else
{
mov(rcx, qword[rax]);
mov(rax, (uintptr_t)op.rd.reg_ptr());
mov(qword[rax], rcx);
}
@ -1616,6 +1605,7 @@ private:
// Not RAM: the returned pointer is a memory handler
if (size == 8)
{
// FIXME the call to _vmem_read_const() would have asserted at this point
verify(!regalloc.IsAllocAny(op.rd));
// Need to call the handler twice
@ -1664,11 +1654,12 @@ private:
{
if (!op.rs1.is_imm())
return false;
u32 size = op.flags & 0x7f;
u32 size = op.size();
u32 addr = op.rs1._imm;
if (mmu_enabled())
{
if ((addr >> 12) != (block->vaddr >> 12))
if ((addr >> 12) < (block->vaddr >> 12)
|| ((addr + size - 1) >> 12) > (block->vaddr + block->sh4_code_size - 1) >> 12)
// When full mmu is on, only consider addresses within the pages covered by this block
return false;
@ -1683,9 +1674,11 @@ private:
rv = mmu_data_translation<MMU_TT_DWRITE, u16>(addr, paddr);
break;
case 4:
case 8:
rv = mmu_data_translation<MMU_TT_DWRITE, u32>(addr, paddr);
break;
case 8:
rv = mmu_data_translation<MMU_TT_DWRITE, u64>(addr, paddr);
break;
default:
die("Invalid immediate size");
break;
@ -1696,7 +1689,7 @@ private:
addr = paddr;
}
bool isram = false;
void* ptr = _vmem_write_const(addr, isram, size > 4 ? 4 : size);
void* ptr = _vmem_write_const(addr, isram, size);
if (isram)
{
@ -1746,16 +1739,9 @@ private:
break;
case 8:
if (regalloc.IsAllocf(op.rs2))
movq(qword[rax], regalloc.MapXRegister(op.rs2));
else
{
mov(rcx, (uintptr_t)op.rs2.reg_ptr());
mov(rcx, qword[rcx]);
@ -1792,7 +1778,6 @@ private:
mov(rax, (uintptr_t)virt_ram_base);
u32 size = op.flags & 0x7f;
//verify(getCurr() - start_addr == 26);
if (mem_access_offset == 0)
mem_access_offset = getCurr() - start_addr;
@ -1800,7 +1785,7 @@ private:
verify(getCurr() - start_addr == mem_access_offset);
block->memory_accesses[(void*)getCurr()] = (u32)current_opid;
switch (size)
switch (op.size())
{
case 1:
movsx(eax, byte[rax + call_regs64[0]]);
@ -1841,7 +1826,6 @@ private:
mov(rax, (uintptr_t)virt_ram_base);
u32 size = op.flags & 0x7f;
//verify(getCurr() - start_addr == 26);
if (mem_access_offset == 0)
mem_access_offset = getCurr() - start_addr;
@ -1849,18 +1833,18 @@ private:
verify(getCurr() - start_addr == mem_access_offset);
block->memory_accesses[(void*)getCurr()] = (u32)current_opid;
switch (size)
switch (op.size())
{
case 1:
mov(byte[rax + call_regs64[0] + 0], call_regs[1].cvt8());
mov(byte[rax + call_regs64[0] + 0], call_regs64[1].cvt8());
break;
case 2:
mov(word[rax + call_regs64[0]], call_regs[1].cvt16());
mov(word[rax + call_regs64[0]], call_regs64[1].cvt16());
break;
case 4:
mov(dword[rax + call_regs64[0]], call_regs[1]);
mov(dword[rax + call_regs64[0]], call_regs64[1].cvt32());
break;
case 8:
@ -1997,67 +1981,11 @@ private:
void GenCall(Ret(*function)(Params...), bool skip_floats = false)
{
#ifndef _WIN32
// XMM registers are caller-saved on linux/mach, so dirty mappings must be flushed around C calls
if (!skip_floats)
flushXmmRegisters = true;
#endif
call(CC_RX2RW(function));
}
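// Calling-convention background for the change above: the System V x86-64
// ABI treats every XMM register as caller-saved, while the Windows x64 ABI
// preserves XMM6-XMM15, hence the old non-Windows push/pop of xmm8-11.
// Setting flushXmmRegisters instead lets the register allocator write dirty
// XMM mappings back to the context once per opcode (see FlushXmmRegisters
// in x64_regalloc.h) rather than spilling around every call site.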
// uses eax/rax
@ -2092,6 +2020,14 @@ private:
mov((const Xbyak::Reg32 &)reg, dword[rax]);
}
}
else if (param.is_r64f() && regalloc.IsAllocf(param))
{
Xbyak::Xmm sreg = regalloc.MapXRegister(param);
if (!reg.isXMM())
movq((const Xbyak::Reg64 &)reg, sreg);
else if (reg != sreg)
movq((const Xbyak::Xmm &)reg, sreg);
}
else
{
if (regalloc.IsAllocg(param))
@ -2105,10 +2041,20 @@ private:
else
{
mov(rax, (size_t)param.reg_ptr());
if (param.is_r64f())
{
if (!reg.isXMM())
mov((const Xbyak::Reg64 &)reg, qword[rax]);
else
movq((const Xbyak::Xmm &)reg, qword[rax]);
}
else
{
if (!reg.isXMM())
mov((const Xbyak::Reg32 &)reg, dword[rax]);
else
movss((const Xbyak::Xmm &)reg, dword[rax]);
}
}
}
}
@ -2118,7 +2064,7 @@ private:
}
}
// uses rax
// uses rax or rcx
void host_reg_to_shil_param(const shil_param& param, const Xbyak::Reg& reg)
{
if (regalloc.IsAllocg(param))
@ -2133,17 +2079,38 @@ private:
{
Xbyak::Xmm sreg = regalloc.MapXRegister(param);
if (!reg.isXMM())
{
if (param.is_r64f())
movq(sreg, (const Xbyak::Reg64 &)reg);
else
movd(sreg, (const Xbyak::Reg32 &)reg);
}
else if (reg != sreg)
{
if (param.is_r64f())
movq(sreg, (const Xbyak::Xmm &)reg);
else
movss(sreg, (const Xbyak::Xmm &)reg);
}
}
else
{
const Xbyak::Reg& tmpReg = reg.getIdx() == rax.getIdx() ? rcx : rax;
mov(tmpReg, (size_t)param.reg_ptr());
if (param.is_r64f())
{
if (!reg.isXMM())
mov(qword[tmpReg], (const Xbyak::Reg64 &)reg);
else
movsd(qword[tmpReg], (const Xbyak::Xmm &)reg);
}
else
{
if (!reg.isXMM())
mov(dword[tmpReg], (const Xbyak::Reg32 &)reg);
else
movss(dword[tmpReg], (const Xbyak::Xmm &)reg);
}
}
}
@ -2161,6 +2128,7 @@ private:
X64RegAlloc regalloc;
Xbyak::util::Cpu cpu;
size_t current_opid;
bool flushXmmRegisters = false;
Xbyak::Label exit_block;
static const u32 read_mem_op_size;
static const u32 write_mem_op_size;
@ -2180,15 +2148,22 @@ void X64RegAlloc::Writeback(u32 reg, Xbyak::Operand::Code nreg)
{
compiler->RegWriteback(reg, nreg);
}
void X64RegAlloc::Preload_FPU(u32 reg, s8 nreg)
void X64RegAlloc::Preload_FPU(u32 reg, s8 nreg, bool _64bit)
{
compiler->RegPreload_FPU(reg, nreg);
compiler->RegPreload_FPU(reg, nreg, _64bit);
}
void X64RegAlloc::Writeback_FPU(u32 reg, s8 nreg)
void X64RegAlloc::Writeback_FPU(u32 reg, s8 nreg, bool _64bit)
{
compiler->RegWriteback_FPU(reg, nreg);
compiler->RegWriteback_FPU(reg, nreg, _64bit);
}
void X64RegAlloc::Merge_FPU(s8 reg1, s8 reg2)
{
compiler->RegMerge_FPU(reg1, reg2);
}
void X64RegAlloc::Shift_FPU(s8 reg)
{
compiler->RegShift_FPU(reg);
}
static BlockCompiler* compiler;
void ngen_Compile(RuntimeBlockInfo* block, bool smc_checks, bool reset, bool staging, bool optimise)

View file

@ -16,38 +16,33 @@
You should have received a copy of the GNU General Public License
along with reicast. If not, see <https://www.gnu.org/licenses/>.
*/
#pragma once
#include "deps/xbyak/xbyak.h"
#include "hw/sh4/dyna/ssa_regalloc.h"
#ifdef _WIN32
static Xbyak::Operand::Code alloc_regs[] = { Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI, Xbyak::Operand::RSI,
Xbyak::Operand::R12, Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15, (Xbyak::Operand::Code)-1 };
static s8 alloc_fregs[] = { 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1 }; // XMM6 to XMM15 are callee-saved in Windows
#else
static Xbyak::Operand::Code alloc_regs[] = {
Xbyak::Operand::RBX,
Xbyak::Operand::R12,
Xbyak::Operand::R13,
Xbyak::Operand::R14,
Xbyak::Operand::R15,
#ifndef NO_OMIT_FRAME_POINTER
Xbyak::Operand::RBP,
#endif
(Xbyak::Operand::Code)-1
};
static s8 alloc_fregs[] = { 8, 9, 10, 11, -1 }; // XMM8-11
#endif
class BlockCompiler;
struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8, true>
{
X64RegAlloc(BlockCompiler *compiler) : compiler(compiler) {}
@ -58,8 +53,10 @@ struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8,
virtual void Preload(u32 reg, Xbyak::Operand::Code nreg) override;
virtual void Writeback(u32 reg, Xbyak::Operand::Code nreg) override;
virtual void Preload_FPU(u32 reg, s8 nreg, bool _64bit) override;
virtual void Writeback_FPU(u32 reg, s8 nreg, bool _64bit) override;
virtual void Merge_FPU(s8 reg1, s8 reg2) override;
virtual void Shift_FPU(s8 reg) override;
Xbyak::Reg32 MapRegister(const shil_param& param)
{
@ -71,11 +68,7 @@ struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8,
Xbyak::Xmm MapXRegister(const shil_param& param, u32 index = 0)
{
s8 ereg = mapf(param);
if (ereg == -1)
die("VRegister not allocated");
return Xbyak::Xmm(ereg);
@ -83,19 +76,14 @@ struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8,
bool IsMapped(const Xbyak::Xmm &xmm, size_t opid)
{
return regf_used((s8)xmm.getIdx());
}
void FlushXmmRegisters(shil_opcode *opcode)
{
for (Sh4RegType reg = reg_fr_0; reg <= reg_xf_15; reg = (Sh4RegType)(reg + 1))
FlushReg(reg, true, true);
}
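// Called from the compiler's op loop right after regalloc.OpEnd(&op) when a
// GenCall has set flushXmmRegisters, so no XMM mapping survives a C function
// call on System V hosts.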
BlockCompiler *compiler;
};

View file

@ -263,7 +263,7 @@ void ngen_opcode(RuntimeBlockInfo* block, shil_opcode* op,x86_block* x86e, bool
verify(reg.IsAllocAny((Sh4RegType)(op->rd._reg + i)));
}
u32 size = op->flags & 0x7f;
u32 size = op->size();
if (op->rs1.is_imm())
{
@ -449,7 +449,7 @@ void ngen_opcode(RuntimeBlockInfo* block, shil_opcode* op,x86_block* x86e, bool
case shop_writem:
{
u32 size=op->flags&0x7f;
u32 size = op->size();
verify(reg.IsAllocg(op->rs1) || op->rs1.is_imm());
verify(op->rs2.is_imm() || op->rs2.is_r32() || (op->rs2.count()==2 && reg.IsAllocf(op->rs2,0) && reg.IsAllocf(op->rs2,1)));

View file

@ -188,7 +188,6 @@ public:
if (!find_mapping())
input_mapper = new KbInputMapping();
}
virtual ~SDLKbGamepadDevice() {}
};
class MouseInputMapping : public InputMapping
@ -215,7 +214,6 @@ public:
if (!find_mapping())
input_mapper = new MouseInputMapping();
}
virtual ~SDLMouseGamepadDevice() {}
bool gamepad_btn_input(u32 code, bool pressed) override
{
if (gui_is_open())

View file

@ -139,7 +139,6 @@ public:
//E7 Right S3
//E8-FF Reserved
}
virtual ~SDLKeyboardDevice() {}
virtual const char* name() override { return "SDL Keyboard"; }
protected:

View file

@ -603,14 +603,14 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size)
REICAST_USA(aica_reg,0x8000);
channel_unserialize(data, total_size, V7_LIBRETRO);
channel_unserialize(data, total_size, V8_LIBRETRO);
REICAST_USA(cdda_sector,CDDA_SIZE);
REICAST_US(cdda_index);
REICAST_SKIP(4 * 64); // mxlr
REICAST_US(i); // samples_gen
register_unserialize(sb_regs, data, total_size, V7_LIBRETRO) ;
register_unserialize(sb_regs, data, total_size, V8_LIBRETRO) ;
REICAST_US(SB_ISTNRM);
REICAST_US(SB_FFST_rc);
REICAST_US(SB_FFST);
@ -721,16 +721,16 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size)
pal_needs_update = true;
REICAST_USA(OnChipRAM.data,OnChipRAM_SIZE);
register_unserialize(CCN, data, total_size, V7_LIBRETRO) ;
register_unserialize(UBC, data, total_size, V7_LIBRETRO) ;
register_unserialize(BSC, data, total_size, V7_LIBRETRO) ;
register_unserialize(DMAC, data, total_size, V7_LIBRETRO) ;
register_unserialize(CPG, data, total_size, V7_LIBRETRO) ;
register_unserialize(RTC, data, total_size, V7_LIBRETRO) ;
register_unserialize(INTC, data, total_size, V7_LIBRETRO) ;
register_unserialize(TMU, data, total_size, V7_LIBRETRO) ;
register_unserialize(SCI, data, total_size, V7_LIBRETRO) ;
register_unserialize(SCIF, data, total_size, V7_LIBRETRO) ;
register_unserialize(CCN, data, total_size, V8_LIBRETRO) ;
register_unserialize(UBC, data, total_size, V8_LIBRETRO) ;
register_unserialize(BSC, data, total_size, V8_LIBRETRO) ;
register_unserialize(DMAC, data, total_size, V8_LIBRETRO) ;
register_unserialize(CPG, data, total_size, V8_LIBRETRO) ;
register_unserialize(RTC, data, total_size, V8_LIBRETRO) ;
register_unserialize(INTC, data, total_size, V8_LIBRETRO) ;
register_unserialize(TMU, data, total_size, V8_LIBRETRO) ;
register_unserialize(SCI, data, total_size, V8_LIBRETRO) ;
register_unserialize(SCIF, data, total_size, V8_LIBRETRO) ;
u16 dummyshort;
@ -881,7 +881,7 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size)
REICAST_US(i); //LIBRETRO_S(cycle_counter);
REICAST_US(i); // idxnxx
REICAST_SKIP(sizeof(state_t)); // state
REICAST_SKIP(44); // state
REICAST_US(i); // div_som_reg1
REICAST_US(i); // div_som_reg2
REICAST_US(i); // div_som_reg3
@ -921,7 +921,7 @@ bool dc_unserialize(void **data, unsigned int *total_size)
*total_size = 0 ;
REICAST_US(version) ;
if (version == V7_LIBRETRO)
if (version == V8_LIBRETRO)
return dc_unserialize_libretro(data, total_size);
if (version != V4 && version < V5)
{

View file

@ -272,8 +272,8 @@ using namespace std;
#define likely(x) x
#define unlikely(x) x
#else
#define likely(x) __builtin_expect((x),1)
#define unlikely(x) __builtin_expect((x),0)
#define likely(x) __builtin_expect(static_cast<bool>(x), 1)
#define unlikely(x) __builtin_expect(static_cast<bool>(x), 0)
#endif
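// __builtin_expect takes a long, so the explicit bool cast normalizes any
// truthy expression (pointers included) to exactly 0 or 1, which keeps the
// expected-value hint meaningful. Typical use (names illustrative):
//
//   if (unlikely(block == nullptr))
//       block = compile_block(addr); // cold path, kept off the hot trace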
//basic includes
@ -321,11 +321,11 @@ bool dc_unserialize(void **data, unsigned int *total_size);
#endif
#ifndef STRIP_TEXT
#define verify(x) if((x)==false){ msgboxf("Verify Failed : " #x "\n in %s -> %s : %d \n",MBX_ICONERROR,(__FUNCTION__),(__FILE__),__LINE__); dbgbreak;}
#define die(reason) { msgboxf("Fatal error : %s\n in %s -> %s : %d \n",MBX_ICONERROR,(reason),(__FUNCTION__),(__FILE__),__LINE__); dbgbreak;}
#define verify(x) do { if ((x) == false){ msgboxf("Verify Failed : " #x "\n in %s -> %s : %d \n", MBX_ICONERROR, (__FUNCTION__), (__FILE__), __LINE__); dbgbreak;}} while (false)
#define die(reason) do { msgboxf("Fatal error : %s\n in %s -> %s : %d \n", MBX_ICONERROR,(reason), (__FUNCTION__), (__FILE__), __LINE__); dbgbreak;} while (false)
#else
#define verify(x) if((x)==false) { dbgbreak; }
#define die(reason) { dbgbreak; }
#define verify(x) do { if ((x) == false) dbgbreak; } while (false)
#define die(reason) do { dbgbreak; } while (false)
#endif
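// The do { ... } while (false) wrapper turns each macro into one statement
// that requires its trailing semicolon, fixing the classic if/else breakage
// of the old forms (illustrative):
//
//   if (cond)
//       verify(x); // old: expanded to if (...) { ... }; and the stray ';'
//   else           // terminated the outer 'if', so this 'else' no longer
//       recover(); // parsed (or attached to the wrong 'if')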
@ -677,7 +677,8 @@ enum serialize_version_enum {
V4,
V5_LIBRETRO_UNSUPPORTED,
V6_LIBRETRO_UNSUPPORTED,
V7_LIBRETRO,
V7_LIBRETRO_UNSUPPORTED,
V8_LIBRETRO,
V5 = 800,
V6 = 801,
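// Renaming V7_LIBRETRO to V7_LIBRETRO_UNSUPPORTED means pre-V8 libretro
// savestates are now rejected outright, while V8_LIBRETRO states are matched
// by exact tag in dc_unserialize above. The libretro tags sit below V5 = 800,
// so they can never collide with ordered comparisons against native versions.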

View file

@ -197,8 +197,8 @@ private:
s16 last_left_thumb_y = 0;
s16 last_right_thumb_x = 0;
s16 last_right_thumb_y = 0;
double vib_stop_time;
float vib_inclination;
double vib_stop_time = 0;
float vib_inclination = 0;
static std::vector<std::shared_ptr<XInputGamepadDevice>> xinput_gamepads;
};
@ -238,7 +238,6 @@ public:
if (!find_mapping())
input_mapper = new KbInputMapping();
}
virtual ~WinKbGamepadDevice() {}
};
class MouseInputMapping : public InputMapping
@ -265,7 +264,7 @@ public:
if (!find_mapping())
input_mapper = new MouseInputMapping();
}
virtual ~WinMouseGamepadDevice() {}
bool gamepad_btn_input(u32 code, bool pressed) override
{
if (gui_is_open())