Compare commits


7 commits

Author SHA1 Message Date
Flyinghead
187edde155 upgrade libretro savestate to v8. spg clean up 2019-11-05 23:18:36 +01:00
Flyinghead
8dc35a3916 use doubles to emulate FIPR on x86
fixes Sonic Adventure falling off the track in Windy Valley
2019-11-05 16:07:56 +01:00
Flyinghead
8766195f75 Merge remote-tracking branch 'origin/master' into fh/rec-doublefp 2019-11-04 22:38:47 +01:00
Flyinghead
e2c590c8a3 regalloc: convert 64-bit regs to 32-bit as needed
add size() method to shil_opcode
2019-11-02 20:28:08 +01:00
Flyinghead
06f61ef9a0 regalloc: allocate 64-bit registers for x64 and arm64 arch 2019-11-02 16:03:55 +01:00
Flyinghead
87c1840010 optimize read and write area7 handler. Simplify mem handlers template
Fix likely/unlikely macros. Add some to mmu and blockmanager
Fix verify macro
inline sh4_sched_now() and sh4_sched_now64()
shil: get rid of unused V2 and V3
2019-11-02 12:02:39 +01:00
Flyinghead
8de28dbe74 clean up unnecessary dtors, volatile. Add const. Missing init 2019-11-02 11:55:17 +01:00
36 changed files with 841 additions and 805 deletions

View file

@@ -1627,20 +1627,20 @@ bool channel_unserialize(void **data, unsigned int *total_size, serialize_versio
 REICAST_US(Chans[i].CA) ;
 REICAST_US(Chans[i].step) ;
-if (ver < V7)
+if (ver != V8_LIBRETRO && ver < V7)
 	REICAST_US(dum); // Chans[i].update_rate
 Chans[i].UpdatePitch();
 REICAST_US(Chans[i].s0) ;
 REICAST_US(Chans[i].s1) ;
 REICAST_US(Chans[i].loop.looped);
-if (ver < V7)
+if (ver != V8_LIBRETRO && ver < V7)
 {
 	REICAST_US(dum); // Chans[i].loop.LSA
 	REICAST_US(dum); // Chans[i].loop.LEA
 }
 Chans[i].UpdateLoop();
 REICAST_US(Chans[i].adpcm.last_quant) ;
-if (ver >= V7)
+if (ver == V8_LIBRETRO || ver >= V7)
 {
 	REICAST_US(Chans[i].adpcm.loopstart_quant);
 	REICAST_US(Chans[i].adpcm.loopstart_prev_sample);
@@ -1653,21 +1653,21 @@ bool channel_unserialize(void **data, unsigned int *total_size, serialize_versio
 	Chans[i].adpcm.loopstart_prev_sample = 0;
 }
 REICAST_US(Chans[i].noise_state) ;
-if (ver < V7)
+if (ver != V8_LIBRETRO && ver < V7)
 {
 	REICAST_US(dum); // Chans[i].VolMix.DLAtt
 	REICAST_US(dum); // Chans[i].VolMix.DRAtt
 	REICAST_US(dum); // Chans[i].VolMix.DSPAtt
 }
 Chans[i].UpdateAtts();
-if (ver < V7)
+if (ver != V8_LIBRETRO && ver < V7)
 	REICAST_US(dum); // Chans[i].VolMix.DSPOut
 Chans[i].UpdateDSPMIX();
 REICAST_US(Chans[i].AEG.val) ;
 REICAST_US(Chans[i].AEG.state) ;
 Chans[i].SetAegState(Chans[i].AEG.state);
-if (ver < V7)
+if (ver != V8_LIBRETRO && ver < V7)
 {
 	REICAST_US(dum); // Chans[i].AEG.AttackRate
 	REICAST_US(dum); // Chans[i].AEG.Decay1Rate
@@ -1678,7 +1678,7 @@ bool channel_unserialize(void **data, unsigned int *total_size, serialize_versio
 Chans[i].UpdateAEG();
 REICAST_US(Chans[i].FEG.value);
 REICAST_US(Chans[i].FEG.state);
-if (ver >= V7)
+if (ver == V8_LIBRETRO || ver >= V7)
 {
 	REICAST_US(Chans[i].FEG.prev1);
 	REICAST_US(Chans[i].FEG.prev2);
@@ -1690,7 +1690,7 @@ bool channel_unserialize(void **data, unsigned int *total_size, serialize_versio
 }
 Chans[i].SetFegState(Chans[i].FEG.state);
 Chans[i].UpdateFEG();
-if (ver < V7)
+if (ver != V8_LIBRETRO && ver < V7)
 {
 	u8 dumu8;
 	REICAST_US(dumu8); // Chans[i].step_stream_lut1
@@ -1700,10 +1700,10 @@ bool channel_unserialize(void **data, unsigned int *total_size, serialize_versio
 Chans[i].UpdateStreamStep();
 REICAST_US(Chans[i].lfo.counter) ;
-if (ver < V7)
+if (ver != V8_LIBRETRO && ver < V7)
 	REICAST_US(dum); // Chans[i].lfo.start_value
 REICAST_US(Chans[i].lfo.state) ;
-if (ver < V7)
+if (ver != V8_LIBRETRO && ver < V7)
 {
 	u8 dumu8;
 	REICAST_US(dumu8); // Chans[i].lfo.alfo
@@ -1715,7 +1715,7 @@ bool channel_unserialize(void **data, unsigned int *total_size, serialize_versio
 }
 Chans[i].UpdateLFO();
 REICAST_US(Chans[i].enabled) ;
-if (ver < V7)
+if (ver != V8_LIBRETRO && ver < V7)
 	REICAST_US(dum); // Chans[i].ChannelNumber
 }
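A note on the recurring guard above: `ver != V8_LIBRETRO && ver < V7` only differs from plain `ver < V7` if V8_LIBRETRO's numeric value sorts below V7, i.e. the libretro fork's savestate version counter diverged from upstream's. The pattern itself is ordinary version-gated deserialization; a condensed C++ sketch with assumed enum values (chosen only so the guard matters):

enum serialize_version_enum { V5 = 5, V6 = 6, V8_LIBRETRO = 7, V7 = 8 };  // hypothetical values

// Fields dropped in V7 are still consumed from older states, then recomputed:
if (ver != V8_LIBRETRO && ver < V7)    // only true legacy streams carry the raw field
	REICAST_US(dum);                   // read and discard, e.g. update_rate
Chans[i].UpdatePitch();                // recompute from current state instead

// Fields added in V7 are also present in the new libretro format:
if (ver == V8_LIBRETRO || ver >= V7)
	REICAST_US(Chans[i].adpcm.loopstart_quant);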

View file

@@ -258,9 +258,10 @@ static void WriteBios(u32 addr,u32 data,u32 sz)
 //use unified size handler for registers
 //it really makes no sense to use different size handlers on em -> especially when we can use templates :p
-template<u32 sz, class T>
+template<typename T>
 T DYNACALL ReadMem_area0(u32 addr)
 {
+	const u32 sz = (u32)sizeof(T);
 	addr &= 0x01FFFFFF;//to get rid of non needed bits
 	const u32 base=(addr>>16);
 	//map 0x0000 to 0x01FF to Default handler
@@ -343,9 +344,10 @@ T DYNACALL ReadMem_area0(u32 addr)
 	return 0;
 }
-template<u32 sz, class T>
+template<class T>
 void DYNACALL WriteMem_area0(u32 addr,T data)
 {
+	const u32 sz = (u32)sizeof(T);
 	addr &= 0x01FFFFFF;//to get rid of non needed bits
 	const u32 base=(addr>>16);
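Folding the size parameter into the type is the core of this cleanup: the old `template<u32 sz, class T>` signature made callers keep `sz` and `T` in sync by hand. A minimal standalone sketch of the idea (names hypothetical, not the emulator's actual memory map):

#include <cstdint>
#include <cstring>

template <typename T>
T ReadMemSketch(const std::uint8_t* mem, std::uint32_t addr)
{
	constexpr std::uint32_t sz = sizeof(T);   // 1, 2 or 4 -- always consistent with T
	static_assert(sz == 1 || sz == 2 || sz == 4, "unsupported access size");
	T value;
	std::memcpy(&value, mem + addr, sz);      // alignment-safe load
	return value;
}

Callers instantiate `ReadMemSketch<u16>(...)` instead of the old, mismatch-prone `ReadMem<2, u16>(...)`.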

View file

@@ -117,8 +117,8 @@ bool IsOnSh4Ram(u32 addr)
 static void maple_DoDma()
 {
-	verify(SB_MDEN &1)
-	verify(SB_MDST &1)
+	verify(SB_MDEN &1);
+	verify(SB_MDST &1);
 	DEBUG_LOG(MAPLE, "Maple: DoMapleDma SB_MDSTAR=%x", SB_MDSTAR);
 	u32 addr = SB_MDSTAR;

View file

@@ -60,17 +60,8 @@ void _vmem_init_mappings();
 //functions to register and map handlers/memory
 _vmem_handler _vmem_register_handler(_vmem_ReadMem8FP* read8,_vmem_ReadMem16FP* read16,_vmem_ReadMem32FP* read32, _vmem_WriteMem8FP* write8,_vmem_WriteMem16FP* write16,_vmem_WriteMem32FP* write32);
-#define _vmem_register_handler_Template(read,write) _vmem_register_handler \
-		(read<1,u8>,read<2,u16>,read<4,u32>, \
-		write<1,u8>,write<2,u16>,write<4,u32>)
-
-#define _vmem_register_handler_Template1(read,write,extra_Tparam) _vmem_register_handler \
-		(read<1,u8,extra_Tparam>,read<2,u16,extra_Tparam>,read<4,u32,extra_Tparam>, \
-		write<1,u8,extra_Tparam>,write<2,u16,extra_Tparam>,write<4,u32,extra_Tparam>)
-
-#define _vmem_register_handler_Template2(read,write,etp1,etp2) _vmem_register_handler \
-		(read<1,u8,etp1,etp2>,read<2,u16,etp1,etp2>,read<4,u32,etp1,etp2>, \
-		write<1,u8,etp1,etp2>,write<2,u16,etp1,etp2>,write<4,u32,etp1,etp2>)
+#define _vmem_register_handler_Template(read, write) _vmem_register_handler(read<u8>, read<u16>, read<u32>, \
+		write<u8>, write<u16>, write<u32>)
 void _vmem_map_handler(_vmem_handler Handler,u32 start,u32 end);
 void _vmem_map_block(void* base,u32 start,u32 end,u32 mask);
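With the size parameter gone, one template argument per access width is enough, and the removed `Template1`/`Template2` variants (which existed only to forward extra template parameters) evidently had no remaining users. For instance, registering the Ext. Device handlers defined further down in this compare expands to:

_vmem_register_handler(ReadMem_extdev_T<u8>,  ReadMem_extdev_T<u16>,  ReadMem_extdev_T<u32>,
		WriteMem_extdev_T<u8>, WriteMem_extdev_T<u16>, WriteMem_extdev_T<u32>);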

View file

@@ -268,7 +268,7 @@ static u32 vmem32_map_mmu(u32 address, bool write)
 	const vector<vram_lock>& blocks = vram_blocks[start / VRAM_PROT_SEGMENT];
 	vramlist_lock.Lock();
-	for (int i = blocks.size() - 1; i >= 0; i--)
+	for (int i = (int)blocks.size() - 1; i >= 0; i--)
 	{
 		if (blocks[i].start < end && blocks[i].end >= start)
 		{
@@ -344,7 +344,7 @@ bool vmem32_handle_signal(void *fault_addr, bool write, u32 exception_pc)
 	if (!vmem32_inited || (u8*)fault_addr < virt_ram_base || (u8*)fault_addr >= virt_ram_base + VMEM32_SIZE)
 		return false;
 	//vmem32_page_faults++;
-	u32 guest_addr = (u8*)fault_addr - virt_ram_base;
+	u32 guest_addr = (u32)((u8*)fault_addr - virt_ram_base);
 	u32 rv = vmem32_map_address(guest_addr, write);
 	DEBUG_LOG(VMEM, "vmem32_handle_signal handled signal %s @ %p -> %08x rv=%d", write ? "W" : "R", fault_addr, guest_addr, rv);
 	if (rv == MMU_ERROR_NONE)

View file

@@ -51,7 +51,7 @@ protected:
 	bool RomPioAutoIncrement;
 	u32 DmaOffset;
 	u32 DmaCount;
-	u32 key;
+	u32 key =0;
 	// Naomi 840-0001E communication board
 	u16 comm_ctrl = 0xC000;
 	u16 comm_offset = 0;

View file

@@ -1,3 +1,7 @@
+//SPG emulation; Scanline/Raster beam registers & interrupts
+//Time to emulate that stuff correctly ;)
+//
+//
 #include "spg.h"
 #include "Renderer_if.h"
 #include "pvr_regs.h"
@@ -6,38 +10,31 @@
 #include "hw/sh4/sh4_sched.h"
 #include "input/gamepad_device.h"
 
-//SPG emulation; Scanline/Raster beam registers & interrupts
-//Time to emulate that stuff correctly ;)
-
-u32 in_vblank=0;
+u32 in_vblank;
 u32 clc_pvr_scanline;
-u32 pvr_numscanlines=512;
-u32 prv_cur_scanline=-1;
-u32 vblk_cnt=0;
+static u32 pvr_numscanlines = 512;
+static u32 prv_cur_scanline = -1;
+static u32 vblk_cnt;
 
 float last_fps=0;
 
 //54 mhz pixel clock :)
 #define PIXEL_CLOCK (54*1000*1000/2)
-u32 Line_Cycles=0;
-u32 Frame_Cycles=0;
+static u32 Line_Cycles;
+static u32 Frame_Cycles;
 int render_end_schid;
 int vblank_schid;
 
 void CalculateSync()
 {
-	u32 pixel_clock;
-	float scale_x=1,scale_y=1;
-
-	pixel_clock=PIXEL_CLOCK / (FB_R_CTRL.vclk_div?1:2);
-	//We need to calculate the pixel clock
-	u32 sync_cycles=(SPG_LOAD.hcount+1)*(SPG_LOAD.vcount+1);
+	u32 pixel_clock = PIXEL_CLOCK / (FB_R_CTRL.vclk_div ? 1 : 2);
 	pvr_numscanlines=SPG_LOAD.vcount+1;
 	Line_Cycles=(u32)((u64)SH4_MAIN_CLOCK*(u64)(SPG_LOAD.hcount+1)/(u64)pixel_clock);
+
+	float scale_x = 1;
+	float scale_y = 1;
 
 	if (SPG_CONTROL.interlace)
 	{
 		//this is a temp hack
@@ -59,19 +56,15 @@ void CalculateSync()
 	rend_set_fb_scale(scale_x,scale_y);
 
-	//Frame_Cycles=(u64)DCclock*(u64)sync_cycles/(u64)pixel_clock;
 	Frame_Cycles=pvr_numscanlines*Line_Cycles;
 	prv_cur_scanline=0;
 
 	sh4_sched_request(vblank_schid,Line_Cycles);
 }
 
-double speed_load_mspdf;
-
 int mips_counter;
 
-double full_rps;
+static double full_rps;
 
 static u32 lightgun_line = 0xffff;
 static u32 lightgun_hpos;
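To put rough numbers on the formulas above (register values assumed for a typical NTSC mode, and SH4_MAIN_CLOCK assumed to be 200 MHz; neither is stated in this diff): pixel_clock = 54 MHz / 2 = 27 MHz, and with hcount+1 = 858 and vcount+1 = 525:

	Line_Cycles  = 200,000,000 * 858 / 27,000,000 ≈ 6,355 SH4 cycles
	Frame_Cycles = 525 * 6,355 ≈ 3,336,375 cycles  ->  200 MHz / 3,336,375 ≈ 59.94 Hz

which is the expected NTSC field rate, so the vblank callback rescheduled every Line_Cycles steps through pvr_numscanlines lines per frame.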

View file

@@ -57,10 +57,9 @@ DynarecCodeEntryPtr DYNACALL bm_GetCodeByVAddr(u32 addr)
 	if (!mmu_enabled())
 #endif
 		return bm_GetCode(addr);
 #ifndef NO_MMU
-	else
-	{
-		if (addr & 1)
-		{
+	else if (unlikely(addr & 1))
+	{
 		switch (addr)
 		{
@@ -97,14 +96,13 @@ DynarecCodeEntryPtr DYNACALL bm_GetCodeByVAddr(u32 addr)
 		u32 paddr;
 		u32 rv = mmu_instruction_translation(addr, paddr);
-		if (rv != MMU_ERROR_NONE)
+		if (unlikely(rv != MMU_ERROR_NONE))
 		{
 			DoMMUException(addr, rv, MMU_TT_IREAD);
 			mmu_instruction_translation(next_pc, paddr);
 		}
 
 		return bm_GetCode(paddr);
-	}
 #endif
 }
@@ -115,7 +113,7 @@ RuntimeBlockInfoPtr DYNACALL bm_GetBlock(u32 addr)
 	DynarecCodeEntryPtr cde = bm_GetCode(addr);  // Returns RX ptr
 	if (cde == ngen_FailedToFindBlock)
-		return NULL;
+		return nullptr;
 	else
 		return bm_GetBlock((void*)cde);  // Returns RX pointer
 }
@@ -124,18 +122,18 @@ RuntimeBlockInfoPtr DYNACALL bm_GetBlock(u32 addr)
 RuntimeBlockInfoPtr bm_GetBlock(void* dynarec_code)
 {
 	if (blkmap.empty())
-		return NULL;
+		return nullptr;
 
 	void *dynarecrw = CC_RX2RW(dynarec_code);
 	// Returns a block who's code addr is bigger than dynarec_code (or end)
 	auto iter = blkmap.upper_bound(dynarecrw);
 	if (iter == blkmap.begin())
-		return NULL;
+		return nullptr;
 	iter--;  // Need to go back to find the potential candidate
 
 	// However it might be out of bounds, check for that
 	if ((u8*)iter->second->code + iter->second->host_code_size < (u8*)dynarec_code)
-		return NULL;
+		return nullptr;
 
 	verify(iter->second->contains_code((u8*)dynarecrw));
 	return iter->second;
@@ -151,7 +149,7 @@ RuntimeBlockInfoPtr bm_GetStaleBlock(void* dynarec_code)
 {
 	void *dynarecrw = CC_RX2RW(dynarec_code);
 	if (del_blocks.empty())
-		return NULL;
+		return nullptr;
 	// Start from the end to get the youngest one
 	auto it = del_blocks.end();
 	do
@@ -161,7 +159,7 @@ RuntimeBlockInfoPtr bm_GetStaleBlock(void* dynarec_code)
 			return *it;
 	} while (it != del_blocks.begin());
 
-	return NULL;
+	return nullptr;
 }
 
 void bm_AddBlock(RuntimeBlockInfo* blk)
@@ -587,8 +585,7 @@ void bm_RamWriteAccess(u32 addr)
 	unprotected_pages[addr / PAGE_SIZE] = true;
 	bm_UnlockPage(addr);
 	set<RuntimeBlockInfo*>& block_list = blocks_per_page[addr / PAGE_SIZE];
-	vector<RuntimeBlockInfo*> list_copy;
-	list_copy.insert(list_copy.begin(), block_list.begin(), block_list.end());
+	vector<RuntimeBlockInfo*> list_copy(block_list.begin(), block_list.end());
 	if (!list_copy.empty())
 		DEBUG_LOG(DYNAREC, "bm_RamWriteAccess write access to %08x pc %08x", addr, next_pc);
 	for (auto& block : list_copy)
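The `unlikely()` annotations here and in the MMU header below (one of the commits above is "Fix likely/unlikely macros") conventionally map to compiler branch-prediction hints. A typical definition, shown as an assumed sketch rather than this project's exact one:

#if defined(__GNUC__) || defined(__clang__)
#define likely(x)   __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x)   (x)
#define unlikely(x) (x)
#endif

The hint only moves the cold path (MMU faults, odd PC values) out of the fall-through; behavior is unchanged.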

View file

@@ -393,7 +393,7 @@ struct RegAlloc
 	}
 	else
 	{
-		verify(regs.type==FMT_V4 || regs.type==FMT_V2 || regs.type==FMT_F64);
+		verify(regs.type==FMT_V4 || regs.type==FMT_F64);
 		for (u32 i=0; i<regs.count(); i++)
 		{

View file

@@ -14,16 +14,12 @@ enum shil_param_type
 	FMT_F32,
 	FMT_F64,
-	FMT_V2,
-	FMT_V3,
 	FMT_V4,
 	FMT_V8,
 	FMT_V16,
 
 	FMT_REG_BASE = FMT_I32,
-	FMT_VECTOR_BASE=FMT_V2,
-
-	FMT_MASK=0xFFFF,
+	FMT_VECTOR_BASE = FMT_V4,
 };
 
 /*
@@ -125,9 +121,10 @@ struct shil_param
 	bool is_vector() const { return type >= FMT_VECTOR_BASE; }
 
-	u32 count() const { return type==FMT_F64?2:type==FMT_V2?2:
-			type==FMT_V3?3:type==FMT_V4?4:type==FMT_V8?8:
-			type==FMT_V16?16:1; }	//count of hardware regs
+	u32 count() const { return type == FMT_F64 ? 2
+			: type == FMT_V4 ? 4
+			: type == FMT_V8 ? 8
+			: type == FMT_V16 ? 16 : 1; }	//count of hardware regs
 
 /*
 	Imms:
@@ -145,9 +142,7 @@ struct shil_param
 struct shil_opcode
 {
 	shilop op;
-	u32 Flow;
 	u32 flags;
-	u32 flags2;
 
 	shil_param rd,rd2;
 	shil_param rs1,rs2,rs3;
@@ -157,6 +152,7 @@ struct shil_opcode
 	bool delay_slot;
 
 	string dissasm() const;
+	u32 size() const { return flags & 0x7f; }
 };
 
 const char* shil_opcode_name(int op);
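The new `size()` accessor just names an existing convention: for memory ops, the low 7 bits of `flags` hold the operand size in bytes. A self-contained illustration of the encoding (not the project's full struct):

#include <cassert>
#include <cstdint>

struct shil_opcode_sketch {
	std::uint32_t flags;
	std::uint32_t size() const { return flags & 0x7f; }  // low 7 bits = size in bytes
};

int main() {
	shil_opcode_sketch op{0x80 | 4};  // high bits: other flags; low bits: 4-byte access
	assert(op.size() == 4);           // replaces open-coded (op.flags & 0x7f) call sites
}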

View file

@@ -914,18 +914,32 @@ shil_opc_end()
 //shop_fipr
 shil_opc(fipr)
+#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
 shil_canonical
 (
 f32,f1,(float* fn, float* fm),
-	float idp;
-	idp=fn[0]*fm[0];
+	// Using double for better precision on x86 (Sonic Adventure 2)
+	double idp = (double)fn[0] * fm[0];
+	idp += (double)fn[1] * fm[1];
+	idp += (double)fn[2] * fm[2];
+	idp += (double)fn[3] * fm[3];
+	return fixNaN((float)idp);
+)
+#else
+shil_canonical
+(
+f32,f1,(float* fn, float* fm),
+	float idp = fn[0] * fm[0];
 	idp += fn[1] * fm[1];
 	idp += fn[2] * fm[2];
 	idp += fn[3] * fm[3];
 	return fixNaN(idp);
 )
+#endif
 
 shil_compile
 (
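Why this fixes Windy Valley: per the commit message, the float version of the 4-term dot product rounds after every multiply-add, and on x86 that accumulated rounding differs enough from the SH4's FIPR result to break Sonic Adventure's track collision. Accumulating in double and rounding once at the end evidently tracks the hardware closely enough. The two strategies side by side, as a standalone sketch (fixNaN omitted):

float fipr_float(const float* fn, const float* fm)   // non-x86 path above
{
	float idp = fn[0] * fm[0];
	idp += fn[1] * fm[1];
	idp += fn[2] * fm[2];
	idp += fn[3] * fm[3];
	return idp;                       // rounded to float at every step
}

float fipr_double(const float* fn, const float* fm)  // x86 path above
{
	double idp = (double)fn[0] * fm[0];
	idp += (double)fn[1] * fm[1];
	idp += (double)fn[2] * fm[2];
	idp += (double)fn[3] * fm[3];
	return (float)idp;                // single rounding at the end
}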

View file

@@ -223,7 +223,7 @@ private:
 		if (op.rs1.is_imm() && op.op == shop_readm && block->read_only
 				&& (op.rs1._imm >> 12) >= (block->vaddr >> 12)
 				&& (op.rs1._imm >> 12) <= ((block->vaddr + block->sh4_code_size - 1) >> 12)
-				&& (op.flags & 0x7f) <= 4)
+				&& op.size() <= 4)
 		{
 			bool doit = false;
 			if (mmu_enabled())
@@ -240,7 +240,7 @@ private:
 			if (doit)
 			{
 				u32 v;
-				switch (op.flags & 0x7f)
+				switch (op.size())
 				{
 				case 1:
 					v = (s32)(::s8)ReadMem8(op.rs1._imm);
@@ -566,7 +566,7 @@ private:
 		}
 
 		// Attempt to eliminate them
-		for (auto& alias : aliases)
+		for (const auto& alias : aliases)
 		{
 			if (writeback_values.count(alias.first) > 0)
 				continue;

View file

@@ -28,7 +28,7 @@
 #define ssa_printf(...) DEBUG_LOG(DYNAREC, __VA_ARGS__)
 
-template<typename nreg_t, typename nregf_t, bool explode_spans = true>
+template<typename nreg_t, typename nregf_t, bool _64bits = true>
 class RegAlloc
 {
 public:
@@ -52,6 +52,7 @@ public:
 	void OpBegin(shil_opcode* op, int opid)
 	{
+		// TODO dup code with NeedsWriteBack
 		opnum = opid;
 		if (op->op == shop_ifb)
 		{
@@ -79,17 +80,17 @@ public:
 				FlushReg((Sh4RegType)i, true);
 		}
 		// Flush regs used by vector ops
-		if (op->rs1.is_reg() && op->rs1.count() > 1)
+		if (IsVector(op->rs1))
 		{
 			for (int i = 0; i < op->rs1.count(); i++)
 				FlushReg((Sh4RegType)(op->rs1._reg + i), false);
 		}
-		if (op->rs2.is_reg() && op->rs2.count() > 1)
+		if (IsVector(op->rs2))
 		{
 			for (int i = 0; i < op->rs2.count(); i++)
 				FlushReg((Sh4RegType)(op->rs2._reg + i), false);
 		}
-		if (op->rs3.is_reg() && op->rs3.count() > 1)
+		if (IsVector(op->rs3))
 		{
 			for (int i = 0; i < op->rs3.count(); i++)
 				FlushReg((Sh4RegType)(op->rs3._reg + i), false);
@@ -101,7 +102,7 @@ public:
 		AllocSourceReg(op->rs3);
 		// Hard flush vector ops destination regs
 		// Note that this is incorrect if a reg is both src (scalar) and dest (vec). However such an op doesn't exist.
-		if (op->rd.is_reg() && op->rd.count() > 1)
+		if (IsVector(op->rd))
 		{
 			for (int i = 0; i < op->rd.count(); i++)
 			{
@@ -109,7 +110,7 @@ public:
 				FlushReg((Sh4RegType)(op->rd._reg + i), true);
 			}
 		}
-		if (op->rd2.is_reg() && op->rd2.count() > 1)
+		if (IsVector(op->rd2))
 		{
 			for (int i = 0; i < op->rd2.count(); i++)
 			{
@@ -171,41 +172,26 @@ public:
 	bool IsAllocAny(const shil_param& prm)
 	{
-		if (prm.is_reg())
-		{
-			bool rv = IsAllocAny(prm._reg);
-			if (prm.count() != 1)
-			{
-				for (u32 i = 1;i < prm.count(); i++)
-					verify(IsAllocAny((Sh4RegType)(prm._reg + i)) == rv);
-			}
-			return rv;
-		}
-		else
-		{
-			return false;
-		}
+		return IsAllocg(prm) || IsAllocf(prm);
 	}
 
 	bool IsAllocg(const shil_param& prm)
 	{
-		if (prm.is_reg())
+		if (prm.is_reg() && IsAllocg(prm._reg))
 		{
 			verify(prm.count() == 1);
-			return IsAllocg(prm._reg);
+			return true;
 		}
-		else
-		{
-			return false;
-		}
+		return false;
 	}
 
 	bool IsAllocf(const shil_param& prm)
 	{
 		if (prm.is_reg())
 		{
-			verify(prm.count() == 1);
-			return IsAllocf(prm._reg);
+			if (!_64bits && prm.is_r64f())
+				return false;
+			return IsAllocf(prm._reg, prm.count());
 		}
 		else
 		{
@@ -223,6 +209,9 @@ public:
 	nregf_t mapf(const shil_param& prm)
 	{
 		verify(IsAllocf(prm));
+		if (_64bits)
+			verify(prm.count() <= 2);
+		else
 			verify(prm.count() == 1);
 		return mapf(prm._reg);
 	}
@@ -257,15 +246,20 @@ public:
 	virtual void Preload(u32 reg, nreg_t nreg) = 0;
 	virtual void Writeback(u32 reg, nreg_t nreg) = 0;
-	virtual void Preload_FPU(u32 reg, nregf_t nreg) = 0;
-	virtual void Writeback_FPU(u32 reg, nregf_t nreg) = 0;
+	virtual void Preload_FPU(u32 reg, nregf_t nreg, bool _64bit) = 0;
+	virtual void Writeback_FPU(u32 reg, nregf_t nreg, bool _64bit) = 0;
+	// merge reg1 (least significant 32 bits) and reg2 (most significant 32 bits) into reg1 (64-bit result)
+	virtual void Merge_FPU(nregf_t reg1, nregf_t reg2) { die("not implemented"); }
+	// shift given 64-bit reg right by 32 bits
+	virtual void Shift_FPU(nregf_t reg) { die("not implemented"); }
 
 private:
 	struct reg_alloc {
 		u32 host_reg;
-		u16 version;
+		u16 version[2];
 		bool write_back;
 		bool dirty;
+		bool _64bit;
 	};
 
 	bool IsFloat(Sh4RegType reg)
@@ -285,11 +279,15 @@ private:
 		return (nregf_t)reg_alloced[reg].host_reg;
 	}
 
-	bool IsAllocf(Sh4RegType reg)
+	bool IsAllocf(Sh4RegType reg, int size)
 	{
 		if (!IsFloat(reg))
 			return false;
-		return reg_alloced.find(reg) != reg_alloced.end();
+		auto it = reg_alloced.find(reg);
+		if (it == reg_alloced.end())
+			return false;
+		verify(it->second._64bit == (size == 2));
+		return true;
 	}
 
 	bool IsAllocg(Sh4RegType reg)
@@ -299,9 +297,14 @@ private:
 		return reg_alloced.find(reg) != reg_alloced.end();
 	}
 
-	bool IsAllocAny(Sh4RegType reg)
+	bool IsVector(const shil_param& param)
 	{
-		return IsAllocg(reg) || IsAllocf(reg);
+		return param.is_reg() && param.count() > (_64bits ? 2 : 1);
+	}
+
+	bool ContainsReg(const shil_param& param, Sh4RegType reg)
+	{
+		return param.is_reg() && reg >= param._reg && reg < (Sh4RegType)(param._reg + param.count());
 	}
 
 	void WriteBackReg(Sh4RegType reg_num, struct reg_alloc& reg_alloc)
@@ -310,9 +313,9 @@ private:
 		{
 			if (!fast_forwarding)
 			{
-				ssa_printf("WB %s.%d <- %cx", name_reg(reg_num).c_str(), reg_alloc.version, 'a' + reg_alloc.host_reg);
+				ssa_printf("WB %s.%d <- %cx", name_reg(reg_num).c_str(), reg_alloc.version[0], 'a' + reg_alloc.host_reg);
 				if (IsFloat(reg_num))
-					Writeback_FPU(reg_num, (nregf_t)reg_alloc.host_reg);
+					Writeback_FPU(reg_num, (nregf_t)reg_alloc.host_reg, reg_alloc._64bit);
 				else
 					Writeback(reg_num, (nreg_t)reg_alloc.host_reg);
 			}
@@ -320,12 +323,14 @@ private:
 			reg_alloc.dirty = false;
 		}
 	}
 
-	void FlushReg(Sh4RegType reg_num, bool hard)
+protected:
+	void FlushReg(Sh4RegType reg_num, bool hard, bool write_if_dirty = false)
 	{
 		auto reg = reg_alloced.find(reg_num);
 		if (reg != reg_alloced.end())
 		{
+			if (write_if_dirty && reg->second.dirty)
+				reg->second.write_back = true;
 			WriteBackReg(reg->first, reg->second);
 			if (hard)
 			{
@@ -339,6 +344,7 @@ private:
 		}
 	}
 
+private:
 	void FlushAllRegs(bool hard)
 	{
 		if (hard)
@@ -355,8 +361,11 @@ private:
 	void AllocSourceReg(const shil_param& param)
 	{
-		if (param.is_reg() && param.count() == 1)	// TODO EXPLODE_SPANS?
+		if (param.is_reg()
+				&& ((_64bits && param.count() <= 2) || (!_64bits && param.count() == 1)))
 		{
+			Handle64bitRegisters(param, true);
 			auto it = reg_alloced.find(param._reg);
 			if (it == reg_alloced.end())
 			{
@@ -381,16 +390,24 @@ private:
 					host_reg = host_fregs.back();
 					host_fregs.pop_back();
 				}
-				reg_alloced[param._reg] = { host_reg, param.version[0], false, false };
+				if (param.is_r64f())
+					reg_alloced[param._reg] = { host_reg, { param.version[0], param.version[1] }, false, false, true };
+				else
+					reg_alloced[param._reg] = { host_reg, { param.version[0] }, false, false, false };
 				if (!fast_forwarding)
 				{
 					ssa_printf("PL %s.%d -> %cx", name_reg(param._reg).c_str(), param.version[0], 'a' + host_reg);
 					if (IsFloat(param._reg))
-						Preload_FPU(param._reg, (nregf_t)host_reg);
+						Preload_FPU(param._reg, (nregf_t)host_reg, param.count() == 2);
 					else
 						Preload(param._reg, (nreg_t)host_reg);
 				}
 			}
+			else
+			{
+				verify(it->second._64bit == (param.count() == 2));
+			}
+			verify(param.count() == 1 || reg_alloced.find((Sh4RegType)(param._reg + 1)) == reg_alloced.end());
 		}
 	}
 
@@ -400,14 +417,29 @@ private:
 		{
 			shil_opcode* op = &block->oplist[i];
 			// if a subsequent op needs all or some regs flushed to mem
+			switch (op->op)
+			{
 			// TODO we could look at the ifb op to optimize what to flush
-			if (op->op == shop_ifb || (mmu_enabled() && (op->op == shop_readm || op->op == shop_writem || op->op == shop_pref)))
+			case shop_ifb:
 				return true;
-			if (op->op == shop_sync_sr && (/*reg == reg_sr_T ||*/ reg == reg_sr_status || reg == reg_old_sr_status || (reg >= reg_r0 && reg <= reg_r7)
-					|| (reg >= reg_r0_Bank && reg <= reg_r7_Bank)))
+			case shop_readm:
+			case shop_writem:
+			case shop_pref:
+				if (mmu_enabled())
+					return true;
+				break;
+			case shop_sync_sr:
+				if (/*reg == reg_sr_T ||*/ reg == reg_sr_status || reg == reg_old_sr_status || (reg >= reg_r0 && reg <= reg_r7)
+						|| (reg >= reg_r0_Bank && reg <= reg_r7_Bank))
 					return true;
-			if (op->op == shop_sync_fpscr && (reg == reg_fpscr || reg == reg_old_fpscr || (reg >= reg_fr_0 && reg <= reg_xf_15)))
+				break;
+			case shop_sync_fpscr:
+				if (reg == reg_fpscr || reg == reg_old_fpscr || (reg >= reg_fr_0 && reg <= reg_xf_15))
 					return true;
+				break;
+			default:
+				break;
+			}
 			// if reg is used by a subsequent vector op that doesn't use reg allocation
 			if (UsesReg(op, reg, version, true))
 				return true;
@@ -423,8 +455,11 @@ private:
 	void AllocDestReg(const shil_param& param)
 	{
-		if (param.is_reg() && param.count() == 1)	// TODO EXPLODE_SPANS?
+		if (param.is_reg()
+				&& ((_64bits && param.count() <= 2) || (!_64bits && param.count() == 1)))
 		{
+			Handle64bitRegisters(param, false);
 			auto it = reg_alloced.find(param._reg);
 			if (it == reg_alloced.end())
 			{
@@ -449,7 +484,21 @@ private:
 					host_reg = host_fregs.back();
 					host_fregs.pop_back();
 				}
-				reg_alloced[param._reg] = { host_reg, param.version[0], NeedsWriteBack(param._reg, param.version[0]), true };
+				if (param.is_r64f())
+					reg_alloced[param._reg] = {
+							host_reg,
+							{ param.version[0], param.version[1] },
+							NeedsWriteBack(param._reg, param.version[0])
+								|| NeedsWriteBack((Sh4RegType)(param._reg + 1), param.version[1]),
+							true,
+							true };
+				else
+					reg_alloced[param._reg] = {
+							host_reg,
+							{ param.version[0] },
+							NeedsWriteBack(param._reg, param.version[0]),
+							true,
+							false };
 				ssa_printf("   %s.%d -> %cx %s", name_reg(param._reg).c_str(), param.version[0], 'a' + host_reg, reg_alloced[param._reg].write_back ? "(wb)" : "");
 			}
 			else
@@ -458,9 +507,17 @@ private:
 				verify(!reg.write_back);
 				reg.write_back = NeedsWriteBack(param._reg, param.version[0]);
 				reg.dirty = true;
-				reg.version = param.version[0];
+				reg.version[0] = param.version[0];
+				verify(reg._64bit == param.is_r64f());
+				if (param.is_r64f())
+				{
+					reg.version[1] = param.version[1];
+					// TODO this is handled by Handle64BitsRegisters()
+					reg.write_back = reg.write_back || NeedsWriteBack((Sh4RegType)(param._reg + 1), param.version[1]);
+				}
 			}
 			verify(reg_alloced[param._reg].dirty);
+			verify(param.count() == 1 || reg_alloced.find((Sh4RegType)(param._reg + 1)) == reg_alloced.end());
 		}
 	}
 
@@ -495,7 +552,8 @@ private:
 			{
 				op = &block->oplist[i];
 				// Vector ops don't use reg alloc
-				if (UsesReg(op, reg.first, reg.second.version, false))
+				if (UsesReg(op, reg.first, reg.second.version[0], false)
+						|| (reg.second._64bit && UsesReg(op, (Sh4RegType)(reg.first + 1), reg.second.version[1], false)))
 				{
 					first_use = i;
 					break;
@@ -531,8 +589,9 @@ private:
 		// It's possible that the same host reg is allocated to a source operand
 		// and to the (future) dest operand. In this case we want to keep both mappings
 		// until the current op is done.
-		WriteBackReg(spilled_reg, reg_alloced[spilled_reg]);
-		u32 host_reg = reg_alloced[spilled_reg].host_reg;
+		reg_alloc& alloc = reg_alloced[spilled_reg];
+		WriteBackReg(spilled_reg, alloc);
+		u32 host_reg = alloc.host_reg;
 		if (IsFloat(spilled_reg))
 			host_fregs.push_front((nregf_t)host_reg);
 		else
@@ -541,24 +600,19 @@ private:
 		}
 	}
 
-	bool IsVectorOp(shil_opcode* op)
-	{
-		return op->rs1.count() > 1 || op->rs2.count() > 1 || op->rs3.count() > 1 || op->rd.count() > 1 || op->rd2.count() > 1;
-	}
-
 	bool UsesReg(shil_opcode* op, Sh4RegType reg, u32 version, bool vector)
 	{
-		if (op->rs1.is_reg() && reg >= op->rs1._reg && reg < (Sh4RegType)(op->rs1._reg + op->rs1.count())
+		if (ContainsReg(op->rs1, reg)
 				&& version == op->rs1.version[reg - op->rs1._reg]
-				&& vector == (op->rs1.count() > 1))
+				&& vector == IsVector(op->rs1))
 			return true;
-		if (op->rs2.is_reg() && reg >= op->rs2._reg && reg < (Sh4RegType)(op->rs2._reg + op->rs2.count())
+		if (ContainsReg(op->rs2, reg)
 				&& version == op->rs2.version[reg - op->rs2._reg]
-				&& vector == (op->rs2.count() > 1))
+				&& vector == IsVector(op->rs2))
 			return true;
-		if (op->rs3.is_reg() && reg >= op->rs3._reg && reg < (Sh4RegType)(op->rs3._reg + op->rs3.count())
+		if (ContainsReg(op->rs3, reg)
 				&& version == op->rs3.version[reg - op->rs3._reg]
-				&& vector == (op->rs3.count() > 1))
+				&& vector == IsVector(op->rs3))
 			return true;
 
 		return false;
@@ -566,14 +620,68 @@ private:
 
 	bool DefsReg(shil_opcode* op, Sh4RegType reg, bool vector)
 	{
-		if (op->rd.is_reg() && reg >= op->rd._reg && reg < (Sh4RegType)(op->rd._reg + op->rd.count())
-				&& vector == (op->rd.count() > 1))
+		if (ContainsReg(op->rd, reg) && vector == IsVector(op->rd))
 			return true;
-		if (op->rd2.is_reg() && reg >= op->rd2._reg && reg < (Sh4RegType)(op->rd2._reg + op->rd2.count())
-				&& vector == (op->rd2.count() > 1))
+		if (ContainsReg(op->rd2, reg) && vector == IsVector(op->rd2))
 			return true;
 		return false;
 	}
 
+	void Handle64bitRegisters(const shil_param& param, bool source)
+	{
+		if (!(_64bits && (param.is_r32f() || param.is_r64f())))
+			return;
+		auto it = reg_alloced.find(param._reg);
+		if (it != reg_alloced.end() && it->second._64bit != param.is_r64f())
+		{
+			if (param.is_r64f())
+			{
+				// Try to merge existing halves
+				auto it2 = reg_alloced.find((Sh4RegType)(param._reg + 1));
+				if (it2 != reg_alloced.end())
+				{
+					if (source)
+						it->second.dirty = it->second.dirty || it2->second.dirty;
+					else
+						it->second.dirty = false;
+					it->second._64bit = true;
+					nregf_t host_reg2 = (nregf_t)it2->second.host_reg;
+					reg_alloced.erase(it2);
+					Merge_FPU((nregf_t)it->second.host_reg, host_reg2);
+					return;
+				}
+			}
+			// Write back the 64-bit register even if used as destination because the other half needs to be saved
+			FlushReg(it->first, param.is_r64f(), source || it->second._64bit);
+			if (!param.is_r64f())
+			{
+				// Reuse existing reg
+				it->second._64bit = false;
+			}
+		}
+		if (param.is_r64f())
+		{
+			auto it2 = reg_alloced.find((Sh4RegType)(param._reg + 1));
+			if (it2 != reg_alloced.end())
+				FlushReg(it2->first, true, source);
+		}
+		else if (param._reg & 1)
+		{
+			auto it2 = reg_alloced.find((Sh4RegType)(param._reg - 1));
+			if (it2 != reg_alloced.end() && it2->second._64bit)
+			{
+				// Write back even when used as destination because the other half needs to be saved
+				FlushReg(it2->first, false, true);
+				reg_alloc alloc = it2->second;
+				Shift_FPU((nregf_t)alloc.host_reg);
+				alloc._64bit = false;
+				alloc.version[0] = alloc.version[1];
+				reg_alloced.erase(it2);
+				reg_alloced[param._reg] = alloc;
+			}
+		}
+	}
+
 #if 0
 	// Currently unused. Doesn't seem to help much
 	bool DefsReg(int from, int to, Sh4RegType reg)

View file

@@ -73,7 +73,7 @@ INLINE void Denorm32(float &value)
 
 #define CHECK_FPU_32(v) v = fixNaN(v)
-#define CHECK_FPU_64(v)
+#define CHECK_FPU_64(v) v = fixNaN64(v)
 
 //fadd <FREG_M>,<FREG_N>
@@ -116,7 +116,7 @@ sh4op(i1111_nnnn_mmmm_0001)
 		double drn=GetDR(n), drm=GetDR(m);
 		drn-=drm;
-		//dr[n] -= dr[m];
+		CHECK_FPU_64(drn);
 		SetDR(n,drn);
 	}
 }
@@ -137,7 +137,7 @@ sh4op(i1111_nnnn_mmmm_0010)
 		double drn=GetDR(n), drm=GetDR(m);
 		drn*=drm;
-		//dr[n] *= dr[m];
+		CHECK_FPU_64(drn);
 		SetDR(n,drn);
 	}
 }
@@ -160,6 +160,7 @@ sh4op(i1111_nnnn_mmmm_0011)
 		double drn=GetDR(n), drm=GetDR(m);
 		drn/=drm;
+		CHECK_FPU_64(drn);
 		SetDR(n,drn);
 	}
 }
@@ -506,14 +507,20 @@ sh4op(i1111_nnmm_1110_1101)
 	int m=(GetN(op)&0x3)<<2;
 	if(fpscr.PR ==0)
 	{
-		float idp;
-		idp=fr[n+0]*fr[m+0];
-		idp+=fr[n+1]*fr[m+1];
-		idp+=fr[n+2]*fr[m+2];
-		idp+=fr[n+3]*fr[m+3];
-
-		CHECK_FPU_32(idp);
-		fr[n+3]=idp;
+#if HOST_CPU == CPU_X86 || HOST_CPU == CPU_X64
+		double idp = (double)fr[n + 0] * fr[m + 0];
+		idp += (double)fr[n + 1] * fr[m + 1];
+		idp += (double)fr[n + 2] * fr[m + 2];
+		idp += (double)fr[n + 3] * fr[m + 3];
+		float rv = (float)idp;
+#else
+		float rv = fr[n + 0] * fr[m + 0];
+		rv += fr[n + 1] * fr[m + 1];
+		rv += fr[n + 2] * fr[m + 2];
+		rv += fr[n + 3] * fr[m + 3];
+#endif
+		CHECK_FPU_32(rv);
+		fr[n + 3] = rv;
 	}
 	else
 	{
@@ -598,7 +605,6 @@ sh4op(i1111_1011_1111_1101)
 //fschg
 sh4op(i1111_0011_1111_1101)
 {
-	//iNimp("fschg");
 	fpscr.SZ = 1 - fpscr.SZ;
 }
@@ -616,8 +622,9 @@ sh4op(i1111_nnnn_0110_1101)
 	{
 		//Operation _can_ be done on sh4
 		u32 n = GetN(op)>>1;
-		SetDR(n,sqrt(GetDR(n)));
+		f64 v = sqrt(GetDR(n));
+		CHECK_FPU_64(v);
+		SetDR(n, v);
 	}
 }
@@ -656,7 +663,6 @@ sh4op(i1111_nnnn_0011_1101)
 //fmac <FREG_0>,<FREG_M>,<FREG_N>
 sh4op(i1111_nnnn_mmmm_1110)
 {
-	//iNimp("fmac <FREG_0>,<FREG_M>,<FREG_N>");
 	if (fpscr.PR==0)
 	{
 		u32 n = GetN(op);
@@ -675,8 +681,6 @@ sh4op(i1111_nnnn_mmmm_1110)
 //ftrv xmtrx,<FV_N>
 sh4op(i1111_nn01_1111_1101)
 {
-	//iNimp("ftrv xmtrx,<FV_N>");
-
 	/*
 		XF[0]	XF[4]	XF[8]	XF[12]		FR[n]		FR[n]
 		XF[1]	XF[5]	XF[9]	XF[13]	 *	FR[n+1]	->	FR[n+1]

View file

@@ -62,7 +62,7 @@ u32 mmu_full_lookup(u32 va, const TLB_Entry **entry, u32& rv);
 #ifdef FAST_MMU
 static INLINE u32 mmu_instruction_translation(u32 va, u32& rv)
 {
-	if (va & 1)
+	if (unlikely(va & 1))
 		return MMU_ERROR_BADADDR;
 	if (fast_reg_lut[va >> 29] != 0)
 	{
@@ -100,7 +100,7 @@ void DoMMUException(u32 addr, u32 error_code, u32 access_type);
 {
 	u32 addr;
 	u32 rv = mmu_data_translation<MMU_TT_DREAD, T>(adr, addr);
-	if (rv != MMU_ERROR_NONE)
+	if (unlikely(rv != MMU_ERROR_NONE))
 	{
 		DoMMUException(adr, rv, MMU_TT_DREAD);
 		*exception_occurred = 1;
@@ -118,7 +118,7 @@ void DoMMUException(u32 addr, u32 error_code, u32 access_type);
 {
 	u32 addr;
 	u32 rv = mmu_data_translation<MMU_TT_DWRITE, T>(adr, addr);
-	if (rv != MMU_ERROR_NONE)
+	if (unlikely(rv != MMU_ERROR_NONE))
 	{
 		DoMMUException(adr, rv, MMU_TT_DWRITE);
 		return 1;

View file

@@ -33,6 +33,7 @@ void ubc_term();
 void tmu_init();
 void tmu_reset(bool hard);
 void tmu_term();
+u32 read_TMU_TCNTch(u32 ch);
 
 void ccn_init();
 void ccn_reset();

View file

@@ -276,7 +276,7 @@ struct Sh4Context
 			sr_status_t old_sr;
 			fpscr_t old_fpscr;
 
-			volatile u32 CpuRunning;
+			u32 CpuRunning;
 
 			int sh4_sched_next;
 			u32 interrupt_pend;

View file

@@ -84,17 +84,17 @@ static void map_area4(u32 base)
 //AREA 5	--	Ext. Device
 //Read Ext.Device
-template <u32 sz,class T>
+template <typename T>
 T DYNACALL ReadMem_extdev_T(u32 addr)
 {
-	return (T)libExtDevice_ReadMem_A5(addr,sz);
+	return (T)libExtDevice_ReadMem_A5(addr, sizeof(T));
 }
 
 //Write Ext.Device
-template <u32 sz,class T>
+template <typename T>
 void DYNACALL WriteMem_extdev_T(u32 addr, T data)
 {
-	libExtDevice_WriteMem_A5(addr,data,sz);
+	libExtDevice_WriteMem_A5(addr, data, sizeof(T));
 }
 
 _vmem_handler area5_handler;

View file

@@ -155,7 +155,7 @@ offset>>=2;
 //Region P4
 //Read P4
-template <u32 sz,class T>
+template <class T>
 T DYNACALL ReadMem_P4(u32 addr)
 {
 	switch((addr>>24)&0xFF)
@@ -228,7 +228,7 @@ T DYNACALL ReadMem_P4(u32 addr)
 }
 
 //Write P4
-template <u32 sz,class T>
+template <class T>
 void DYNACALL WriteMem_P4(u32 addr,T data)
 {
 	/*if (((addr>>26)&0x7)==7)
@@ -406,37 +406,40 @@ void DYNACALL WriteMem_sq(u32 addr,T data)
 #define OUT_OF_RANGE(reg) INFO_LOG(SH4, "Out of range on register %s index %x", reg, addr)
 
 //Read Area7
-template <u32 sz,class T>
+template <typename T>
 T DYNACALL ReadMem_area7(u32 addr)
 {
-	/*
-	if (likely(addr==0xffd80024))
+	// TMU TCNT0 is by far the most frequently read register (x100 the second most read)
+	if (likely(addr == 0xFFD8000C))
 	{
-		return TMU_TCNT(2);
+		//return (T)sh4_rio_read<sizeof(T)>(TMU, 0xC);
+		return (T)read_TMU_TCNTch(0);
 	}
-	else if (likely(addr==0xFFD8000C))
-	{
-		return TMU_TCNT(0);
-	}
-	else */if (likely(addr==0xFF000028))
+	else if (likely(addr == 0xFF000028))
 	{
 		return CCN_INTEVT;
 	}
-	else if (likely(addr==0xFFA0002C))
-	{
-		return DMAC_CHCR(2).full;
-	}
-	//else if (addr==)
-	//printf("%08X\n",addr);
-	addr&=0x1FFFFFFF;
 	u32 map_base = addr >> 16;
+	addr &= 0xFF;
 	switch (map_base & 0x1FFF)
 	{
-	case A7_REG_HASH(CCN_BASE_addr):
-		if (addr<=0x1F000044)
+	case A7_REG_HASH(TMU_BASE_addr):
+		if (addr <= 0x2C)
 		{
-			return (T)sh4_rio_read<sz>(CCN,addr & 0xFF);
+			return (T)sh4_rio_read<sizeof(T)>(TMU, addr);
+		}
+		else
+		{
+			OUT_OF_RANGE("TMU");
+			return 0;
+		}
+		break;
+
+	case A7_REG_HASH(CCN_BASE_addr):
+		if (addr <= 0x44)
+		{
+			return (T)sh4_rio_read<sizeof(T)>(CCN, addr);
 		}
 		else
 		{
@@ -445,10 +448,46 @@ T DYNACALL ReadMem_area7(u32 addr)
 		}
 		break;
 
-	case A7_REG_HASH(UBC_BASE_addr):
-		if (addr<=0x1F200020)
+	case A7_REG_HASH(DMAC_BASE_addr):
+		if (addr <= 0x40)
 		{
-			return (T)sh4_rio_read<sz>(UBC,addr & 0xFF);
+			return (T)sh4_rio_read<sizeof(T)>(DMAC, addr);
+		}
+		else
+		{
+			OUT_OF_RANGE("DMAC");
+			return 0;
+		}
+		break;
+
+	case A7_REG_HASH(INTC_BASE_addr):
+		if (addr <= 0x10)
+		{
+			return (T)sh4_rio_read<sizeof(T)>(INTC, addr);
+		}
+		else
+		{
+			OUT_OF_RANGE("INTC");
+			return 0;
+		}
+		break;
+
+	case A7_REG_HASH(RTC_BASE_addr):
+		if (addr <= 0x3C)
+		{
+			return (T)sh4_rio_read<sizeof(T)>(RTC, addr);
+		}
+		else
+		{
+			OUT_OF_RANGE("RTC");
+			return 0;
+		}
+		break;
+
+	case A7_REG_HASH(UBC_BASE_addr):
+		if (addr <= 0x20)
+		{
+			return (T)sh4_rio_read<sizeof(T)>(UBC, addr);
 		}
 		else
 		{
@@ -458,9 +497,9 @@ T DYNACALL ReadMem_area7(u32 addr)
 		break;
 
 	case A7_REG_HASH(BSC_BASE_addr):
-		if (addr<=0x1F800048)
+		if (addr <= 0x48)
 		{
-			return (T)sh4_rio_read<sz>(BSC,addr & 0xFF);
+			return (T)sh4_rio_read<sizeof(T)>(BSC, addr);
 		}
 		else
 		{
@@ -477,24 +516,10 @@ T DYNACALL ReadMem_area7(u32 addr)
 		INFO_LOG(SH4, "Read from write-only registers [dram settings 3]");
 		return 0;
 
-	case A7_REG_HASH(DMAC_BASE_addr):
-		if (addr<=0x1FA00040)
-		{
-			return (T)sh4_rio_read<sz>(DMAC,addr & 0xFF);
-		}
-		else
-		{
-			OUT_OF_RANGE("DMAC");
-			return 0;
-		}
-		break;
-
 	case A7_REG_HASH(CPG_BASE_addr):
-		if (addr<=0x1FC00010)
+		if (addr <= 0x10)
 		{
-			return (T)sh4_rio_read<sz>(CPG,addr & 0xFF);
+			return (T)sh4_rio_read<sizeof(T)>(CPG, addr);
 		}
 		else
 		{
@@ -503,46 +528,10 @@ T DYNACALL ReadMem_area7(u32 addr)
 		}
 		break;
 
-	case A7_REG_HASH(RTC_BASE_addr):
-		if (addr<=0x1FC8003C)
-		{
-			return (T)sh4_rio_read<sz>(RTC,addr & 0xFF);
-		}
-		else
-		{
-			OUT_OF_RANGE("RTC");
-			return 0;
-		}
-		break;
-
-	case A7_REG_HASH(INTC_BASE_addr):
-		if (addr<=0x1FD00010)
-		{
-			return (T)sh4_rio_read<sz>(INTC,addr & 0xFF);
-		}
-		else
-		{
-			OUT_OF_RANGE("INTC");
-			return 0;
-		}
-		break;
-
-	case A7_REG_HASH(TMU_BASE_addr):
-		if (addr<=0x1FD8002C)
-		{
-			return (T)sh4_rio_read<sz>(TMU,addr & 0xFF);
-		}
-		else
-		{
-			OUT_OF_RANGE("TMU");
-			return 0;
-		}
-		break;
-
 	case A7_REG_HASH(SCI_BASE_addr):
-		if (addr<=0x1FE0001C)
+		if (addr <= 0x1C)
 		{
-			return (T)sh4_rio_read<sz>(SCI,addr & 0xFF);
+			return (T)sh4_rio_read<sizeof(T)>(SCI, addr);
 		}
 		else
 		{
@@ -552,9 +541,9 @@ T DYNACALL ReadMem_area7(u32 addr)
 		break;
 
 	case A7_REG_HASH(SCIF_BASE_addr):
-		if (addr<=0x1FE80024)
+		if (addr <= 0x24)
 		{
-			return (T)sh4_rio_read<sz>(SCIF,addr & 0xFF);
+			return (T)sh4_rio_read<sizeof(T)>(SCIF, addr);
 		}
 		else
 		{
@@ -568,24 +557,23 @@ T DYNACALL ReadMem_area7(u32 addr)
 		switch(addr)
 		{
 			//UDI SDIR 0x1FF00000 0x1FF00000 16 0xFFFF Held Held Held Pclk
-			case UDI_SDIR_addr :
+			case (UDI_SDIR_addr & 0xff):
 				break;
 
 			//UDI SDDR 0x1FF00008 0x1FF00008 32 Held Held Held Held Pclk
-			case UDI_SDDR_addr :
+			case (UDI_SDDR_addr & 0xff):
 				break;
 		}
 		break;
 	}
 
-	INFO_LOG(SH4, "Unknown Read from Area7 - addr=%x", addr);
+	INFO_LOG(SH4, "Unknown Read from Area7 - addr=%x", (map_base << 16) | addr);
 	return 0;
 }
 
 //Write Area7
-template <u32 sz,class T>
+template <typename T>
 void DYNACALL WriteMem_area7(u32 addr, T data)
 {
 	if (likely(addr == 0xFF000038))
@@ -599,17 +587,36 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
 		return;
 	}
 
-	//printf("%08X\n",addr);
-	addr&=0x1FFFFFFF;
 	u32 map_base = addr >> 16;
+	addr &= 0xFF;
 	switch (map_base & 0x1FFF)
 	{
+	case A7_REG_HASH(DMAC_BASE_addr):
+		if (addr <= 0x40)
+		{
+			sh4_rio_write<sizeof(T)>(DMAC, addr, data);
+		}
+		else
+		{
+			OUT_OF_RANGE("DMAC");
+		}
+		return;
+
+	case A7_REG_HASH(TMU_BASE_addr):
+		if (addr <= 0x2C)
+		{
+			sh4_rio_write<sizeof(T)>(TMU, addr, data);
+		}
+		else
+		{
+			OUT_OF_RANGE("TMU");
+		}
+		return;
+
 	case A7_REG_HASH(CCN_BASE_addr):
-		if (addr<=0x1F00003C)
+		if (addr <= 0x3C)
 		{
-			sh4_rio_write<sz>(CCN,addr & 0xFF,data);
+			sh4_rio_write<sizeof(T)>(CCN, addr, data);
 		}
 		else
 		{
@@ -617,10 +624,21 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
 		}
 		return;
 
-	case A7_REG_HASH(UBC_BASE_addr):
-		if (addr<=0x1F200020)
+	case A7_REG_HASH(INTC_BASE_addr):
+		if (addr <= 0x0C)
 		{
-			sh4_rio_write<sz>(UBC,addr & 0xFF,data);
+			sh4_rio_write<sizeof(T)>(INTC, addr, data);
+		}
+		else
+		{
+			OUT_OF_RANGE("INTC");
+		}
+		return;
+
+	case A7_REG_HASH(UBC_BASE_addr):
+		if (addr <= 0x20)
+		{
+			sh4_rio_write<sizeof(T)>(UBC, addr, data);
 		}
 		else
 		{
@@ -629,9 +647,9 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
 		return;
 
 	case A7_REG_HASH(BSC_BASE_addr):
-		if (addr<=0x1F800048)
+		if (addr <= 0x48)
 		{
-			sh4_rio_write<sz>(BSC,addr & 0xFF,data);
+			sh4_rio_write<sizeof(T)>(BSC, addr, data);
 		}
 		else
 		{
@@ -646,21 +664,10 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
 		//dram settings 3 / write only
 		return;
 
-	case A7_REG_HASH(DMAC_BASE_addr):
-		if (addr<=0x1FA00040)
-		{
-			sh4_rio_write<sz>(DMAC,addr & 0xFF,data);
-		}
-		else
-		{
-			OUT_OF_RANGE("DMAC");
-		}
-		return;
-
 	case A7_REG_HASH(CPG_BASE_addr):
-		if (addr<=0x1FC00010)
+		if (addr <= 0x10)
 		{
-			sh4_rio_write<sz>(CPG,addr & 0xFF,data);
+			sh4_rio_write<sizeof(T)>(CPG, addr, data);
 		}
 		else
 		{
@@ -669,9 +676,9 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
 		return;
 
 	case A7_REG_HASH(RTC_BASE_addr):
-		if (addr<=0x1FC8003C)
+		if (addr <= 0x3C)
 		{
-			sh4_rio_write<sz>(RTC,addr & 0xFF,data);
+			sh4_rio_write<sizeof(T)>(RTC, addr, data);
 		}
 		else
 		{
@@ -679,32 +686,10 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
 		}
 		return;
 
-	case A7_REG_HASH(INTC_BASE_addr):
-		if (addr<=0x1FD0000C)
-		{
-			sh4_rio_write<sz>(INTC,addr & 0xFF,data);
-		}
-		else
-		{
-			OUT_OF_RANGE("INTC");
-		}
-		return;
-
-	case A7_REG_HASH(TMU_BASE_addr):
-		if (addr<=0x1FD8002C)
-		{
-			sh4_rio_write<sz>(TMU,addr & 0xFF,data);
-		}
-		else
-		{
-			OUT_OF_RANGE("TMU");
-		}
-		return;
-
 	case A7_REG_HASH(SCI_BASE_addr):
-		if (addr<=0x1FE0001C)
+		if (addr <= 0x1C)
 		{
-			sh4_rio_write<sz>(SCI,addr & 0xFF,data);
+			sh4_rio_write<sizeof(T)>(SCI, addr, data);
 		}
 		else
 		{
@@ -713,9 +698,9 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
 		return;
 
 	case A7_REG_HASH(SCIF_BASE_addr):
-		if (addr<=0x1FE80024)
+		if (addr <= 0x24)
 		{
-			sh4_rio_write<sz>(SCIF,addr & 0xFF,data);
+			sh4_rio_write<sizeof(T)>(SCIF, addr, data);
 		}
 		else
 		{
@@ -728,18 +713,18 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
 		switch(addr)
 		{
 			//UDI SDIR 0xFFF00000 0x1FF00000 16 0xFFFF Held Held Held Pclk
-			case UDI_SDIR_addr :
+			case (UDI_SDIR_addr & 0xff):
 				break;
 
 			//UDI SDDR 0xFFF00008 0x1FF00008 32 Held Held Held Held Pclk
-			case UDI_SDDR_addr :
+			case (UDI_SDDR_addr & 0xff):
 				break;
 		}
 		break;
 	}
 
-	INFO_LOG(SH4, "Write to Area7 not implemented, addr=%x, data=%x", addr, data);
+	INFO_LOG(SH4, "Write to Area7 not implemented, addr=%x, data=%x", (map_base << 16) | addr, data);
 }
@@ -747,22 +732,12 @@ void DYNACALL WriteMem_area7(u32 addr,T data)
 //On Chip Ram
 //***********
 //Read OCR
-template <u32 sz,class T>
+template <typename T>
 T DYNACALL ReadMem_area7_OCR_T(u32 addr)
 {
 	if (CCN_CCR.ORA)
 	{
-		if (sz==1)
-			return (T)OnChipRAM[addr&OnChipRAM_MASK];
-		else if (sz==2)
-			return (T)*(u16*)&OnChipRAM[addr&OnChipRAM_MASK];
-		else if (sz==4)
-			return (T)*(u32*)&OnChipRAM[addr&OnChipRAM_MASK];
-		else
-		{
-			ERROR_LOG(SH4, "ReadMem_area7_OCR_T: template SZ is wrong = %d", sz);
-			return 0xDE;
-		}
+		return *(T*)&OnChipRAM[addr & OnChipRAM_MASK];
 	}
 	else
 	{
@@ -772,21 +747,12 @@ T DYNACALL ReadMem_area7_OCR_T(u32 addr)
 }
 
 //Write OCR
-template <u32 sz,class T>
+template <typename T>
 void DYNACALL WriteMem_area7_OCR_T(u32 addr, T data)
 {
 	if (CCN_CCR.ORA)
 	{
-		if (sz==1)
-			OnChipRAM[addr&OnChipRAM_MASK]=(u8)data;
-		else if (sz==2)
-			*(u16*)&OnChipRAM[addr&OnChipRAM_MASK]=(u16)data;
-		else if (sz==4)
-			*(u32*)&OnChipRAM[addr&OnChipRAM_MASK]=data;
-		else
-		{
-			ERROR_LOG(SH4, "WriteMem_area7_OCR_T: template SZ is wrong = %d", sz);
-		}
+		*(T*)&OnChipRAM[addr & OnChipRAM_MASK] = data;
 	}
 	else
 	{

View file

@@ -75,21 +75,6 @@ int sh4_sched_register(int tag, sh4_sched_callback* ssc)
 	return sch_list.size()-1;
 }
 
-/*
-	Return current cycle count, in 32 bits (wraps after 21 dreamcast seconds)
-*/
-u32 sh4_sched_now()
-{
-	return sh4_sched_ffb-Sh4cntx.sh4_sched_next;
-}
-
-/*
-	Return current cycle count, in 64 bits (effectively never wraps)
-*/
-u64 sh4_sched_now64()
-{
-	return sh4_sched_ffb-Sh4cntx.sh4_sched_next;
-}
 
 void sh4_sched_request(int id, int cycles)
 {
 	verify(cycles== -1 || (cycles >= 0 && cycles <= SH4_MAIN_CLOCK));

View file

@@ -3,6 +3,8 @@
 
 #include "types.h"
 
+extern u64 sh4_sched_ffb;
+
 /*
 	tag, as passed on sh4_sched_register
 	sch_cycles, the cycle duration that the callback requested (sh4_sched_request)
@@ -17,16 +19,20 @@ typedef int sh4_sched_callback(int tag, int sch_cycl, int jitter);
 int sh4_sched_register(int tag, sh4_sched_callback* ssc);
 
 /*
-	current time in SH4 cycles, referenced to boot.
-	Wraps every ~21 secs
+	Return current cycle count, in 32 bits (wraps after 21 dreamcast seconds)
 */
-u32 sh4_sched_now();
+static inline u32 sh4_sched_now()
+{
+	return sh4_sched_ffb - Sh4cntx.sh4_sched_next;
+}
 
 /*
-	current time, in SH4 cycles, referenced to boot.
-	Does not wrap, 64 bits.
+	Return current cycle count, in 64 bits (effectively never wraps)
 */
-u64 sh4_sched_now64();
+static inline u64 sh4_sched_now64()
+{
+	return sh4_sched_ffb - Sh4cntx.sh4_sched_next;
+}
 
 /*
	Schedule a callback to be called sh4 *cycles* after the
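The 21-second figure is just counter width over clock rate: assuming SH4_MAIN_CLOCK is 200 MHz (not stated in this diff), 2^32 cycles / 200,000,000 cycles/s ≈ 21.47 s, so the u32 form wraps about every 21 Dreamcast seconds while the u64 form is effectively monotonic. Exporting `sh4_sched_ffb` and defining both functions `static inline` in the header lets hot scheduler code read the current cycle count without a function call, per the "inline sh4_sched_now() and sh4_sched_now64()" commit above.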

View file

@@ -88,7 +88,7 @@ public:
 	static std::shared_ptr<EvdevGamepadDevice> GetControllerForPort(int port)
 	{
-		for (auto& pair : evdev_gamepads)
+		for (const auto& pair : evdev_gamepads)
 			if (pair.second->maple_port() == port)
 				return pair.second;
 
 		return NULL;
@@ -104,7 +104,7 @@ public:
 	static void PollDevices()
 	{
-		for (auto& pair : evdev_gamepads)
+		for (const auto& pair : evdev_gamepads)
 			pair.second->read_input();
 	}

View file

@@ -204,7 +204,7 @@ static void elf_syms(FILE* out,const char* libfile)
 	}
 }
 
-static volatile bool prof_run;
+static bool prof_run;
 
 // This is not used:
 static int str_ends_with(const char * str, const char * suffix)

View file

@@ -10,8 +10,8 @@ SoundFrame RingBuffer[SAMPLE_COUNT];
 const u32 RingBufferByteSize = sizeof(RingBuffer);
 const u32 RingBufferSampleCount = SAMPLE_COUNT;
 
-volatile u32 WritePtr;  //last WRITEN sample
-volatile u32 ReadPtr;   //next sample to read
+u32 WritePtr;  //last WRITEN sample
+u32 ReadPtr;   //next sample to read
 
 u32 gen_samples=0;
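Dropping `volatile` here (and on `CpuRunning` and `prof_run` above) matches the "clean up unnecessary dtors, volatile" commit: `volatile` provides neither atomicity nor ordering, so it was either unnecessary or insufficient. If these single-producer/single-consumer indices were ever shared across threads, the portable tool would be std::atomic, sketched here as an assumption rather than the project's code:

#include <atomic>
#include <cstdint>

std::atomic<std::uint32_t> WritePtr{0};  // last written sample
std::atomic<std::uint32_t> ReadPtr{0};   // next sample to read

void publish_write(std::uint32_t next) { WritePtr.store(next, std::memory_order_release); }
std::uint32_t snapshot_write()         { return WritePtr.load(std::memory_order_acquire); }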

View file

@ -234,12 +234,12 @@ eFSReg alloc_fpu[]={f16,f17,f18,f19,f20,f21,f22,f23,
struct arm_reg_alloc: RegAlloc<eReg,eFSReg,false> struct arm_reg_alloc: RegAlloc<eReg,eFSReg,false>
{ {
virtual void Preload(u32 reg,eReg nreg) virtual void Preload(u32 reg,eReg nreg) override
{ {
verify(reg!=reg_pc_dyn); verify(reg!=reg_pc_dyn);
LoadSh4Reg_mem(nreg,reg); LoadSh4Reg_mem(nreg,reg);
} }
virtual void Writeback(u32 reg,eReg nreg) virtual void Writeback(u32 reg,eReg nreg) override
{ {
if (reg==reg_pc_dyn) if (reg==reg_pc_dyn)
// reg_pc_dyn has been stored in r4 by the jdyn op implementation // reg_pc_dyn has been stored in r4 by the jdyn op implementation
@ -249,13 +249,13 @@ struct arm_reg_alloc: RegAlloc<eReg,eFSReg,false>
StoreSh4Reg_mem(nreg,reg); StoreSh4Reg_mem(nreg,reg);
} }
virtual void Preload_FPU(u32 reg,eFSReg nreg) virtual void Preload_FPU(u32 reg, eFSReg nreg, bool _64bits) override
{ {
const s32 shRegOffs = (u8*)GetRegPtr(reg)-sh4_dyna_rcb ; const s32 shRegOffs = (u8*)GetRegPtr(reg)-sh4_dyna_rcb ;
VLDR((nreg),r8,shRegOffs/4); VLDR((nreg),r8,shRegOffs/4);
} }
virtual void Writeback_FPU(u32 reg,eFSReg nreg) virtual void Writeback_FPU(u32 reg, eFSReg nreg, bool _64bits) override
{ {
const s32 shRegOffs = (u8*)GetRegPtr(reg)-sh4_dyna_rcb ; const s32 shRegOffs = (u8*)GetRegPtr(reg)-sh4_dyna_rcb ;
@ -716,7 +716,7 @@ mem_op_type memop_type(shil_opcode* op)
{ {
int Lsz=-1; int Lsz=-1;
int sz=op->flags&0x7f; int sz = op->size();
bool fp32=op->rs2.is_r32f() || op->rd.is_r32f(); bool fp32=op->rs2.is_r32f() || op->rd.is_r32f();
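Note: every `op.flags & 0x7f` in this change becomes `op.size()`, the accessor the commit history says was added to shil_opcode. Its presumed shape, inferred from the call sites (the method body itself is outside this diff):

	#include <cstdint>

	// stand-in for the real shil_opcode: the low 7 bits of `flags`
	// encode the operand size in bytes (1, 2, 4 or 8)
	struct shil_opcode_sketch {
		uint32_t flags;
		uint32_t size() const { return flags & 0x7f; }
	};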


@ -16,15 +16,8 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with reicast. If not, see <https://www.gnu.org/licenses/>. along with reicast. If not, see <https://www.gnu.org/licenses/>.
*/ */
+#pragma once
-#ifndef CORE_REC_ARM64_ARM64_REGALLOC_H_
-#define CORE_REC_ARM64_ARM64_REGALLOC_H_
-#ifdef OLD_REGALLOC
-#include "hw/sh4/dyna/regalloc.h"
-#else
 #include "hw/sh4/dyna/ssa_regalloc.h"
-#endif
#include "deps/vixl/aarch64/macro-assembler-aarch64.h" #include "deps/vixl/aarch64/macro-assembler-aarch64.h"
using namespace vixl::aarch64; using namespace vixl::aarch64;
@ -42,11 +35,7 @@ static eFReg alloc_fregs[] = { S8, S9, S10, S11, S12, S13, S14, S15, (eFReg)-1 }
class Arm64Assembler; class Arm64Assembler;
-struct Arm64RegAlloc : RegAlloc<eReg, eFReg
-#ifndef EXPLODE_SPANS
-	, false
-#endif
-	>
+struct Arm64RegAlloc : RegAlloc<eReg, eFReg, true>
{ {
Arm64RegAlloc(Arm64Assembler *assembler) : assembler(assembler) {} Arm64RegAlloc(Arm64Assembler *assembler) : assembler(assembler) {}
@ -57,8 +46,10 @@ struct Arm64RegAlloc : RegAlloc<eReg, eFReg
virtual void Preload(u32 reg, eReg nreg) override; virtual void Preload(u32 reg, eReg nreg) override;
virtual void Writeback(u32 reg, eReg nreg) override; virtual void Writeback(u32 reg, eReg nreg) override;
virtual void Preload_FPU(u32 reg, eFReg nreg) override; virtual void Preload_FPU(u32 reg, eFReg nreg, bool _64bit) override;
virtual void Writeback_FPU(u32 reg, eFReg nreg) override; virtual void Writeback_FPU(u32 reg, eFReg nreg, bool _64bit) override;
virtual void Merge_FPU(eFReg reg1, eFReg reg2) override;
virtual void Shift_FPU(eFReg reg) override;
const Register& MapRegister(const shil_param& param) const Register& MapRegister(const shil_param& param)
{ {
@ -70,21 +61,15 @@ struct Arm64RegAlloc : RegAlloc<eReg, eFReg
const VRegister& MapVRegister(const shil_param& param, u32 index = 0) const VRegister& MapVRegister(const shil_param& param, u32 index = 0)
{ {
-#ifdef OLD_REGALLOC
-		eFReg ereg = mapfv(param, index);
-#else
-#ifdef EXPLODE_SPANS
-#error EXPLODE_SPANS not supported with ssa regalloc
-#endif
 		verify(index == 0);
 		eFReg ereg = mapf(param);
-#endif
 		if (ereg == (eFReg)-1)
 			die("VRegister not allocated");
+		if (param.is_r64f())
+			return VRegister::GetDRegFromCode(ereg);
+		else
 			return VRegister::GetSRegFromCode(ereg);
} }
Arm64Assembler *assembler; Arm64Assembler *assembler;
}; };
#endif /* CORE_REC_ARM64_ARM64_REGALLOC_H_ */


@ -22,13 +22,10 @@
#if FEAT_SHREC == DYNAREC_JIT #if FEAT_SHREC == DYNAREC_JIT
#include <unistd.h> #include <unistd.h>
#include <map>
#include <setjmp.h> #include <setjmp.h>
#include "deps/vixl/aarch64/macro-assembler-aarch64.h" #include "deps/vixl/aarch64/macro-assembler-aarch64.h"
using namespace vixl::aarch64;
//#define EXPLODE_SPANS
//#define NO_BLOCK_LINKING //#define NO_BLOCK_LINKING
#include "hw/sh4/sh4_opcode_list.h" #include "hw/sh4/sh4_opcode_list.h"
@ -42,6 +39,8 @@ using namespace vixl::aarch64;
#include "hw/mem/vmem32.h" #include "hw/mem/vmem32.h"
#include "arm64_regalloc.h" #include "arm64_regalloc.h"
using namespace vixl::aarch64;
#undef do_sqw_nommu #undef do_sqw_nommu
extern "C" void ngen_blockcheckfail(u32 pc); extern "C" void ngen_blockcheckfail(u32 pc);
@ -483,13 +482,15 @@ public:
verify(op.rd.is_reg()); verify(op.rd.is_reg());
verify(op.rs1.is_reg() || op.rs1.is_imm()); verify(op.rs1.is_reg() || op.rs1.is_imm());
-#ifdef EXPLODE_SPANS
-			Fmov(regalloc.MapVRegister(op.rd, 0), regalloc.MapVRegister(op.rs1, 0));
-			Fmov(regalloc.MapVRegister(op.rd, 1), regalloc.MapVRegister(op.rs1, 1));
-#else
+			if (op.rs1.is_reg() && regalloc.IsAllocf(op.rs1))
+			{
+				Fmov(regalloc.MapVRegister(op.rd), regalloc.MapVRegister(op.rs1));
+			}
+			else
+			{
 			shil_param_to_host_reg(op.rs1, x15);
 			host_reg_to_shil_param(op.rd, x15);
-#endif
+			}
break; break;
case shop_readm: case shop_readm:
@ -935,7 +936,7 @@ public:
case shop_xtrct: case shop_xtrct:
{ {
const Register rd = regalloc.MapRegister(op.rd); const Register& rd = regalloc.MapRegister(op.rd);
Lsr(rd, regalloc.MapRegister(op.rs1), 16); Lsr(rd, regalloc.MapRegister(op.rs1), 16);
Lsl(w0, regalloc.MapRegister(op.rs2), 16); Lsl(w0, regalloc.MapRegister(op.rs2), 16);
Orr(rd, rd, w0); Orr(rd, rd, w0);
@ -990,14 +991,17 @@ public:
if (op.rs1.is_reg()) if (op.rs1.is_reg())
Add(x1, x1, Operand(regalloc.MapRegister(op.rs1), UXTH, 3)); Add(x1, x1, Operand(regalloc.MapRegister(op.rs1), UXTH, 3));
else else
+			{
+				// TODO get rid of this Add if rs1 is imm. Use MemOperand with offset when !imm
 				Add(x1, x1, Operand(op.rs1.imm_value() << 3));
-#ifdef EXPLODE_SPANS
-			Ldr(regalloc.MapVRegister(op.rd, 0), MemOperand(x1, 4, PostIndex));
-			Ldr(regalloc.MapVRegister(op.rd, 1), MemOperand(x1));
-#else
+			}
+			if (regalloc.IsAllocf(op.rd))
+				Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1));
+			else
+			{
 			Ldr(x2, MemOperand(x1));
 			Str(x2, sh4_context_mem_operand(op.rd.reg_ptr()));
-#endif
+			}
break; break;
case shop_fipr: case shop_fipr:
@ -1605,23 +1609,10 @@ private:
if (mmu_enabled()) if (mmu_enabled())
Mov(*call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0)); // pc Mov(*call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0)); // pc
-		u32 size = op.flags & 0x7f;
 		if (!optimise || !GenReadMemoryFast(op, opid))
-			GenReadMemorySlow(size);
+			GenReadMemorySlow(op.size());
-		if (size < 8)
-			host_reg_to_shil_param(op.rd, w0);
-		else
-		{
-#ifdef EXPLODE_SPANS
-			verify(op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1));
-			Fmov(regalloc.MapVRegister(op.rd, 0), w0);
-			Lsr(x0, x0, 32);
-			Fmov(regalloc.MapVRegister(op.rd, 1), w0);
-#else
-			Str(x0, sh4_context_mem_operand(op.rd.reg_ptr()));
-#endif
-		}
+		host_reg_to_shil_param(op.rd, x0);
} }
bool GenReadMemoryImmediate(const shil_opcode& op) bool GenReadMemoryImmediate(const shil_opcode& op)
@ -1629,11 +1620,12 @@ private:
if (!op.rs1.is_imm()) if (!op.rs1.is_imm())
return false; return false;
u32 size = op.flags & 0x7f; const u32 size = op.size();
u32 addr = op.rs1._imm; u32 addr = op.rs1._imm;
if (mmu_enabled()) if (mmu_enabled())
{ {
if ((addr >> 12) != (block->vaddr >> 12)) if ((addr >> 12) < (block->vaddr >> 12)
|| ((addr + size - 1) >> 12) > (block->vaddr + block->sh4_code_size - 1) >> 12)
// When full mmu is on, only consider addresses in the same 4k page // When full mmu is on, only consider addresses in the same 4k page
return false; return false;
u32 paddr; u32 paddr;
@ -1647,9 +1639,11 @@ private:
rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr); rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
break; break;
case 4: case 4:
case 8:
rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr); rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
break; break;
case 8:
rv = mmu_data_translation<MMU_TT_DREAD, u64>(addr, paddr);
break;
default: default:
die("Invalid immediate size"); die("Invalid immediate size");
break; break;
@ -1659,7 +1653,7 @@ private:
addr = paddr; addr = paddr;
} }
bool isram = false; bool isram = false;
void* ptr = _vmem_read_const(addr, isram, size > 4 ? 4 : size); void* ptr = _vmem_read_const(addr, isram, size);
if (isram) if (isram)
{ {
@ -1683,6 +1677,10 @@ private:
Ldr(regalloc.MapRegister(op.rd), MemOperand(x1)); Ldr(regalloc.MapRegister(op.rd), MemOperand(x1));
break; break;
case 8:
Ldr(regalloc.MapVRegister(op.rd), MemOperand(x1));
break;
default: default:
die("Invalid size"); die("Invalid size");
break; break;
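Note: two changes land here for immediate-address reads under full MMU: the fast path now accepts any access whose whole byte range [addr, addr + size) lies within the 4k pages spanned by the block's code (the old test demanded the block's first page exactly), and 8-byte accesses get a proper u64 translation instead of piggybacking on the u32 one. The new page test restated as a helper (helper name hypothetical):

	#include <cstdint>

	static bool within_block_pages(uint32_t addr, uint32_t size,
	                               uint32_t block_vaddr, uint32_t sh4_code_size)
	{
		uint32_t access_first = addr >> 12;                          // 4k page indices
		uint32_t access_last  = (addr + size - 1) >> 12;
		uint32_t block_first  = block_vaddr >> 12;
		uint32_t block_last   = (block_vaddr + sh4_code_size - 1) >> 12;
		return access_first >= block_first && access_last <= block_last;
	}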
@ -1792,8 +1790,7 @@ private:
Add(x1, *call_regs64[0], sizeof(Sh4Context), LeaveFlags); Add(x1, *call_regs64[0], sizeof(Sh4Context), LeaveFlags);
} }
-		u32 size = op.flags & 0x7f;
-		switch(size)
+		switch(op.size())
{ {
case 1: case 1:
Ldrsb(w0, MemOperand(x28, x1)); Ldrsb(w0, MemOperand(x28, x1));
@ -1825,25 +1822,15 @@ private:
if (mmu_enabled()) if (mmu_enabled())
Mov(*call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0)); // pc Mov(*call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 2 : 0)); // pc
-		u32 size = op.flags & 0x7f;
-		if (size != 8)
+		if (op.size() != 8)
 			shil_param_to_host_reg(op.rs2, *call_regs[1]);
 		else
-		{
-#ifdef EXPLODE_SPANS
-			verify(op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1));
-			Fmov(*call_regs[1], regalloc.MapVRegister(op.rs2, 1));
-			Lsl(*call_regs64[1], *call_regs64[1], 32);
-			Fmov(w2, regalloc.MapVRegister(op.rs2, 0));
-			Orr(*call_regs64[1], *call_regs64[1], x2);
-#else
 			shil_param_to_host_reg(op.rs2, *call_regs64[1]);
-#endif
-		}
 		if (optimise && GenWriteMemoryFast(op, opid))
 			return;
-		GenWriteMemorySlow(size);
+		GenWriteMemorySlow(op.size());
} }
bool GenWriteMemoryImmediate(const shil_opcode& op) bool GenWriteMemoryImmediate(const shil_opcode& op)
@ -1851,11 +1838,12 @@ private:
if (!op.rs1.is_imm()) if (!op.rs1.is_imm())
return false; return false;
u32 size = op.flags & 0x7f; const u32 size = op.size();
u32 addr = op.rs1._imm; u32 addr = op.rs1._imm;
if (mmu_enabled()) if (mmu_enabled())
{ {
if ((addr >> 12) != (block->vaddr >> 12) && ((addr >> 12) != ((block->vaddr + block->guest_opcodes * 2 - 1) >> 12))) if ((addr >> 12) < (block->vaddr >> 12)
|| ((addr + size - 1) >> 12) > (block->vaddr + block->sh4_code_size - 1) >> 12)
// When full mmu is on, only consider addresses in the same 4k page // When full mmu is on, only consider addresses in the same 4k page
return false; return false;
u32 paddr; u32 paddr;
@ -1869,9 +1857,11 @@ private:
rv = mmu_data_translation<MMU_TT_DWRITE, u16>(addr, paddr); rv = mmu_data_translation<MMU_TT_DWRITE, u16>(addr, paddr);
break; break;
case 4: case 4:
case 8:
rv = mmu_data_translation<MMU_TT_DWRITE, u32>(addr, paddr); rv = mmu_data_translation<MMU_TT_DWRITE, u32>(addr, paddr);
break; break;
case 8:
rv = mmu_data_translation<MMU_TT_DWRITE, u64>(addr, paddr);
break;
default: default:
die("Invalid immediate size"); die("Invalid immediate size");
break; break;
@ -1881,11 +1871,9 @@ private:
addr = paddr; addr = paddr;
} }
bool isram = false; bool isram = false;
-	void* ptr = _vmem_write_const(addr, isram, size > 4 ? 4 : size);
+	void* ptr = _vmem_write_const(addr, isram, size);
 	Register reg2;
-	if (size != 8)
-	{
 	if (op.rs2.is_imm())
 	{
 		Mov(w1, op.rs2._imm);
@ -1896,13 +1884,21 @@ private:
 		reg2 = regalloc.MapRegister(op.rs2);
 	}
 	else if (regalloc.IsAllocf(op.rs2))
+	{
+		if (op.rs2.is_r64f())
+		{
+			Fmov(x1, VRegister::GetDRegFromCode(regalloc.MapVRegister(op.rs2).GetCode()));
+			reg2 = x1;
+		}
+		else
 		{
 			Fmov(w1, regalloc.MapVRegister(op.rs2));
 			reg2 = w1;
 		}
+	}
 	else
 		die("Invalid rs2 param");
-	}
if (isram) if (isram)
{ {
Ldr(x0, reinterpret_cast<uintptr_t>(ptr)); Ldr(x0, reinterpret_cast<uintptr_t>(ptr));
@ -1921,14 +1917,7 @@ private:
break; break;
 			case 8:
-#ifdef EXPLODE_SPANS
-				verify(op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1));
-				Str(regalloc.MapVRegister(op.rs2, 0), MemOperand(x1));
-				Str(regalloc.MapVRegister(op.rs2, 1), MemOperand(x1, 4));
-#else
-				shil_param_to_host_reg(op.rs2, x1);
-				Str(x1, MemOperand(x0));
-#endif
+				Str(reg2, MemOperand(x0));
break; break;
default: default:
@ -2000,8 +1989,7 @@ private:
Add(x7, *call_regs64[0], sizeof(Sh4Context), LeaveFlags); Add(x7, *call_regs64[0], sizeof(Sh4Context), LeaveFlags);
} }
-		u32 size = op.flags & 0x7f;
-		switch(size)
+		switch(op.size())
{ {
case 1: case 1:
Strb(w1, MemOperand(x28, x7)); Strb(w1, MemOperand(x28, x7));
@ -2117,9 +2105,8 @@ private:
else if (param.is_reg()) else if (param.is_reg())
{ {
 			if (param.is_r64f())
-				Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
-			else if (param.is_r32f())
 			{
+				verify(reg.Is64Bits());
 				if (regalloc.IsAllocf(param))
 					Fmov(reg, regalloc.MapVRegister(param));
 				else
@ -2127,10 +2114,21 @@ private:
 			}
 			else
 			{
-				if (regalloc.IsAllocg(param))
-					Mov(reg, regalloc.MapRegister(param));
-				else
-					Ldr(reg, sh4_context_mem_operand(param.reg_ptr()));
+				const Register& reg32 = reg.Is32Bits() ? (const Register&)reg : Register::GetWRegFromCode(reg.GetCode());
+				if (param.is_r32f())
+				{
+					if (regalloc.IsAllocf(param))
+						Fmov(reg32, regalloc.MapVRegister(param));
+					else
+						Ldr(reg32, sh4_context_mem_operand(param.reg_ptr()));
+				}
+				else
+				{
+					if (regalloc.IsAllocg(param))
+						Mov(reg32, regalloc.MapRegister(param));
+					else
+						Ldr(reg32, sh4_context_mem_operand(param.reg_ptr()));
+				}
} }
} }
else else
@ -2141,18 +2139,10 @@ private:
void host_reg_to_shil_param(const shil_param& param, const CPURegister& reg) void host_reg_to_shil_param(const shil_param& param, const CPURegister& reg)
{ {
-		if (reg.Is64Bits())
-		{
-			Str((const Register&)reg, sh4_context_mem_operand(param.reg_ptr()));
-		}
-		else if (regalloc.IsAllocg(param))
-		{
-			if (reg.IsRegister())
-				Mov(regalloc.MapRegister(param), (const Register&)reg);
-			else
-				Fmov(regalloc.MapRegister(param), (const VRegister&)reg);
-		}
-		else if (regalloc.IsAllocf(param))
+		if (param.is_r64f())
 		{
+			verify(reg.Is64Bits());
+			if (regalloc.IsAllocf(param))
+			{
 			if (reg.IsVRegister())
 				Fmov(regalloc.MapVRegister(param), (const VRegister&)reg);
@ -2160,6 +2150,37 @@ private:
 				Fmov(regalloc.MapVRegister(param), (const Register&)reg);
 			}
 			else
+			{
+				Str((const Register&)reg, sh4_context_mem_operand(param.reg_ptr()));
+			}
+		}
+		else if (regalloc.IsAllocg(param))
+		{
+			if (reg.IsRegister())
+			{
+				const Register& reg32 = reg.Is32Bits() ? (const Register&)reg : Register::GetWRegFromCode(reg.GetCode());
+				Mov(regalloc.MapRegister(param), reg32);
+			}
+			else
+			{
+				const VRegister& reg32 = reg.Is32Bits() ? (const VRegister&)reg : VRegister::GetSRegFromCode(reg.GetCode());
+				Fmov(regalloc.MapRegister(param), reg32);
+			}
+		}
+		else if (regalloc.IsAllocf(param))
+		{
+			if (reg.IsVRegister())
+			{
+				const VRegister& reg32 = reg.Is32Bits() ? (const VRegister&)reg : VRegister::GetSRegFromCode(reg.GetCode());
+				Fmov(regalloc.MapVRegister(param), reg32);
+			}
+			else
+			{
+				const Register& reg32 = reg.Is32Bits() ? (const Register&)reg : Register::GetWRegFromCode(reg.GetCode());
+				Fmov(regalloc.MapVRegister(param), reg32);
+			}
+		}
+		else
 		{
 			Str(reg, sh4_context_mem_operand(param.reg_ptr()));
 		}
@ -2334,15 +2355,22 @@ void Arm64RegAlloc::Writeback(u32 reg, eReg nreg)
{ {
assembler->Str(Register(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg))); assembler->Str(Register(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));
} }
void Arm64RegAlloc::Preload_FPU(u32 reg, eFReg nreg) void Arm64RegAlloc::Preload_FPU(u32 reg, eFReg nreg, bool _64bit)
{ {
assembler->Ldr(VRegister(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg))); assembler->Ldr(VRegister(nreg, _64bit ? 64 : 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));
} }
void Arm64RegAlloc::Writeback_FPU(u32 reg, eFReg nreg) void Arm64RegAlloc::Writeback_FPU(u32 reg, eFReg nreg, bool _64bit)
{ {
assembler->Str(VRegister(nreg, 32), assembler->sh4_context_mem_operand(GetRegPtr(reg))); assembler->Str(VRegister(nreg, _64bit ? 64 : 32), assembler->sh4_context_mem_operand(GetRegPtr(reg)));
}
void Arm64RegAlloc::Merge_FPU(eFReg reg1, eFReg reg2)
{
assembler->Sli(VRegister(reg1, 64), VRegister(reg2, 64), 32);
}
void Arm64RegAlloc::Shift_FPU(eFReg reg)
{
assembler->Urshr(VRegister(reg, 64), VRegister(reg, 64), 32);
} }
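Note: Merge_FPU and Shift_FPU are the new allocator hooks for treating a pair of 32-bit float registers as one 64-bit value in a single D register: Sli (shift left and insert) deposits reg2 into the upper 32 bits of reg1 while keeping reg1's lower 32 bits, and the 32-bit right shift (Urshr, unsigned rounding shift right) brings the upper word back down to lane 0. Bit-level equivalent of the two hooks (a sketch; it ignores Urshr's rounding of the shifted-out bits):

	#include <cstdint>

	// what Sli(VRegister(reg1, 64), VRegister(reg2, 64), 32) achieves
	uint64_t merge_fpu(uint64_t reg1, uint64_t reg2)
	{
		return (reg2 << 32) | (reg1 & 0xffffffffu);
	}

	// what the 32-bit right shift achieves, minus the rounding
	uint64_t shift_fpu(uint64_t reg)
	{
		return reg >> 32;
	}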
extern "C" naked void do_sqw_nommu_area_3(u32 dst, u8* sqb) extern "C" naked void do_sqw_nommu_area_3(u32 dst, u8* sqb)
{ {


@ -1677,7 +1677,7 @@ public:
case shop_readm: case shop_readm:
{ {
u32 size = op.flags & 0x7f; u32 size = op.size();
if (op.rs1.is_imm()) { if (op.rs1.is_imm()) {
verify(op.rs2.is_null() && op.rs3.is_null()); verify(op.rs2.is_null() && op.rs3.is_null());
@ -1760,7 +1760,7 @@ public:
case shop_writem: case shop_writem:
{ {
u32 size = op.flags & 0x7f; u32 size = op.size();
if (op.rs1.is_imm()) { if (op.rs1.is_imm()) {
verify(op.rs3.is_null()); verify(op.rs3.is_null());


@ -3,7 +3,6 @@
#if FEAT_SHREC == DYNAREC_JIT && HOST_CPU == CPU_X64 #if FEAT_SHREC == DYNAREC_JIT && HOST_CPU == CPU_X64
#include <setjmp.h> #include <setjmp.h>
//#define EXPLODE_SPANS
//#define PROFILING //#define PROFILING
//#define CANONICAL_TEST //#define CANONICAL_TEST
@ -115,7 +114,7 @@ void ngen_mainloop(void* v_cntx)
#endif #endif
"pushq %rbx \n\t" "pushq %rbx \n\t"
WIN32_ONLY( ".seh_pushreg %rbx \n\t") WIN32_ONLY( ".seh_pushreg %rbx \n\t")
#ifndef __MACH__ // rbp is pushed in the standard function prologue #if !defined(__MACH__) && !defined(NO_OMIT_FRAME_POINTER) // rbp is pushed in the standard function prologue
"pushq %rbp \n\t" "pushq %rbp \n\t"
#endif #endif
#ifdef _WIN32 #ifdef _WIN32
@ -195,7 +194,7 @@ WIN32_ONLY( ".seh_pushreg %r14 \n\t")
"popq %rsi \n\t" "popq %rsi \n\t"
"popq %rdi \n\t" "popq %rdi \n\t"
#endif #endif
#ifndef __MACH__ #if !defined(__MACH__) && !defined(NO_OMIT_FRAME_POINTER)
"popq %rbp \n\t" "popq %rbp \n\t"
#endif #endif
"popq %rbx \n\t" "popq %rbx \n\t"
@ -389,6 +388,7 @@ public:
shil_opcode& op = block->oplist[current_opid]; shil_opcode& op = block->oplist[current_opid];
regalloc.OpBegin(&op, current_opid); regalloc.OpBegin(&op, current_opid);
flushXmmRegisters = false;
switch (op.op) switch (op.op)
{ {
@ -458,15 +458,20 @@ public:
verify(op.rd.is_r64()); verify(op.rd.is_r64());
verify(op.rs1.is_r64()); verify(op.rs1.is_r64());
-#ifdef EXPLODE_SPANS
-			movss(regalloc.MapXRegister(op.rd, 0), regalloc.MapXRegister(op.rs1, 0));
-			movss(regalloc.MapXRegister(op.rd, 1), regalloc.MapXRegister(op.rs1, 1));
-#else
+			if (regalloc.IsAllocf(op.rd))
+			{
+				const Xbyak::Xmm& destReg = regalloc.MapXRegister(op.rd);
+				const Xbyak::Xmm& srcReg = regalloc.MapXRegister(op.rs1);
+				if (destReg != srcReg)
+					movq(destReg, srcReg);
+			}
+			else
+			{
 			mov(rax, (uintptr_t)op.rs1.reg_ptr());
 			mov(rax, qword[rax]);
 			mov(rcx, (uintptr_t)op.rd.reg_ptr());
 			mov(qword[rcx], rax);
-#endif
+			}
} }
break; break;
@ -490,24 +495,7 @@ public:
if (!optimise || !GenReadMemoryFast(op, block)) if (!optimise || !GenReadMemoryFast(op, block))
GenReadMemorySlow(op, block); GenReadMemorySlow(op, block);
-			u32 size = op.flags & 0x7f;
-			if (size != 8)
-				host_reg_to_shil_param(op.rd, eax);
-			else {
-#ifdef EXPLODE_SPANS
-				if (op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1))
-				{
-					movd(regalloc.MapXRegister(op.rd, 0), eax);
-					shr(rax, 32);
-					movd(regalloc.MapXRegister(op.rd, 1), eax);
-				}
-				else
-#endif
-				{
-					mov(rcx, (uintptr_t)op.rd.reg_ptr());
-					mov(qword[rcx], rax);
-				}
-			}
+			host_reg_to_shil_param(op.rd, rax);
} }
break; break;
@ -528,26 +516,8 @@ public:
add(call_regs[0], dword[rax]); add(call_regs[0], dword[rax]);
} }
} }
-			u32 size = op.flags & 0x7f;
-			if (size != 8)
-				shil_param_to_host_reg(op.rs2, call_regs[1]);
-			else {
-#ifdef EXPLODE_SPANS
-				if (op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1))
-				{
-					movd(call_regs[1], regalloc.MapXRegister(op.rs2, 1));
-					shl(call_regs64[1], 32);
-					movd(eax, regalloc.MapXRegister(op.rs2, 0));
-					or_(call_regs64[1], rax);
-				}
-				else
-#endif
-				{
-					mov(rax, (uintptr_t)op.rs2.reg_ptr());
-					mov(call_regs64[1], qword[rax]);
-				}
-			}
+			shil_param_to_host_reg(op.rs2, call_regs64[1]);
if (!optimise || !GenWriteMemoryFast(op, block)) if (!optimise || !GenWriteMemoryFast(op, block))
GenWriteMemorySlow(op, block); GenWriteMemorySlow(op, block);
} }
@ -1077,37 +1047,46 @@ public:
else else
movzx(rax, regalloc.MapRegister(op.rs1).cvt16()); movzx(rax, regalloc.MapRegister(op.rs1).cvt16());
mov(rcx, (uintptr_t)&sin_table); mov(rcx, (uintptr_t)&sin_table);
-#ifdef EXPLODE_SPANS
-			movss(regalloc.MapXRegister(op.rd, 0), dword[rcx + rax * 8]);
-			movss(regalloc.MapXRegister(op.rd, 1), dword[rcx + (rax * 8) + 4]);
-#else
+			if (regalloc.IsAllocf(op.rd))
+				movq(regalloc.MapXRegister(op.rd), qword[rcx + rax * 8]);
+			else
+			{
 			mov(rcx, qword[rcx + rax * 8]);
 			mov(rdx, (uintptr_t)op.rd.reg_ptr());
 			mov(qword[rdx], rcx);
-#endif
+			}
break; break;
case shop_fipr: case shop_fipr:
{ {
-				mov(rax, (size_t)op.rs1.reg_ptr());
-				movaps(regalloc.MapXRegister(op.rd), dword[rax]);
-				mov(rax, (size_t)op.rs2.reg_ptr());
-				mulps(regalloc.MapXRegister(op.rd), dword[rax]);
+				// Using doubles for better precision
 				const Xbyak::Xmm &rd = regalloc.MapXRegister(op.rd);
-				// Only first-generation 64-bit CPUs lack SSE3 support
-				if (cpu.has(Xbyak::util::Cpu::tSSE3))
-				{
-					haddps(rd, rd);
-					haddps(rd, rd);
-				}
-				else
-				{
-					movhlps(xmm1, rd);
-					addps(rd, xmm1);
-					movaps(xmm1, rd);
-					shufps(xmm1, xmm1,1);
-					addss(rd, xmm1);
-				}
+				mov(rax, (size_t)op.rs1.reg_ptr());
+				mov(rcx, (size_t)op.rs2.reg_ptr());
+				pxor(xmm1, xmm1);
+				pxor(xmm0, xmm0);
+				pxor(xmm2, xmm2);
+				cvtss2sd(xmm1, dword[rax]);
+				cvtss2sd(xmm0, dword[rcx]);
+				mulsd(xmm0, xmm1);
+				pxor(xmm1, xmm1);
+				cvtss2sd(xmm2, dword[rax + 4]);
+				cvtss2sd(xmm1, dword[rcx + 4]);
+				mulsd(xmm1, xmm2);
+				pxor(xmm2, xmm2);
+				cvtss2sd(xmm2, dword[rax + 8]);
+				addsd(xmm1, xmm0);
+				pxor(xmm0, xmm0);
+				cvtss2sd(xmm0, dword[rcx + 8]);
+				mulsd(xmm0, xmm2);
+				pxor(xmm2, xmm2);
+				cvtss2sd(xmm2, dword[rax + 12]);
+				addsd(xmm1, xmm0);
+				pxor(xmm0, xmm0);
+				cvtss2sd(xmm0, dword[rcx + 12]);
+				mulsd(xmm0, xmm2);
+				addsd(xmm0, xmm1);
+				cvtsd2ss(rd, xmm0);
 			}
 			break;
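Note: shop_fipr emulates the SH4 FIPR instruction, a four-element single-precision dot product. The old path multiplied and summed entirely in single precision (mulps plus haddps or a shuffle ladder); the new sequence widens every element to double, multiplies and accumulates in double, and rounds to single exactly once at the end, which is the Windy Valley fix named in the commit history. In scalar C++ the emitted code computes, in effect (sketch):

	// double-precision FIPR emulation: a single rounding at the end
	float fipr(const float a[4], const float b[4])
	{
		double sum = (double)a[0] * b[0]
		           + (double)a[1] * b[1]
		           + (double)a[2] * b[2]
		           + (double)a[3] * b[3];
		return (float)sum;
	}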
@ -1217,6 +1196,8 @@ public:
break; break;
} }
regalloc.OpEnd(&op); regalloc.OpEnd(&op);
if (flushXmmRegisters)
regalloc.FlushXmmRegisters(&op);
} }
regalloc.Cleanup(); regalloc.Cleanup();
current_opid = -1; current_opid = -1;
@ -1305,8 +1286,7 @@ public:
if (mmu_enabled()) if (mmu_enabled())
mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc mov(call_regs[1], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
-		u32 size = op.flags & 0x7f;
-		switch (size) {
+		switch (op.size()) {
case 1: case 1:
if (!mmu_enabled()) if (!mmu_enabled())
GenCall(ReadMem8); GenCall(ReadMem8);
@ -1356,8 +1336,7 @@ public:
if (mmu_enabled()) if (mmu_enabled())
mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc mov(call_regs[2], block->vaddr + op.guest_offs - (op.delay_slot ? 1 : 0)); // pc
-		u32 size = op.flags & 0x7f;
-		switch (size) {
+		switch (op.size()) {
case 1: case 1:
if (!mmu_enabled()) if (!mmu_enabled())
GenCall(WriteMem8); GenCall(WriteMem8);
@ -1441,11 +1420,6 @@ public:
// store from xmm0 // store from xmm0
case CPT_f32rv: case CPT_f32rv:
host_reg_to_shil_param(prm, xmm0); host_reg_to_shil_param(prm, xmm0);
#ifdef EXPLODE_SPANS
// The x86 dynarec saves to mem as well
//mov(rax, (uintptr_t)prm.reg_ptr());
//movd(dword[rax], xmm0);
#endif
break; break;
} }
} }
@ -1457,23 +1431,24 @@ public:
for (int i = CC_pars.size(); i-- > 0;) for (int i = CC_pars.size(); i-- > 0;)
{ {
verify(xmmused < 4 && regused < 4);
const shil_param& prm = *CC_pars[i].prm; const shil_param& prm = *CC_pars[i].prm;
switch (CC_pars[i].type) { switch (CC_pars[i].type) {
//push the contents //push the contents
case CPT_u32: case CPT_u32:
verify(regused < call_regs.size());
shil_param_to_host_reg(prm, call_regs[regused++]); shil_param_to_host_reg(prm, call_regs[regused++]);
break; break;
case CPT_f32: case CPT_f32:
verify(xmmused < call_regsxmm.size());
shil_param_to_host_reg(prm, call_regsxmm[xmmused++]); shil_param_to_host_reg(prm, call_regsxmm[xmmused++]);
break; break;
//push the ptr itself //push the ptr itself
case CPT_ptr: case CPT_ptr:
verify(prm.is_reg()); verify(prm.is_reg());
verify(regused < call_regs64.size());
mov(call_regs64[regused++], (size_t)prm.reg_ptr()); mov(call_regs64[regused++], (size_t)prm.reg_ptr());
break; break;
@ -1495,17 +1470,34 @@ public:
mov(rax, (size_t)GetRegPtr(reg)); mov(rax, (size_t)GetRegPtr(reg));
mov(dword[rax], Xbyak::Reg32(nreg)); mov(dword[rax], Xbyak::Reg32(nreg));
} }
void RegPreload_FPU(u32 reg, s8 nreg) void RegPreload_FPU(u32 reg, s8 nreg, bool _64bit)
{ {
mov(rax, (size_t)GetRegPtr(reg)); mov(rax, (size_t)GetRegPtr(reg));
if (_64bit)
movq(Xbyak::Xmm(nreg), qword[rax]);
else
movss(Xbyak::Xmm(nreg), dword[rax]); movss(Xbyak::Xmm(nreg), dword[rax]);
} }
void RegWriteback_FPU(u32 reg, s8 nreg) void RegWriteback_FPU(u32 reg, s8 nreg, bool _64bit)
{ {
mov(rax, (size_t)GetRegPtr(reg)); mov(rax, (size_t)GetRegPtr(reg));
if (_64bit)
movq(qword[rax], Xbyak::Xmm(nreg));
else
movss(dword[rax], Xbyak::Xmm(nreg)); movss(dword[rax], Xbyak::Xmm(nreg));
} }
void RegMerge_FPU(s8 reg1, s8 reg2)
{
psllq(Xbyak::Xmm(reg2), 32);
por(Xbyak::Xmm(reg1), Xbyak::Xmm(reg2));
}
void RegShift_FPU(s8 reg)
{
psrlq(Xbyak::Xmm(reg), 32);
}
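Note: the x64 versions of the same pairing hooks work on xmm registers: RegMerge_FPU shifts the second register's 64-bit lane left by 32 (psllq, clobbering reg2) and ORs it into the first (por); RegShift_FPU (psrlq) drops the upper single into the low lane. Scalar view (sketch):

	#include <cstdint>

	uint64_t reg_merge_fpu(uint32_t lo_single_bits, uint32_t hi_single_bits)
	{
		return ((uint64_t)hi_single_bits << 32) | lo_single_bits;  // psllq 32 + por
	}

	uint32_t reg_shift_fpu(uint64_t xmm_lane)
	{
		return (uint32_t)(xmm_lane >> 32);                         // psrlq 32
	}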
private: private:
typedef void (BlockCompiler::*X64BinaryOp)(const Xbyak::Operand&, const Xbyak::Operand&); typedef void (BlockCompiler::*X64BinaryOp)(const Xbyak::Operand&, const Xbyak::Operand&);
typedef void (BlockCompiler::*X64BinaryFOp)(const Xbyak::Xmm&, const Xbyak::Operand&); typedef void (BlockCompiler::*X64BinaryFOp)(const Xbyak::Xmm&, const Xbyak::Operand&);
@ -1514,11 +1506,12 @@ private:
{ {
if (!op.rs1.is_imm()) if (!op.rs1.is_imm())
return false; return false;
u32 size = op.flags & 0x7f; u32 size = op.size();
u32 addr = op.rs1._imm; u32 addr = op.rs1._imm;
if (mmu_enabled()) if (mmu_enabled())
{ {
if ((addr >> 12) != (block->vaddr >> 12)) if ((addr >> 12) < (block->vaddr >> 12)
|| ((addr + size - 1) >> 12) > (block->vaddr + block->sh4_code_size - 1) >> 12)
// When full mmu is on, only consider addresses in the same 4k page // When full mmu is on, only consider addresses in the same 4k page
return false; return false;
@ -1533,9 +1526,11 @@ private:
rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr); rv = mmu_data_translation<MMU_TT_DREAD, u16>(addr, paddr);
break; break;
case 4: case 4:
case 8:
rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr); rv = mmu_data_translation<MMU_TT_DREAD, u32>(addr, paddr);
break; break;
case 8:
rv = mmu_data_translation<MMU_TT_DREAD, u64>(addr, paddr);
break;
default: default:
die("Invalid immediate size"); die("Invalid immediate size");
break; break;
@ -1546,7 +1541,7 @@ private:
addr = paddr; addr = paddr;
} }
bool isram = false; bool isram = false;
void* ptr = _vmem_read_const(addr, isram, size > 4 ? 4 : size); void* ptr = _vmem_read_const(addr, isram, size);
if (isram) if (isram)
{ {
@ -1590,17 +1585,11 @@ private:
break; break;
case 8: case 8:
-				mov(rcx, qword[rax]);
-#ifdef EXPLODE_SPANS
-				if (op.rd.count() == 2 && regalloc.IsAllocf(op.rd, 0) && regalloc.IsAllocf(op.rd, 1))
-				{
-					movd(regalloc.MapXRegister(op.rd, 0), ecx);
-					shr(rcx, 32);
-					movd(regalloc.MapXRegister(op.rd, 1), ecx);
-				}
+				if (regalloc.IsAllocf(op.rd))
+					movq(regalloc.MapXRegister(op.rd), qword[rax]);
 				else
-#endif
 				{
+					mov(rcx, qword[rax]);
 					mov(rax, (uintptr_t)op.rd.reg_ptr());
 					mov(qword[rax], rcx);
 				}
@ -1616,6 +1605,7 @@ private:
// Not RAM: the returned pointer is a memory handler // Not RAM: the returned pointer is a memory handler
if (size == 8) if (size == 8)
{ {
// FIXME the call to _vmem_read_const() would have asserted at this point
verify(!regalloc.IsAllocAny(op.rd)); verify(!regalloc.IsAllocAny(op.rd));
// Need to call the handler twice // Need to call the handler twice
@ -1664,11 +1654,12 @@ private:
{ {
if (!op.rs1.is_imm()) if (!op.rs1.is_imm())
return false; return false;
u32 size = op.flags & 0x7f; u32 size = op.size();
u32 addr = op.rs1._imm; u32 addr = op.rs1._imm;
if (mmu_enabled()) if (mmu_enabled())
{ {
if ((addr >> 12) != (block->vaddr >> 12)) if ((addr >> 12) < (block->vaddr >> 12)
|| ((addr + size - 1) >> 12) > (block->vaddr + block->sh4_code_size - 1) >> 12)
// When full mmu is on, only consider addresses in the same 4k page // When full mmu is on, only consider addresses in the same 4k page
return false; return false;
@ -1683,9 +1674,11 @@ private:
rv = mmu_data_translation<MMU_TT_DWRITE, u16>(addr, paddr); rv = mmu_data_translation<MMU_TT_DWRITE, u16>(addr, paddr);
break; break;
case 4: case 4:
case 8:
rv = mmu_data_translation<MMU_TT_DWRITE, u32>(addr, paddr); rv = mmu_data_translation<MMU_TT_DWRITE, u32>(addr, paddr);
break; break;
case 8:
rv = mmu_data_translation<MMU_TT_DWRITE, u64>(addr, paddr);
break;
default: default:
die("Invalid immediate size"); die("Invalid immediate size");
break; break;
@ -1696,7 +1689,7 @@ private:
addr = paddr; addr = paddr;
} }
bool isram = false; bool isram = false;
void* ptr = _vmem_write_const(addr, isram, size > 4 ? 4 : size); void* ptr = _vmem_write_const(addr, isram, size);
if (isram) if (isram)
{ {
@ -1746,16 +1739,9 @@ private:
break; break;
case 8: case 8:
-#ifdef EXPLODE_SPANS
-				if (op.rs2.count() == 2 && regalloc.IsAllocf(op.rs2, 0) && regalloc.IsAllocf(op.rs2, 1))
-				{
-					movd(call_regs[1], regalloc.MapXRegister(op.rs2, 1));
-					shl(call_regs64[1], 32);
-					movd(eax, regalloc.MapXRegister(op.rs2, 0));
-					or_(call_regs64[1], rax);
-				}
+				if (regalloc.IsAllocf(op.rs2))
+					movq(qword[rax], regalloc.MapXRegister(op.rs2));
 				else
-#endif
 				{
 					mov(rcx, (uintptr_t)op.rs2.reg_ptr());
 					mov(rcx, qword[rcx]);
@ -1792,7 +1778,6 @@ private:
mov(rax, (uintptr_t)virt_ram_base); mov(rax, (uintptr_t)virt_ram_base);
u32 size = op.flags & 0x7f;
//verify(getCurr() - start_addr == 26); //verify(getCurr() - start_addr == 26);
if (mem_access_offset == 0) if (mem_access_offset == 0)
mem_access_offset = getCurr() - start_addr; mem_access_offset = getCurr() - start_addr;
@ -1800,7 +1785,7 @@ private:
verify(getCurr() - start_addr == mem_access_offset); verify(getCurr() - start_addr == mem_access_offset);
block->memory_accesses[(void*)getCurr()] = (u32)current_opid; block->memory_accesses[(void*)getCurr()] = (u32)current_opid;
switch (size) switch (op.size())
{ {
case 1: case 1:
movsx(eax, byte[rax + call_regs64[0]]); movsx(eax, byte[rax + call_regs64[0]]);
@ -1841,7 +1826,6 @@ private:
mov(rax, (uintptr_t)virt_ram_base); mov(rax, (uintptr_t)virt_ram_base);
u32 size = op.flags & 0x7f;
//verify(getCurr() - start_addr == 26); //verify(getCurr() - start_addr == 26);
if (mem_access_offset == 0) if (mem_access_offset == 0)
mem_access_offset = getCurr() - start_addr; mem_access_offset = getCurr() - start_addr;
@ -1849,18 +1833,18 @@ private:
verify(getCurr() - start_addr == mem_access_offset); verify(getCurr() - start_addr == mem_access_offset);
block->memory_accesses[(void*)getCurr()] = (u32)current_opid; block->memory_accesses[(void*)getCurr()] = (u32)current_opid;
switch (size) switch (op.size())
{ {
case 1: case 1:
mov(byte[rax + call_regs64[0] + 0], call_regs[1].cvt8()); mov(byte[rax + call_regs64[0] + 0], call_regs64[1].cvt8());
break; break;
case 2: case 2:
mov(word[rax + call_regs64[0]], call_regs[1].cvt16()); mov(word[rax + call_regs64[0]], call_regs64[1].cvt16());
break; break;
case 4: case 4:
mov(dword[rax + call_regs64[0]], call_regs[1]); mov(dword[rax + call_regs64[0]], call_regs64[1].cvt32());
break; break;
case 8: case 8:
@ -1997,67 +1981,11 @@ private:
void GenCall(Ret(*function)(Params...), bool skip_floats = false) void GenCall(Ret(*function)(Params...), bool skip_floats = false)
{ {
#ifndef _WIN32 #ifndef _WIN32
+		if (!skip_floats)
+			flushXmmRegisters = true;
-		bool xmm8_mapped = !skip_floats && current_opid != -1 && regalloc.IsMapped(xmm8, current_opid);
-		bool xmm9_mapped = !skip_floats && current_opid != -1 && regalloc.IsMapped(xmm9, current_opid);
-		bool xmm10_mapped = !skip_floats && current_opid != -1 && regalloc.IsMapped(xmm10, current_opid);
-		bool xmm11_mapped = !skip_floats && current_opid != -1 && regalloc.IsMapped(xmm11, current_opid);
-		// Need to save xmm registers as they are not preserved in linux/mach
-		int offset = 0;
-		if (xmm8_mapped || xmm9_mapped || xmm10_mapped || xmm11_mapped)
-		{
-			sub(rsp, 4 * (xmm8_mapped + xmm9_mapped + xmm10_mapped + xmm11_mapped));
-			if (xmm8_mapped)
-			{
-				movd(ptr[rsp + offset], xmm8);
-				offset += 4;
-			}
-			if (xmm9_mapped)
-			{
-				movd(ptr[rsp + offset], xmm9);
-				offset += 4;
-			}
-			if (xmm10_mapped)
-			{
-				movd(ptr[rsp + offset], xmm10);
-				offset += 4;
-			}
-			if (xmm11_mapped)
-			{
-				movd(ptr[rsp + offset], xmm11);
-				offset += 4;
-			}
-		}
#endif #endif
call(CC_RX2RW(function)); call(CC_RX2RW(function));
-#ifndef _WIN32
-		if (xmm8_mapped || xmm9_mapped || xmm10_mapped || xmm11_mapped)
-		{
-			if (xmm11_mapped)
-			{
-				offset -= 4;
-				movd(xmm11, ptr[rsp + offset]);
-			}
-			if (xmm10_mapped)
-			{
-				offset -= 4;
-				movd(xmm10, ptr[rsp + offset]);
-			}
-			if (xmm9_mapped)
-			{
-				offset -= 4;
-				movd(xmm9, ptr[rsp + offset]);
-			}
-			if (xmm8_mapped)
-			{
-				offset -= 4;
-				movd(xmm8, ptr[rsp + offset]);
-			}
-			add(rsp, 4 * (xmm8_mapped + xmm9_mapped + xmm10_mapped + xmm11_mapped));
-		}
-#endif
} }
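Note: on Windows the mapped FP registers survive helper calls (xmm6-xmm15 are callee-saved there, per the comment in x64_regalloc.h), while on the SysV targets every xmm register is caller-saved, which is what the deleted save/restore block was working around. The replacement defers the work to the register allocator: GenCall() just raises a flag and the compile loop flushes after the op. Flow sketch assembled from the hunks in this change (not a verbatim quote):

	// per-op compile loop (sketch)
	regalloc.OpBegin(&op, current_opid);
	flushXmmRegisters = false;           // reset for this op
	// ... emit code; on non-Windows, GenCall(fn) sets flushXmmRegisters = true ...
	regalloc.OpEnd(&op);
	if (flushXmmRegisters)
		regalloc.FlushXmmRegisters(&op); // write live xmm mappings back to the context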
// uses eax/rax // uses eax/rax
@ -2092,6 +2020,14 @@ private:
mov((const Xbyak::Reg32 &)reg, dword[rax]); mov((const Xbyak::Reg32 &)reg, dword[rax]);
} }
} }
else if (param.is_r64f() && regalloc.IsAllocf(param))
{
Xbyak::Xmm sreg = regalloc.MapXRegister(param);
if (!reg.isXMM())
movq((const Xbyak::Reg64 &)reg, sreg);
else if (reg != sreg)
movq((const Xbyak::Xmm &)reg, sreg);
}
else else
{ {
if (regalloc.IsAllocg(param)) if (regalloc.IsAllocg(param))
@ -2105,6 +2041,15 @@ private:
else else
{ {
mov(rax, (size_t)param.reg_ptr()); mov(rax, (size_t)param.reg_ptr());
if (param.is_r64f())
{
if (!reg.isXMM())
mov((const Xbyak::Reg64 &)reg, qword[rax]);
else
movq((const Xbyak::Xmm &)reg, qword[rax]);
}
else
{
if (!reg.isXMM()) if (!reg.isXMM())
mov((const Xbyak::Reg32 &)reg, dword[rax]); mov((const Xbyak::Reg32 &)reg, dword[rax]);
else else
@ -2112,13 +2057,14 @@ private:
} }
} }
} }
}
else else
{ {
verify(param.is_null()); verify(param.is_null());
} }
} }
// uses rax // uses rax or rcx
void host_reg_to_shil_param(const shil_param& param, const Xbyak::Reg& reg) void host_reg_to_shil_param(const shil_param& param, const Xbyak::Reg& reg)
{ {
if (regalloc.IsAllocg(param)) if (regalloc.IsAllocg(param))
@ -2133,17 +2079,38 @@ private:
{ {
Xbyak::Xmm sreg = regalloc.MapXRegister(param); Xbyak::Xmm sreg = regalloc.MapXRegister(param);
 			if (!reg.isXMM())
+			{
+				if (param.is_r64f())
+					movq(sreg, (const Xbyak::Reg64 &)reg);
+				else
 				movd(sreg, (const Xbyak::Reg32 &)reg);
+			}
 			else if (reg != sreg)
+			{
+				if (param.is_r64f())
+					movq(sreg, (const Xbyak::Xmm &)reg);
+				else
 				movss(sreg, (const Xbyak::Xmm &)reg);
+			}
 		}
 		else
 		{
-			mov(rax, (size_t)param.reg_ptr());
-			if (!reg.isXMM())
-				mov(dword[rax], (const Xbyak::Reg32 &)reg);
-			else
-				movss(dword[rax], (const Xbyak::Xmm &)reg);
+			const Xbyak::Reg& tmpReg = reg.getIdx() == rax.getIdx() ? rcx : rax;
+			mov(tmpReg, (size_t)param.reg_ptr());
+			if (param.is_r64f())
+			{
+				if (!reg.isXMM())
+					mov(qword[tmpReg], (const Xbyak::Reg64 &)reg);
+				else
+					movsd(qword[tmpReg], (const Xbyak::Xmm &)reg);
+			}
+			else
+			{
+				if (!reg.isXMM())
+					mov(dword[tmpReg], (const Xbyak::Reg32 &)reg);
+				else
+					movss(dword[tmpReg], (const Xbyak::Xmm &)reg);
+			}
 		}
 	}
@ -2161,6 +2128,7 @@ private:
X64RegAlloc regalloc; X64RegAlloc regalloc;
Xbyak::util::Cpu cpu; Xbyak::util::Cpu cpu;
size_t current_opid; size_t current_opid;
bool flushXmmRegisters = false;
Xbyak::Label exit_block; Xbyak::Label exit_block;
static const u32 read_mem_op_size; static const u32 read_mem_op_size;
static const u32 write_mem_op_size; static const u32 write_mem_op_size;
@ -2180,15 +2148,22 @@ void X64RegAlloc::Writeback(u32 reg, Xbyak::Operand::Code nreg)
{ {
compiler->RegWriteback(reg, nreg); compiler->RegWriteback(reg, nreg);
} }
void X64RegAlloc::Preload_FPU(u32 reg, s8 nreg) void X64RegAlloc::Preload_FPU(u32 reg, s8 nreg, bool _64bit)
{ {
compiler->RegPreload_FPU(reg, nreg); compiler->RegPreload_FPU(reg, nreg, _64bit);
} }
void X64RegAlloc::Writeback_FPU(u32 reg, s8 nreg) void X64RegAlloc::Writeback_FPU(u32 reg, s8 nreg, bool _64bit)
{ {
compiler->RegWriteback_FPU(reg, nreg); compiler->RegWriteback_FPU(reg, nreg, _64bit);
}
void X64RegAlloc::Merge_FPU(s8 reg1, s8 reg2)
{
compiler->RegMerge_FPU(reg1, reg2);
}
void X64RegAlloc::Shift_FPU(s8 reg)
{
compiler->RegShift_FPU(reg);
} }
static BlockCompiler* compiler; static BlockCompiler* compiler;
void ngen_Compile(RuntimeBlockInfo* block, bool smc_checks, bool reset, bool staging, bool optimise) void ngen_Compile(RuntimeBlockInfo* block, bool smc_checks, bool reset, bool staging, bool optimise)


@ -16,38 +16,33 @@
You should have received a copy of the GNU General Public License You should have received a copy of the GNU General Public License
along with reicast. If not, see <https://www.gnu.org/licenses/>. along with reicast. If not, see <https://www.gnu.org/licenses/>.
*/ */
+#pragma once
-#ifndef CORE_REC_X64_X64_REGALLOC_H_
-#define CORE_REC_X64_X64_REGALLOC_H_
-//#define OLD_REGALLOC
 #include "deps/xbyak/xbyak.h"
-#ifdef OLD_REGALLOC
-#include "hw/sh4/dyna/regalloc.h"
-#else
 #include "hw/sh4/dyna/ssa_regalloc.h"
-#endif
#ifdef _WIN32 #ifdef _WIN32
static Xbyak::Operand::Code alloc_regs[] = { Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI, Xbyak::Operand::RSI, static Xbyak::Operand::Code alloc_regs[] = { Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::RDI, Xbyak::Operand::RSI,
Xbyak::Operand::R12, Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15, (Xbyak::Operand::Code)-1 }; Xbyak::Operand::R12, Xbyak::Operand::R13, Xbyak::Operand::R14, Xbyak::Operand::R15, (Xbyak::Operand::Code)-1 };
static s8 alloc_fregs[] = { 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1 }; // XMM6 to XMM15 are callee-saved in Windows static s8 alloc_fregs[] = { 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, -1 }; // XMM6 to XMM15 are callee-saved in Windows
#else #else
-static Xbyak::Operand::Code alloc_regs[] = { Xbyak::Operand::RBX, Xbyak::Operand::RBP, Xbyak::Operand::R12, Xbyak::Operand::R13,
-		Xbyak::Operand::R14, Xbyak::Operand::R15, (Xbyak::Operand::Code)-1 };
+static Xbyak::Operand::Code alloc_regs[] = {
+		Xbyak::Operand::RBX,
+		Xbyak::Operand::R12,
+		Xbyak::Operand::R13,
+		Xbyak::Operand::R14,
+		Xbyak::Operand::R15,
+#ifndef NO_OMIT_FRAME_POINTER
+		Xbyak::Operand::RBP,
+#endif
+		(Xbyak::Operand::Code)-1
+};
static s8 alloc_fregs[] = { 8, 9, 10, 11, -1 }; // XMM8-11 static s8 alloc_fregs[] = { 8, 9, 10, 11, -1 }; // XMM8-11
#endif #endif
class BlockCompiler; class BlockCompiler;
-struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8,
-#ifdef EXPLODE_SPANS
-		true
-#else
-		false
-#endif
-	>
+struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8, true>
{ {
X64RegAlloc(BlockCompiler *compiler) : compiler(compiler) {} X64RegAlloc(BlockCompiler *compiler) : compiler(compiler) {}
@ -58,8 +53,10 @@ struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8,
virtual void Preload(u32 reg, Xbyak::Operand::Code nreg) override; virtual void Preload(u32 reg, Xbyak::Operand::Code nreg) override;
virtual void Writeback(u32 reg, Xbyak::Operand::Code nreg) override; virtual void Writeback(u32 reg, Xbyak::Operand::Code nreg) override;
virtual void Preload_FPU(u32 reg, s8 nreg) override; virtual void Preload_FPU(u32 reg, s8 nreg, bool _64bit) override;
virtual void Writeback_FPU(u32 reg, s8 nreg) override; virtual void Writeback_FPU(u32 reg, s8 nreg, bool _64bit) override;
virtual void Merge_FPU(s8 reg1, s8 reg2) override;
virtual void Shift_FPU(s8 reg) override;
Xbyak::Reg32 MapRegister(const shil_param& param) Xbyak::Reg32 MapRegister(const shil_param& param)
{ {
@ -71,11 +68,7 @@ struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8,
Xbyak::Xmm MapXRegister(const shil_param& param, u32 index = 0) Xbyak::Xmm MapXRegister(const shil_param& param, u32 index = 0)
{ {
-#ifdef OLD_REGALLOC
-		s8 ereg = mapfv(param, index);
-#else
 		s8 ereg = mapf(param);
-#endif
if (ereg == -1) if (ereg == -1)
die("VRegister not allocated"); die("VRegister not allocated");
return Xbyak::Xmm(ereg); return Xbyak::Xmm(ereg);
@ -83,19 +76,14 @@ struct X64RegAlloc : RegAlloc<Xbyak::Operand::Code, s8,
 	bool IsMapped(const Xbyak::Xmm &xmm, size_t opid)
 	{
-#ifndef OLD_REGALLOC
 		return regf_used((s8)xmm.getIdx());
-#else
-		for (size_t sid = 0; sid < all_spans.size(); sid++)
-		{
-			if (all_spans[sid]->nregf == xmm.getIdx() && all_spans[sid]->contains(opid))
-				return true;
-		}
-		return false;
-#endif
 	}
+
+	void FlushXmmRegisters(shil_opcode *opcode)
+	{
+		for (Sh4RegType reg = reg_fr_0; reg <= reg_xf_15; reg = (Sh4RegType)(reg + 1))
+			FlushReg(reg, true, true);
+	}
BlockCompiler *compiler; BlockCompiler *compiler;
}; };
#endif /* CORE_REC_X64_X64_REGALLOC_H_ */


@ -263,7 +263,7 @@ void ngen_opcode(RuntimeBlockInfo* block, shil_opcode* op,x86_block* x86e, bool
verify(reg.IsAllocAny((Sh4RegType)(op->rd._reg + i))); verify(reg.IsAllocAny((Sh4RegType)(op->rd._reg + i)));
} }
u32 size = op->flags & 0x7f; u32 size = op->size();
if (op->rs1.is_imm()) if (op->rs1.is_imm())
{ {
@ -449,7 +449,7 @@ void ngen_opcode(RuntimeBlockInfo* block, shil_opcode* op,x86_block* x86e, bool
case shop_writem: case shop_writem:
{ {
u32 size=op->flags&0x7f; u32 size = op->size();
verify(reg.IsAllocg(op->rs1) || op->rs1.is_imm()); verify(reg.IsAllocg(op->rs1) || op->rs1.is_imm());
verify(op->rs2.is_imm() || op->rs2.is_r32() || (op->rs2.count()==2 && reg.IsAllocf(op->rs2,0) && reg.IsAllocf(op->rs2,1))); verify(op->rs2.is_imm() || op->rs2.is_r32() || (op->rs2.count()==2 && reg.IsAllocf(op->rs2,0) && reg.IsAllocf(op->rs2,1)));

View file

@ -188,7 +188,6 @@ public:
if (!find_mapping()) if (!find_mapping())
input_mapper = new KbInputMapping(); input_mapper = new KbInputMapping();
} }
virtual ~SDLKbGamepadDevice() {}
}; };
class MouseInputMapping : public InputMapping class MouseInputMapping : public InputMapping
@ -215,7 +214,6 @@ public:
if (!find_mapping()) if (!find_mapping())
input_mapper = new MouseInputMapping(); input_mapper = new MouseInputMapping();
} }
virtual ~SDLMouseGamepadDevice() {}
bool gamepad_btn_input(u32 code, bool pressed) override bool gamepad_btn_input(u32 code, bool pressed) override
{ {
if (gui_is_open()) if (gui_is_open())


@ -139,7 +139,6 @@ public:
//E7 Right S3 //E7 Right S3
//E8-FF Reserved //E8-FF Reserved
} }
virtual ~SDLKeyboardDevice() {}
virtual const char* name() override { return "SDL Keyboard"; } virtual const char* name() override { return "SDL Keyboard"; }
protected: protected:


@ -603,14 +603,14 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size)
REICAST_USA(aica_reg,0x8000); REICAST_USA(aica_reg,0x8000);
channel_unserialize(data, total_size, V7_LIBRETRO); channel_unserialize(data, total_size, V8_LIBRETRO);
REICAST_USA(cdda_sector,CDDA_SIZE); REICAST_USA(cdda_sector,CDDA_SIZE);
REICAST_US(cdda_index); REICAST_US(cdda_index);
REICAST_SKIP(4 * 64); // mxlr REICAST_SKIP(4 * 64); // mxlr
REICAST_US(i); // samples_gen REICAST_US(i); // samples_gen
register_unserialize(sb_regs, data, total_size, V7_LIBRETRO) ; register_unserialize(sb_regs, data, total_size, V8_LIBRETRO) ;
REICAST_US(SB_ISTNRM); REICAST_US(SB_ISTNRM);
REICAST_US(SB_FFST_rc); REICAST_US(SB_FFST_rc);
REICAST_US(SB_FFST); REICAST_US(SB_FFST);
@ -721,16 +721,16 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size)
pal_needs_update = true; pal_needs_update = true;
REICAST_USA(OnChipRAM.data,OnChipRAM_SIZE); REICAST_USA(OnChipRAM.data,OnChipRAM_SIZE);
register_unserialize(CCN, data, total_size, V7_LIBRETRO) ; register_unserialize(CCN, data, total_size, V8_LIBRETRO) ;
register_unserialize(UBC, data, total_size, V7_LIBRETRO) ; register_unserialize(UBC, data, total_size, V8_LIBRETRO) ;
register_unserialize(BSC, data, total_size, V7_LIBRETRO) ; register_unserialize(BSC, data, total_size, V8_LIBRETRO) ;
register_unserialize(DMAC, data, total_size, V7_LIBRETRO) ; register_unserialize(DMAC, data, total_size, V8_LIBRETRO) ;
register_unserialize(CPG, data, total_size, V7_LIBRETRO) ; register_unserialize(CPG, data, total_size, V8_LIBRETRO) ;
register_unserialize(RTC, data, total_size, V7_LIBRETRO) ; register_unserialize(RTC, data, total_size, V8_LIBRETRO) ;
register_unserialize(INTC, data, total_size, V7_LIBRETRO) ; register_unserialize(INTC, data, total_size, V8_LIBRETRO) ;
register_unserialize(TMU, data, total_size, V7_LIBRETRO) ; register_unserialize(TMU, data, total_size, V8_LIBRETRO) ;
register_unserialize(SCI, data, total_size, V7_LIBRETRO) ; register_unserialize(SCI, data, total_size, V8_LIBRETRO) ;
register_unserialize(SCIF, data, total_size, V7_LIBRETRO) ; register_unserialize(SCIF, data, total_size, V8_LIBRETRO) ;
u16 dummyshort; u16 dummyshort;
@ -881,7 +881,7 @@ static bool dc_unserialize_libretro(void **data, unsigned int *total_size)
REICAST_US(i); //LIBRETRO_S(cycle_counter); REICAST_US(i); //LIBRETRO_S(cycle_counter);
REICAST_US(i); // idxnxx REICAST_US(i); // idxnxx
REICAST_SKIP(sizeof(state_t)); // state REICAST_SKIP(44); // state
REICAST_US(i); // div_som_reg1 REICAST_US(i); // div_som_reg1
REICAST_US(i); // div_som_reg2 REICAST_US(i); // div_som_reg2
REICAST_US(i); // div_som_reg3 REICAST_US(i); // div_som_reg3
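Note: `REICAST_SKIP(sizeof(state_t))` becomes `REICAST_SKIP(44)`. A frozen savestate format has to skip exactly the number of bytes that were written, so the literal pins the V8_LIBRETRO layout; `sizeof(state_t)` would silently shift every following field if the struct ever changes. Sketch of the intent (REICAST_SKIP's exact semantics are assumed, not shown in this diff):

	// assumed: REICAST_SKIP(n) advances the savestate cursor by n bytes
	const unsigned STATE_T_SIZE_AT_V8 = 44;   // size state_t had when V8 was frozen
	REICAST_SKIP(STATE_T_SIZE_AT_V8);         // not sizeof(state_t): the format is frozen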
@ -921,7 +921,7 @@ bool dc_unserialize(void **data, unsigned int *total_size)
*total_size = 0 ; *total_size = 0 ;
REICAST_US(version) ; REICAST_US(version) ;
if (version == V7_LIBRETRO) if (version == V8_LIBRETRO)
return dc_unserialize_libretro(data, total_size); return dc_unserialize_libretro(data, total_size);
if (version != V4 && version < V5) if (version != V4 && version < V5)
{ {


@ -272,8 +272,8 @@ using namespace std;
#define likely(x) x #define likely(x) x
#define unlikely(x) x #define unlikely(x) x
#else #else
#define likely(x) __builtin_expect((x),1) #define likely(x) __builtin_expect(static_cast<bool>(x), 1)
#define unlikely(x) __builtin_expect((x),0) #define unlikely(x) __builtin_expect(static_cast<bool>(x), 0)
#endif #endif
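Note: `__builtin_expect` is typed `long __builtin_expect(long, long)`, so the old macros rejected pointer conditions outright (there is no implicit pointer-to-long conversion) and, for wide integers, compared the raw value against the expected 0 or 1. Casting through bool first makes any scalar condition legal and normalizes it to exactly 0 or 1. Usage is unchanged (sketch; GCC/Clang builtins):

	#define unlikely(x) __builtin_expect(static_cast<bool>(x), 0)

	int deref_or_zero(const int* p)
	{
		if (unlikely(!p))   // with the bool cast, even `unlikely(p)` compiles
			return 0;
		return *p;
	}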
//basic includes //basic includes
@ -321,11 +321,11 @@ bool dc_unserialize(void **data, unsigned int *total_size);
#endif #endif
#ifndef STRIP_TEXT #ifndef STRIP_TEXT
#define verify(x) if((x)==false){ msgboxf("Verify Failed : " #x "\n in %s -> %s : %d \n",MBX_ICONERROR,(__FUNCTION__),(__FILE__),__LINE__); dbgbreak;} #define verify(x) do { if ((x) == false){ msgboxf("Verify Failed : " #x "\n in %s -> %s : %d \n", MBX_ICONERROR, (__FUNCTION__), (__FILE__), __LINE__); dbgbreak;}} while (false)
#define die(reason) { msgboxf("Fatal error : %s\n in %s -> %s : %d \n",MBX_ICONERROR,(reason),(__FUNCTION__),(__FILE__),__LINE__); dbgbreak;} #define die(reason) do { msgboxf("Fatal error : %s\n in %s -> %s : %d \n", MBX_ICONERROR,(reason), (__FUNCTION__), (__FILE__), __LINE__); dbgbreak;} while (false)
#else #else
#define verify(x) if((x)==false) { dbgbreak; } #define verify(x) do { if ((x) == false) dbgbreak; } while (false)
#define die(reason) { dbgbreak; } #define die(reason) do { dbgbreak; } while (false)
#endif #endif
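Note: wrapping the bodies in do { ... } while (false) turns each macro into a single statement that requires its trailing semicolon, closing the dangling-else hole the old `if`-only expansion had (the verify-macro fix named in the commit history). The failure mode, self-contained (names invented for illustration):

	#include <cstdio>

	static void init_table() { puts("init"); }

	// old-style expansion, reproduced for demonstration
	#define verify_old(x) if ((x) == false) { puts("verify failed"); }

	int main()
	{
		bool initialized = true;
		const char* table = "data";
		if (initialized)
			verify_old(table != nullptr);
		else
			init_table();   // binds to verify_old's hidden `if`, so this runs
			                // whenever the check PASSES, despite initialized == true
	}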
@ -677,7 +677,8 @@ enum serialize_version_enum {
V4, V4,
V5_LIBRETRO_UNSUPPORTED, V5_LIBRETRO_UNSUPPORTED,
V6_LIBRETRO_UNSUPPORTED, V6_LIBRETRO_UNSUPPORTED,
V7_LIBRETRO, V7_LIBRETRO_UNSUPPORTED,
V8_LIBRETRO,
V5 = 800, V5 = 800,
V6 = 801, V6 = 801,


@ -197,8 +197,8 @@ private:
s16 last_left_thumb_y = 0; s16 last_left_thumb_y = 0;
s16 last_right_thumb_x = 0; s16 last_right_thumb_x = 0;
s16 last_right_thumb_y = 0; s16 last_right_thumb_y = 0;
double vib_stop_time; double vib_stop_time = 0;
float vib_inclination; float vib_inclination = 0;
static std::vector<std::shared_ptr<XInputGamepadDevice>> xinput_gamepads; static std::vector<std::shared_ptr<XInputGamepadDevice>> xinput_gamepads;
}; };
@ -238,7 +238,6 @@ public:
if (!find_mapping()) if (!find_mapping())
input_mapper = new KbInputMapping(); input_mapper = new KbInputMapping();
} }
virtual ~WinKbGamepadDevice() {}
}; };
class MouseInputMapping : public InputMapping class MouseInputMapping : public InputMapping
@ -265,7 +264,7 @@ public:
if (!find_mapping()) if (!find_mapping())
input_mapper = new MouseInputMapping(); input_mapper = new MouseInputMapping();
} }
virtual ~WinMouseGamepadDevice() {}
bool gamepad_btn_input(u32 code, bool pressed) override bool gamepad_btn_input(u32 code, bool pressed) override
{ {
if (gui_is_open()) if (gui_is_open())