Compare commits

...
Sign in to create a new pull request.

34 commits

Author SHA1 Message Date
Unknown W. Brackets
735858ef42 ARM buildfix. 2015-04-12 14:45:32 -07:00
Unknown W. Brackets
5c6bebf3ba Oops, buildfix. 2015-04-12 14:33:17 -07:00
Unknown W. Brackets
0256c1d94c Fix a shift warning.
We only need 32-bits for this field, the shift is correct.
2015-04-12 14:20:06 -07:00
Henrik Rydgård
3fb980de0d Jit IR: Add "clobbered" flags. Apply FlushRemap to some FPR operations as well.
Use to avoid writeback when FlushRemap-ing when possible - however, disabled the optimization.
Missing flags in MIPSTables? Or doesn't work right until we unfold delay slots..
2015-04-12 14:20:04 -07:00
Henrik Rydgård
06ceb11175 Re-enable joint VROT 2015-04-12 14:17:28 -07:00
Henrik Rydgård
9aac27ce04 Fix some fp instruction in/out flags 2015-04-12 14:17:27 -07:00
Henrik Rydgård
c97948a143 jit viewer: Make it possible to quickly find blocks with FP instructions 2015-04-12 14:17:27 -07:00
Henrik Rydgård
c8ce6353aa Make ExtractIR no longer a member function of the Jit classes. Unify JitOptions across the backends (required for that) 2015-04-12 14:17:26 -07:00
Henrik Rydgård
a7a9f4e21f x86: Two minor optimizations: Ignore transpose for vmidt/vmone/vmzero, save a MOV in many cases of "jr ra" 2015-04-12 14:16:20 -07:00
Henrik Rydgård
1046a4fcdc x86: Apply the liveness optimization to "TriArith" instructions too. 2015-04-12 14:16:19 -07:00
Henrik Rydgård
e57c395787 Apply the liveness optimization to logic-imm ops too. Fix typo 2015-04-12 14:16:19 -07:00
Henrik Rydgård
2e6844f76f Further fixes to liveness. Use it to eliminate some mov instructions in addiu. 2015-04-12 14:14:53 -07:00
Henrik Rydgård
185c6484a5 Jit IR: Simplify DetermineRegisterUsage, thanks to the new op in/out flags 2015-04-12 14:13:43 -07:00
Henrik Rydgård
17cad13dcb Correct liveness computation so that it can be used for the intended
purpose: Being able to writeback/discard regs from the regalloc
immediately when no longer needed.
2015-04-12 14:13:43 -07:00
Henrik Rydgård
6944727ef7 IR: Compute in/out bitmasks for each instruction in the first pass, so
further passes get it for free.
2015-04-12 14:13:42 -07:00
Unknown W. Brackets
8a6d7e7e56 JIT IR: Fix typo breaking reordering. 2015-04-12 14:13:42 -07:00
Henrik Rydgard
04de9b180c Some more minor changes 2015-04-12 14:13:41 -07:00
Henrik Rydgard
db889c92e8 Address a bunch of minor issues 2015-04-12 14:13:41 -07:00
Henrik Rydgard
cad69038ef Best-effort update of the MipsJit prototype 2015-04-12 14:13:40 -07:00
Henrik Rydgard
799bd01a8d Fix IR on ARM 2015-04-12 14:13:39 -07:00
Henrik Rydgård
4e86c03a0c Add a trivial optimization 2015-04-12 14:13:39 -07:00
Henrik Rydgård
0dfd1be8ae Correct instruction counting, minor prep 2015-04-12 14:13:38 -07:00
Henrik Rydgård
53b5b9d202 Play around with shuffling around load/store instructions.
Should let us get much better use out of LDMIA/STMIA on ARM.
2015-04-12 14:13:38 -07:00
Henrik Rydgård
03a9050e52 Add way to print some block bloat stats. Disable block joining for now. 2015-04-12 14:13:37 -07:00
Henrik Rydgård
f1fa25cdee Undelete an accidentally deleted optimization (immbranch) 2015-04-12 14:13:37 -07:00
Henrik Rydgård
bb40d3e202 Aggressively inline across jal / jr ra. Will need limiting heuristics to prevent code bloat... 2015-04-12 14:13:36 -07:00
Henrik Rydgård
a64230f259 Follow plain jumps in the IR extractor
Disassembly gets confused though, that needs fixing (together with proxy blocks)
2015-04-12 14:13:35 -07:00
Henrik Rydgård
601ace2fde Get rid of the "compiling" flag. 2015-04-12 14:13:35 -07:00
Henrik Rydgård
e50772be98 Jit IR: Make it possible to show IR blocks in the jit viewer 2015-04-12 14:13:34 -07:00
Henrik Rydgård
9805f83a40 JIT IR: Some more work on IR processing 2015-04-12 14:13:34 -07:00
Henrik Rydgård
e697428f2f Debug build buildfix on mac (not very pretty) 2015-04-12 14:12:54 -07:00
Henrik Rydgård
08c65b9ef5 Jit IR: Fix mac build, update ARM code too (untested) 2015-04-12 14:12:53 -07:00
Henrik Rydgard
432852035e JIT IR: Skip already-compiled delay slots more elegantly than "-1". Solve the ReplaceJalTo problem. 2015-04-12 14:11:46 -07:00
Henrik Rydgard
78ddeefa78 Trivial IR implementation (extract instructions into a block, then compile out of that).
Has issues with replacements.
2015-04-12 14:10:34 -07:00
39 changed files with 958 additions and 554 deletions

View file

@ -1344,6 +1344,8 @@ add_library(${CoreLibName} ${CoreLinkType}
Core/MIPS/JitCommon/JitBlockCache.h
Core/MIPS/JitCommon/JitState.cpp
Core/MIPS/JitCommon/JitState.h
Core/MIPS/IR.cpp
Core/MIPS/IR.h
Core/MIPS/MIPS.cpp
Core/MIPS/MIPS.h
Core/MIPS/MIPSAnalyst.cpp

View file

@ -243,9 +243,10 @@ const char *DefaultLangRegion() {
const char *CreateRandMAC() {
std::stringstream randStream;
u32 value;
srand(time(nullptr));
for (int i = 0; i < 6; i++) {
u32 value = rand() % 256;
value = rand() % 256;
if (value <= 15)
randStream << '0' << std::hex << value;
else

View file

@ -416,6 +416,7 @@
<ClCompile Include="MIPS\JitCommon\JitBlockCache.cpp" />
<ClCompile Include="MIPS\JitCommon\JitCommon.cpp" />
<ClCompile Include="MIPS\JitCommon\JitState.cpp" />
<ClCompile Include="Mips\IR.cpp" />
<ClCompile Include="MIPS\MIPS.cpp" />
<ClCompile Include="MIPS\MIPSAnalyst.cpp" />
<ClCompile Include="MIPS\MIPSAsm.cpp" />
@ -661,6 +662,7 @@
<ClInclude Include="MIPS\JitCommon\JitCommon.h" />
<ClInclude Include="MIPS\JitCommon\NativeJit.h" />
<ClInclude Include="MIPS\JitCommon\JitState.h" />
<ClInclude Include="Mips\IR.h" />
<ClInclude Include="MIPS\MIPS.h" />
<ClInclude Include="MIPS\MIPSAnalyst.h" />
<ClInclude Include="MIPS\MIPSAsm.h" />
@ -731,4 +733,4 @@
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
</Project>

View file

@ -72,6 +72,9 @@
<ClCompile Include="MIPS\MIPS.cpp">
<Filter>MIPS</Filter>
</ClCompile>
<ClCompile Include="MIPS\IR.cpp">
<Filter>MIPS</Filter>
</ClCompile>
<ClCompile Include="MIPS\MIPSDebugInterface.cpp">
<Filter>MIPS</Filter>
</ClCompile>
@ -463,6 +466,9 @@
<ClCompile Include="MIPS\JitCommon\JitBlockCache.cpp">
<Filter>MIPS\JitCommon</Filter>
</ClCompile>
<ClCompile Include="MIPS\JitCommon\JitState.cpp">
<Filter>MIPS\JitCommon</Filter>
</ClCompile>
<ClCompile Include="Cwcheat.cpp">
<Filter>Core</Filter>
</ClCompile>
@ -613,6 +619,9 @@
<ClInclude Include="MIPS\MIPSDebugInterface.h">
<Filter>MIPS</Filter>
</ClInclude>
<ClInclude Include="MIPS\IR.h">
<Filter>MIPS</Filter>
</ClInclude>
<ClInclude Include="MIPS\MIPS.h">
<Filter>MIPS</Filter>
</ClInclude>
@ -1134,4 +1143,4 @@
<None Include="..\android\jni\Android.mk" />
<None Include="GameLogNotes.txt" />
</ItemGroup>
</Project>
</Project>

View file

@ -86,24 +86,6 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
immBranchTaken = !immBranchNotTaken;
}
if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) {
if (!immBranchTaken) {
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
js.compilerPC += 4;
return;
}
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
@ -166,7 +148,6 @@ void ArmJit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
WriteExit(GetCompilerPC() + 8, js.nextExit++);
}
js.compiling = false;
}
@ -180,98 +161,44 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
MIPSGPReg rs = _RS;
u32 targetAddr = GetCompilerPC() + offset + 4;
bool immBranch = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs)) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 imm = (s32)gpr.GetImm(rs);
switch (cc)
{
case CC_GT: immBranchNotTaken = imm > 0; break;
case CC_GE: immBranchNotTaken = imm >= 0; break;
case CC_LT: immBranchNotTaken = imm < 0; break;
case CC_LE: immBranchNotTaken = imm <= 0; break;
default: immBranchNotTaken = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp().");
}
immBranch = true;
immBranchTaken = !immBranchNotTaken;
}
if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) {
if (!immBranchTaken) {
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
js.compilerPC += 4;
return;
}
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (immBranch) {
// Continuing is handled above, this is just static jumping.
if (immBranchTaken && andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
if (immBranchTaken || !likely)
CompileDelaySlot(DELAYSLOT_FLUSH);
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs);
CMP(gpr.R(rs), Operand2(0, TYPE_IMM));
ArmGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
const u32 destAddr = immBranchTaken ? targetAddr : GetCompilerPC() + 8;
WriteExit(destAddr, js.nextExit++);
} else {
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs);
CMP(gpr.R(rs), Operand2(0, TYPE_IMM));
ArmGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_CC(cc);
}
else
{
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
if (andLink)
{
gpr.SetRegImm(SCRATCHREG1, GetCompilerPC() + 8);
STR(SCRATCHREG1, CTXREG, MIPS_REG_RA * 4);
}
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(GetCompilerPC() + 8, js.nextExit++);
ptr = B_CC(cc);
}
js.compiling = false;
else
{
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
if (andLink)
{
gpr.SetRegImm(SCRATCHREG1, GetCompilerPC() + 8);
STR(SCRATCHREG1, CTXREG, MIPS_REG_RA * 4);
}
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(GetCompilerPC() + 8, js.nextExit++);
}
@ -357,7 +284,6 @@ void ArmJit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely)
SetJumpTarget(ptr);
// Not taken
WriteExit(GetCompilerPC() + 8, js.nextExit++);
js.compiling = false;
}
void ArmJit::Comp_FPUBranch(MIPSOpcode op)
@ -404,8 +330,7 @@ void ArmJit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely)
ArmGen::FixupBranch ptr;
js.inDelaySlot = true;
if (!likely)
{
if (!likely) {
if (!delaySlotIsNice && !delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
@ -428,7 +353,6 @@ void ArmJit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely)
// Not taken
u32 notTakenTarget = GetCompilerPC() + (delaySlotIsBranch ? 4 : 8);
WriteExit(notTakenTarget, js.nextExit++);
js.compiling = false;
}
void ArmJit::Comp_VBranch(MIPSOpcode op)
@ -454,8 +378,6 @@ void ArmJit::Comp_Jump(MIPSOpcode op) {
if (!Memory::IsValidAddress(targetAddr)) {
if (js.nextExit == 0) {
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr);
} else {
js.compiling = false;
}
// TODO: Mark this block dirty or something? May be indication it will be changed by imports.
return;
@ -464,14 +386,6 @@ void ArmJit::Comp_Jump(MIPSOpcode op) {
switch (op >> 26) {
case 2: //j
CompileDelaySlot(DELAYSLOT_NICE);
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) {
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
FlushAll();
WriteExit(targetAddr, js.nextExit++);
break;
@ -482,14 +396,6 @@ void ArmJit::Comp_Jump(MIPSOpcode op) {
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
CompileDelaySlot(DELAYSLOT_NICE);
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) {
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
FlushAll();
WriteExit(targetAddr, js.nextExit++);
break;
@ -498,7 +404,6 @@ void ArmJit::Comp_Jump(MIPSOpcode op) {
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
js.compiling = false;
}
void ArmJit::Comp_JumpReg(MIPSOpcode op)
@ -542,15 +447,6 @@ void ArmJit::Comp_JumpReg(MIPSOpcode op)
gpr.DiscardR(MIPS_REG_T9);
}
if (jo.continueJumps && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions) {
AddContinuedBlock(gpr.GetImm(rs));
// Account for the increment in the loop.
js.compilerPC = gpr.GetImm(rs) - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
gpr.MapReg(rs);
destReg = gpr.R(rs); // Safe because FlushAll doesn't change any regs
FlushAll();
@ -564,19 +460,17 @@ void ArmJit::Comp_JumpReg(MIPSOpcode op)
FlushAll();
}
switch (op & 0x3f)
{
switch (op & 0x3f) {
case 8: //jr
break;
case 9: //jalr
break;
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
_dbg_assert_msg_(CPU, 0, "Trying to compile instruction that can't be compiled");
break;
}
WriteExitDestInR(destReg);
js.compiling = false;
}
@ -625,14 +519,12 @@ void ArmJit::Comp_Syscall(MIPSOpcode op)
RestoreDowncount();
WriteSyscallExit();
js.compiling = false;
}
void ArmJit::Comp_Break(MIPSOpcode op)
{
Comp_Generic(op);
WriteSyscallExit();
js.compiling = false;
}
} // namespace Mipscomp

View file

@ -65,10 +65,10 @@ void ArmJit::Comp_FPU3op(MIPSOpcode op)
case 2: { //F(fd) = F(fs) * F(ft); //mul
MIPSOpcode nextOp = GetOffsetInstruction(1);
// Optimization possible if destination is the same
if (fd == (int)((nextOp>>6) & 0x1F)) {
if (fd == (int)((nextOp >> 6) & 0x1F)) {
// VMUL + VNEG -> VNMUL
if (!strcmp(MIPSGetName(nextOp), "neg.s")) {
if (fd == (int)((nextOp>>11) & 0x1F)) {
if (fd == (int)((nextOp >> 11) & 0x1F)) {
VNMUL(fpr.R(fd), fpr.R(fs), fpr.R(ft));
EatInstruction(nextOp);
}

View file

@ -136,7 +136,7 @@ namespace MIPSComp
// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
// TODO: But some ops seem to use const 0 instead?
if (regnum >= n) {
WARN_LOG(CPU, "JIT: Invalid VFPU swizzle: %08x : %d / %d at PC = %08x (%s)", prefix, regnum, n, GetCompilerPC(), MIPSDisasmAt(js.compilerPC));
WARN_LOG(CPU, "JIT: Invalid VFPU swizzle: %08x : %d / %d at PC = %08x (%s)", prefix, regnum, n, GetCompilerPC(), MIPSDisasmAt(GetCompilerPC()));
regnum = 0;
}
@ -2031,7 +2031,7 @@ namespace MIPSComp
u32 nextOp = GetOffsetInstruction(1).encoding;
int vd2 = -1;
int imm2 = -1;
if ((nextOp >> 26) == 60 && ((nextOp >> 21) & 0x1F) == 29 && _VS == MIPS_GET_VS(nextOp)) {
if (false && (nextOp >> 26) == 60 && ((nextOp >> 21) & 0x1F) == 29 && _VS == MIPS_GET_VS(nextOp)) {
// Pair of vrot. Let's join them.
vd2 = MIPS_GET_VD(nextOp);
imm2 = (nextOp >> 16) & 0x1f;
@ -2068,7 +2068,7 @@ namespace MIPSComp
// If the negsin setting differs between the two joint invocations, we need to flip the second one.
bool negSin2 = (imm2 & 0x10) ? true : false;
CompVrotShuffle(dregs2, imm2, sz, negSin1 != negSin2);
js.compilerPC += 4;
irblock.entries[js.irBlockPos + 1].flags |= IR_FLAG_SKIP;
}
fpr.ReleaseSpillLocksAndDiscardTemps();

View file

@ -170,12 +170,13 @@ void ArmJit::EatInstruction(MIPSOpcode op) {
}
js.numInstructions++;
js.compilerPC += 4;
js.irBlockPos++;
js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
void ArmJit::CompileDelaySlot(int flags)
{
IREntry &entry = irblock.entries[js.irBlockPos + 1];
// preserve flag around the delay slot! Maybe this is not always necessary on ARM where
// we can (mostly) control whether we set the flag or not. Of course, if someone puts an slt in to the
// delay slot, we're screwed.
@ -183,8 +184,7 @@ void ArmJit::CompileDelaySlot(int flags)
MRS(R8); // Save flags register. R8 is preserved through function calls and is not allocated.
js.inDelaySlot = true;
MIPSOpcode op = GetOffsetInstruction(1);
MIPSCompileOp(op);
MIPSCompileOp(entry.op);
js.inDelaySlot = false;
if (flags & DELAYSLOT_FLUSH)
@ -236,23 +236,22 @@ void ArmJit::RunLoopUntil(u64 globalticks)
}
u32 ArmJit::GetCompilerPC() {
return js.compilerPC;
return irblock.entries[js.irBlockPos].origAddress;
}
MIPSOpcode ArmJit::GetOffsetInstruction(int offset) {
return Memory::Read_Instruction(GetCompilerPC() + 4 * offset);
return irblock.entries[js.irBlockPos + offset].op;
}
const u8 *ArmJit::DoJit(u32 em_address, JitBlock *b)
{
js.cancel = false;
js.blockStart = js.compilerPC = mips_->pc;
js.blockStart = mips_->pc;
js.lastContinuedPC = 0;
js.initialBlockSize = 0;
js.nextExit = 0;
js.downcountAmount = 0;
js.curBlock = b;
js.compiling = true;
js.inDelaySlot = false;
js.PrefixStart();
@ -287,6 +286,9 @@ const u8 *ArmJit::DoJit(u32 em_address, JitBlock *b)
}
b->normalEntry = GetCodePtr();
ExtractIR(jo, em_address, &irblock);
// TODO: this needs work
MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address);
@ -296,24 +298,19 @@ const u8 *ArmJit::DoJit(u32 em_address, JitBlock *b)
int partialFlushOffset = 0;
js.numInstructions = 0;
while (js.compiling)
{
gpr.SetCompilerPC(GetCompilerPC()); // Let it know for log messages
MIPSOpcode inst = Memory::Read_Opcode_JIT(GetCompilerPC());
//MIPSInfo info = MIPSGetInfo(inst);
//if (info & IS_VFPU) {
// logBlocks = 1;
//}
js.irBlock = &irblock;
js.irBlockPos = 0;
while (js.irBlockPos < irblock.entries.size()) {
IREntry &entry = irblock.entries[js.irBlockPos];
if (entry.flags & IR_FLAG_SKIP)
goto skip_entry;
js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);
js.downcountAmount += MIPSGetInstructionCycleEstimate(entry.op);
MIPSCompileOp(inst);
MIPSCompileOp(entry.op);
js.compilerPC += 4;
js.numInstructions++;
#ifndef HAVE_ARMV7
if ((GetCodePtr() - b->checkedEntry - partialFlushOffset) > 3200)
{
if ((GetCodePtr() - b->checkedEntry - partialFlushOffset) > 3200) {
// We need to prematurely flush as we are out of range
FixupBranch skip = B_CC(CC_AL);
FlushLitPool();
@ -322,13 +319,9 @@ const u8 *ArmJit::DoJit(u32 em_address, JitBlock *b)
}
#endif
// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
if (GetSpaceLeft() < 0x800 || js.numInstructions >= JitBlockCache::MAX_BLOCK_INSTRUCTIONS)
{
FlushAll();
WriteExit(GetCompilerPC(), js.nextExit++);
js.compiling = false;
}
skip_entry:
js.irBlockPos++;
js.numInstructions++;
}
if (jo.useForwardJump) {
@ -428,8 +421,7 @@ bool ArmJit::ReplaceJalTo(u32 dest) {
WriteDownCountR(R0);
}
js.compilerPC += 4;
// No writing exits, keep going!
js.irBlockPos++;
// Add a trigger so that if the inlined code changes, we invalidate this block.
blocks.ProxyBlock(js.blockStart, dest, funcSize / sizeof(u32), GetCodePtr());
@ -467,7 +459,6 @@ void ArmJit::Comp_ReplacementFunc(MIPSOpcode op)
LDR(R1, CTXREG, MIPS_REG_RA * 4);
js.downcountAmount += cycles;
WriteExitDestInR(R1);
js.compiling = false;
}
} else if (entry->replaceFunc) {
FlushAll();
@ -493,7 +484,6 @@ void ArmJit::Comp_ReplacementFunc(MIPSOpcode op)
LDR(R1, CTXREG, MIPS_REG_RA * 4);
WriteDownCountR(R0);
WriteExitDestInR(R1);
js.compiling = false;
}
} else {
ERROR_LOG(HLE, "Replacement function %s has neither jit nor regular impl", entry->name);

View file

@ -26,16 +26,19 @@
#include "Core/MIPS/ARM/ArmRegCache.h"
#include "Core/MIPS/ARM/ArmRegCacheFPU.h"
#include "Core/MIPS/MIPSVFPUUtils.h"
#include "Core/MIPS/ARM/ArmAsm.h"
#include "Core/MIPS/IR.h"
#ifndef offsetof
#include "stddef.h"
#endif
struct ReplacementTableEntry;
namespace MIPSComp
{
class ArmJit : public ArmGen::ARMXCodeBlock
{
class ArmJit : public ArmGen::ARMXCodeBlock {
public:
ArmJit(MIPSState *mips);
virtual ~ArmJit();
@ -43,6 +46,8 @@ public:
void DoState(PointerWrap &p);
static void DoDummyState(PointerWrap &p);
const JitOptions &GetJitOptions() { return jo; }
// Compiled ops should ignore delay slots
// the compiler will take care of them by itself
// OR NOT
@ -278,6 +283,8 @@ private:
JitOptions jo;
JitState js;
IRBlock irblock;
ArmRegCache gpr;
ArmRegCacheFPU fpr;

View file

@ -16,6 +16,7 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "Core/MemMap.h"
#include "Core/MIPS/IR.h"
#include "Core/MIPS/ARM/ArmRegCache.h"
#include "Core/MIPS/ARM/ArmJit.h"
#include "Core/MIPS/MIPSAnalyst.h"
@ -207,13 +208,13 @@ ARMReg ArmRegCache::FindBestToSpill(bool unusedOnly, bool *clobbered) {
continue;
// Awesome, a clobbered reg. Let's use it.
if (MIPSAnalyst::IsRegisterClobbered(ar[reg].mipsReg, compilerPC_, UNUSED_LOOKAHEAD_OPS)) {
if (js_->irBlock->IsRegisterClobbered(ar[reg].mipsReg, js_->irBlockPos, UNUSED_LOOKAHEAD_OPS)) {
*clobbered = true;
return reg;
}
// Not awesome. A used reg. Let's try to avoid spilling.
if (unusedOnly && MIPSAnalyst::IsRegisterUsed(ar[reg].mipsReg, compilerPC_, UNUSED_LOOKAHEAD_OPS)) {
if (unusedOnly && js_->irBlock->IsRegisterUsed(ar[reg].mipsReg, js_->irBlockPos, UNUSED_LOOKAHEAD_OPS)) {
continue;
}

View file

@ -20,6 +20,7 @@
#include "base/logging.h"
#include "Common/CPUDetect.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/IR.h"
#include "Core/MIPS/ARM/ArmRegCacheFPU.h"
#include "Core/MIPS/ARM/ArmJit.h"
#include "Core/MIPS/MIPSTables.h"
@ -132,7 +133,7 @@ bool ArmRegCacheFPU::IsMapped(MIPSReg r) {
ARMReg ArmRegCacheFPU::MapReg(MIPSReg mipsReg, int mapFlags) {
// INFO_LOG(JIT, "FPR MapReg: %i flags=%i", mipsReg, mapFlags);
if (jo_->useNEONVFPU && mipsReg >= 32) {
ERROR_LOG(JIT, "Cannot map VFPU registers to ARM VFP registers in NEON mode. PC=%08x", js_->compilerPC);
ERROR_LOG(JIT, "Cannot map VFPU registers to ARM VFP registers in NEON mode.");
return S0;
}
@ -195,7 +196,7 @@ allocate:
}
// Uh oh, we have all them spilllocked....
ERROR_LOG(JIT, "Out of spillable registers at PC %08x!!!", js_->compilerPC);
ERROR_LOG(JIT, "Out of spillable registers at %08x!!!", js_->blockStart);
return INVALID_REG;
}
@ -379,7 +380,7 @@ void ArmRegCacheFPU::FlushR(MIPSReg r) {
// mipsreg that's been part of a quad.
int quad = mr[r].reg - Q0;
if (qr[quad].isDirty) {
WARN_LOG(JIT, "FlushR found quad register %i - PC=%08x", quad, js_->compilerPC);
WARN_LOG(JIT, "FlushR found quad register %i - PC=%08x", quad, js_->blockStart);
emit_->ADDI2R(R0, CTXREG, GetMipsRegOffset(r), R1);
emit_->VST1_lane(F_32, (ARMReg)mr[r].reg, R0, mr[r].lane, true);
}
@ -609,11 +610,11 @@ ARMReg ArmRegCacheFPU::R(int mipsReg) {
return (ARMReg)(mr[mipsReg].reg + S0);
} else {
if (mipsReg < 32) {
ERROR_LOG(JIT, "FReg %i not in ARM reg. compilerPC = %08x : %s", mipsReg, js_->compilerPC, MIPSDisasmAt(js_->compilerPC));
ERROR_LOG(JIT, "FReg %i not in ARM reg. %s", mipsReg, js_->irBlock->DisasmAt(js_->irBlockPos));
} else if (mipsReg < 32 + 128) {
ERROR_LOG(JIT, "VReg %i not in ARM reg. compilerPC = %08x : %s", mipsReg - 32, js_->compilerPC, MIPSDisasmAt(js_->compilerPC));
ERROR_LOG(JIT, "VReg %i not in ARM reg. %s", mipsReg - 32, js_->irBlock->DisasmAt(js_->irBlockPos));
} else {
ERROR_LOG(JIT, "Tempreg %i not in ARM reg. compilerPC = %08x : %s", mipsReg - 128 - 32, js_->compilerPC, MIPSDisasmAt(js_->compilerPC));
ERROR_LOG(JIT, "Tempreg %i not in ARM reg. %s", mipsReg - 128 - 32, js_->irBlock->DisasmAt(js_->irBlockPos));
}
return INVALID_REG; // BAAAD
}

397
Core/MIPS/IR.cpp Normal file
View file

@ -0,0 +1,397 @@
// Copyright (c) 2014- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#ifdef _WIN32
#include <Windows.h>
#endif
#include <algorithm>
#include "Core/MIPS/IR.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPSDis.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSDebugInterface.h"
#include "Core/MIPS/JitCommon/JitCommon.h"
#include "Core/MIPS/JitCommon/NativeJit.h"
#include "Core/HLE/ReplaceTables.h"
// Jit brokenness to fix: 08866fe8 in Star Soldier - a b generates unnecessary stuff
namespace MIPSComp {
// Reorderings to do:
// * Hoist loads and stores upwards as far as they will go
// * Sort sequences of loads and stores by register to allow compiling into things like LDMIA
// * Join together "pairs" like mfc0/mtv and mfv/mtc0
// Returns true when it changed something. Might have to be called repeatedly to get everything done.
static bool Reorder(IRBlock *block);
static void ComputeLiveness(IRBlock *block);
static void DebugPrintBlock(IRBlock *block);
IREntry &IRBlock::AddIREntry(u32 address) {
MIPSOpcode op = Memory::Read_Opcode_JIT(address);
IREntry e;
e.pseudoInstr = PSEUDO_NONE;
e.origAddress = address;
e.op = op;
e.info = MIPSGetInfo(op);
e.flags = 0;
e.liveGPR = 0;
e.liveFPR = 0;
entries.push_back(e);
return entries.back();
}
void ExtractIR(const JitOptions &jo, u32 address, IRBlock *block) {
static int count = 0;
count++;
block->entries.clear();
block->address = address;
block->analysis = MIPSAnalyst::Analyze(address);
bool joined = false; // flag to debugprint
int exitInInstructions = -1;
std::vector<u32> raStack; // for inlining leaf functions
while (true) {
IREntry &e = block->AddIREntry(address);
if (e.info & DELAYSLOT) {
// Check if replaceable JAL. If not, bail in 2 instructions.
bool replacableJal = false;
if (e.info & IS_JUMP) {
if ((e.op >> 26) == 3) {
// Definitely a JAL
const ReplacementTableEntry *entry;
u32 funcSize;
if (CanReplaceJalTo(MIPSCodeUtils::GetJumpTarget(e.origAddress), &entry, &funcSize))
replacableJal = true;
}
}
if (!replacableJal) {
exitInInstructions = 2;
}
}
u64 gprIn = 0, gprOut = 0;
u32 fprIn = 0, fprOut = 0;
if (e.info & IN_RS) gprIn |= (1ULL << MIPS_GET_RS(e.op));
if (e.info & IN_RT) gprIn |= (1ULL << MIPS_GET_RT(e.op));
if (e.info & IN_LO) gprIn |= (1ULL << MIPS_REG_LO);
if (e.info & IN_HI) gprIn |= (1ULL << MIPS_REG_HI);
if (e.info & IN_VFPU_CC) gprIn |= (1ULL << MIPS_REG_VFPUCC);
if (e.info & IN_FPUFLAG) gprIn |= (1ULL << MIPS_REG_FPCOND);
if (e.info & IN_FS) fprIn |= (1 << MIPS_GET_FS(e.op));
if (e.info & IN_FT) fprIn |= (1 << MIPS_GET_FT(e.op));
if (e.info & OUT_RT) gprOut |= (1ULL << MIPS_GET_RT(e.op));
if (e.info & OUT_RD) gprOut |= (1ULL << MIPS_GET_RD(e.op));
if (e.info & OUT_RA) gprOut |= (1ULL << MIPS_REG_RA);
if (e.info & OUT_FD) fprOut |= (1 << MIPS_GET_FD(e.op));
if (e.info & OUT_FS) fprOut |= (1 << MIPS_GET_FS(e.op));
if (e.info & OUT_FT) fprOut |= (1 << MIPS_GET_FT(e.op));
if (e.info & OUT_LO) gprOut |= (1ULL << MIPS_REG_LO);
if (e.info & OUT_HI) gprOut |= (1ULL << MIPS_REG_HI);
if (e.info & OUT_VFPU_CC) gprOut |= (1ULL << MIPS_REG_VFPUCC);
if (e.info & OUT_FPUFLAG) gprOut |= (1ULL << MIPS_REG_FPCOND);
if (e.pseudoInstr == PSEUDO_SAVE_RA) gprOut |= (1ULL << MIPS_REG_RA);
// TODO: Add VFPU analysis as well...
e.gprIn = gprIn & ~1; // The zero register doesn't count.
e.gprOut = gprOut & ~1;
e.fprIn = fprIn;
e.fprOut = fprOut;
if ((e.info & IS_JUMP) && jo.continueBranches) {
// Figure out exactly what instruction it is.
if ((e.op >> 26) == 2) { // It's a plain j instruction
// joined = true;
exitInInstructions = -1;
// Remove the just added jump instruction
block->RemoveLast();
// Add the delay slot to the block
block->AddIREntry(address + 4);
address = MIPSCodeUtils::GetJumpTarget(address);
// NOTICE_LOG(JIT, "Blocks joined! %08x->%08x", block->address, target);
continue;
} else if ((e.op >> 26) == 3) {
// jal instruction. Same as above but save RA. This can be optimized away later if needed.
// joined = true;
exitInInstructions = -1;
// Turn the just added jal instruction into a pseudo SaveRA
block->entries.back().MakePseudo(PSEUDO_SAVE_RA);
raStack.push_back(address + 8);
// Add the delay slot
block->AddIREntry(address + 4);
address = MIPSCodeUtils::GetJumpTarget(address);
// NOTICE_LOG(JIT, "Blocks jal-joined! %08x->%08x", block->address, address);
continue;
} else if (e.op == MIPS_MAKE_JR_RA()) {
// TODO: This is only safe if we don't write to RA manually anywhere.
MIPSOpcode next = Memory::Read_Opcode_JIT(address + 4);
if (!MIPSAnalyst::IsSyscall(next) && raStack.size()) {
exitInInstructions = -1;
// Remove the just added jump instruction, and add the delay slot
block->RemoveLast();
block->AddIREntry(address + 4);
// We know the return address! Keep compiling there.
// NOTICE_LOG(JIT, "Inlined leaf function! %08x", block->address);
u32 returnAddr = raStack.back();
raStack.pop_back();
address = returnAddr;
continue;
}
// Else do nothing special, compile as usual.
}
}
address += 4;
if (exitInInstructions > 0)
exitInInstructions--;
if (exitInInstructions == 0)
break;
}
// Reorder until no changes are made.
while (Reorder(block))
;
// Computing liveness must be done _after_ reordering, of course.
ComputeLiveness(block);
if (joined) {
DebugPrintBlock(block);
}
// TODO: Compute the proxy blocks from the addresses in the IR instructions.
}
static bool Reorder(IRBlock *block) {
bool changed = false;
// TODO: We only do some really safe optimizations now. Can't do fun stuff like hoisting loads/stores until we have unfolded all branch delay slots!
// Well, maybe we could do some of it, but let's not..
// Sweep downwards
for (int i = 0; i < (int)block->entries.size() - 1; i++) {
IREntry &e1 = block->entries[i];
IREntry &e2 = block->entries[i + 1];
// Reorder SW, LWC1, SWC1
if ((MIPSAnalyst::IsSWInstr(e1.op) && MIPSAnalyst::IsSWInstr(e2.op)) ||
(MIPSAnalyst::IsLWC1Instr(e1.op) && MIPSAnalyst::IsLWC1Instr(e2.op)) ||
(MIPSAnalyst::IsSWC1Instr(e1.op) && MIPSAnalyst::IsSWC1Instr(e2.op))) {
// Compare register numbers and swap if possible.
if (MIPS_GET_RT(e1.op) > MIPS_GET_RT(e2.op) &&
(MIPS_GET_IMM16(e1.op) != MIPS_GET_IMM16(e2.op) ||
MIPS_GET_RS(e1.op) != MIPS_GET_RS(e2.op))) {
std::swap(e1, e2);
#if 0
const char *type = "SW";
if (MIPSAnalyst::IsLWC1Instr(e1.op)) type = "LWC1";
else if (MIPSAnalyst::IsSWC1Instr(e1.op)) type = "SWC1";
NOTICE_LOG(JIT, "Reordered %s at %08x (%08x)", type, e1.origAddress, block->address);
#endif
changed = true;
}
}
// LW is tricker because we need to check against the destination of one instruction being used
// as the base register of the other.
if (MIPSAnalyst::IsLWInstr(e1.op) && MIPSAnalyst::IsLWInstr(e2.op)) {
// Compare register numbers and swap if possible.
if (MIPS_GET_RT(e1.op) != MIPS_GET_RS(e2.op) &&
MIPS_GET_RS(e1.op) != MIPS_GET_RT(e2.op) &&
MIPS_GET_RT(e1.op) > MIPS_GET_RT(e2.op) &&
(MIPS_GET_RT(e1.op) != MIPS_GET_RT(e2.op) ||
MIPS_GET_IMM16(e1.op) != MIPS_GET_IMM16(e2.op))) {
std::swap(e1, e2);
// NOTICE_LOG(JIT, "Reordered LW at %08x (%08x)", e1.origAddress, block->address);
changed = true;
}
}
}
// Then sweep upwards
/*
for (int i = (int)block->entries.size() - 1; i >= 0; i--) {
IREntry &e1 = block->entries[i];
IREntry &e2 = block->entries[i + 1];
// Do stuff!
}
*/
return changed;
}
void ToBitString(char *ptr, u64 bits, int numBits) {
for (int i = numBits - 1; i >= 0; i--) {
*(ptr++) = (bits & (1ULL << i)) ? '1' : '.';
}
*ptr = '\0';
}
#define RN(i) currentDebugMIPS->GetRegName(0,i)
// Renders the set GPR bits as a space-separated list of register names,
// e.g. "a0 a1 ra lo ". The pseudo-GPRs LO and HI are included via their
// MIPS_REG_LO / MIPS_REG_HI bit positions.
// Fixes over the previous version: `bufsize` was ignored entirely (buffer
// overflow risk), and each append rescanned the string with strlen()
// (O(n^2) total). We now track the write offset and bound every write.
void ToGprLivenessString(char *str, int bufsize, u64 bits) {
	if (bufsize <= 0)
		return;
	int len = 0;
	str[0] = '\0';
	for (int i = 0; i < 32; i++) {
		if (bits & (1ULL << i)) {
			int n = snprintf(str + len, bufsize - len, "%s ", RN(i));
			if (n < 0 || n >= bufsize - len)
				return;  // Truncated - output buffer is full.
			len += n;
		}
	}
	if (bits & (1ULL << MIPS_REG_LO)) {
		int n = snprintf(str + len, bufsize - len, "lo ");
		if (n < 0 || n >= bufsize - len)
			return;
		len += n;
	}
	if (bits & (1ULL << MIPS_REG_HI)) {
		snprintf(str + len, bufsize - len, "hi ");
	}
}
// Renders the set FPR bits as a space-separated list, e.g. "f0 f12 f31 ".
// Fixes over the previous version: `bufsize` was ignored (overflow risk) and
// each append rescanned the string with strlen(); we now track the offset
// and bound every write to the remaining space.
void ToFprLivenessString(char *str, int bufsize, u64 bits) {
	if (bufsize <= 0)
		return;
	int len = 0;
	str[0] = '\0';
	for (int i = 0; i < 32; i++) {
		if (bits & (1ULL << i)) {
			int n = snprintf(str + len, bufsize - len, "f%d ", i);
			if (n < 0 || n >= bufsize - len)
				return;  // Truncated - output buffer is full.
			len += n;
		}
	}
}
// Backwards dataflow pass over the block: fills in each entry's
// liveGPR/liveFPR (registers whose current value is still needed by a later
// instruction) and clobberedGPR/clobberedFPR (registers that will be
// overwritten before being read again, i.e. safe to discard).
static void ComputeLiveness(IRBlock *block) {
	// Okay, now let's work backwards and compute liveness information.
	//
	// NOTE: This will not be accurate until all branch delay slots have been unfolded!
	// TODO: By following calling conventions etc, it may be possible to eliminate
	// additional register liveness from "jr ra" upwards. However, this is not guaranteed to work on all games.
	u64 gprLiveness = 0; // note - nine Fs, for HI/LO/flags-in-registers. To define later.
	u32 fprLiveness = 0;
	u64 gprClobber = 0;
	u32 fprClobber = 0;
	for (int i = (int)block->entries.size() - 1; i >= 0; i--) {
		IREntry &e = block->entries[i];
		if (e.op == 0) { // nop
			continue;
		}
		// Store the state *after* this instruction: what later code still
		// needs, and what later code is guaranteed to overwrite.
		// These are already cleaned from the zero register
		e.liveGPR = gprLiveness;
		e.liveFPR = fprLiveness;
		e.clobberedGPR = gprClobber;
		e.clobberedFPR = fprClobber;
		// Standard liveness transfer: kill the outputs first, then add the
		// inputs. The order matters for ops that both read and write the
		// same register (it must remain live above this instruction).
		gprLiveness &= ~e.gprOut;
		fprLiveness &= ~e.fprOut;
		gprLiveness |= e.gprIn;
		fprLiveness |= e.fprIn;
		// Clobber transfer is the mirror image: outputs become clobbered,
		// but anything this op reads is NOT clobberable above this point.
		gprClobber |= e.gprOut;
		fprClobber |= e.fprOut;
		gprClobber &= ~e.gprIn;
		fprClobber &= ~e.fprIn;
	}
}
std::vector<std::string> IRBlock::ToStringVector() {
std::vector<std::string> vec;
char buf[1024];
for (int i = 0; i < (int)entries.size(); i++) {
IREntry &e = entries[i];
char instr[256], liveness1[36 * 3], liveness2[32 * 3];
memset(instr, 0, sizeof(instr));
ToGprLivenessString(liveness1, sizeof(liveness1), e.clobberedGPR);
ToFprLivenessString(liveness2, sizeof(liveness2), e.clobberedFPR);
const char *pseudo = " ";
switch (e.pseudoInstr) {
case PSEUDO_SAVE_RA:
pseudo = " save_ra / ";
break;
}
MIPSDisAsm(e.op, e.origAddress, instr, true);
snprintf(buf, sizeof(buf), "%08x%s%s : %s %s", e.origAddress, pseudo, instr, liveness1, liveness2);
vec.push_back(std::string(buf));
}
return vec;
}
// Dumps the block's disassembly/register listing to stdout (debug aid).
static void DebugPrintBlock(IRBlock *block) {
	for (const std::string &line : block->ToStringVector()) {
		printf("%s\n", line.c_str());
	}
	fflush(stdout);
}
// Scans forward up to `instrs` IR entries starting at `pos` and classifies
// how `reg` is next touched:
//   USAGE_INPUT     - read before any write: its value must be preserved.
//   USAGE_CLOBBERED - overwritten before any read: its value can be discarded.
//   USAGE_UNKNOWN   - neither could be proven within the window.
IRBlock::RegisterUsage IRBlock::DetermineRegisterUsage(MIPSGPReg reg, int pos, int instrs) {
	const int start = pos;
	int end = pos + instrs;
	if (end > (int)entries.size())
		end = (int)entries.size();
	bool canClobber = true;
	u64 mask = 1ULL << (int)reg;
	for (; pos < end; pos++) {
		IREntry &e = entries[pos];
		// A read settles it: the current value is needed.
		if (e.gprIn & mask)
			return USAGE_INPUT;
		bool clobbered = false;
		if (e.gprOut & mask) {
			clobbered = true;
		}
		if (clobbered) {
			// A conditional move only *might* write the register, so we
			// can't promise the old value is dead.
			if (!canClobber || (e.info & IS_CONDMOVE))
				return USAGE_UNKNOWN;
			return USAGE_CLOBBERED;
		}
		// Bail early if we hit a branch (could follow each path for continuing?)
		if ((e.info & IS_CONDBRANCH) || (e.info & IS_JUMP)) {
			// Still need to check the delay slot (so end after it.)
			// We'll assume likely are taken.
			end = pos + 2;
			// The reason for the start != pos check is that we compile delay slots before branches.
			// That means if we're starting at the branch, it's not safe to allow the delay slot
			// to clobber, since it might have already been compiled.
			// As for LIKELY, we don't know if it'll run the branch or not.
			canClobber = (e.info & LIKELY) == 0 && start != pos;
		}
	}
	return USAGE_UNKNOWN;
}
// True when `reg` is read before being overwritten within the window.
bool IRBlock::IsRegisterUsed(MIPSGPReg reg, int pos, int instrs) {
	const RegisterUsage usage = DetermineRegisterUsage(reg, pos, instrs);
	return usage == USAGE_INPUT;
}
// True when `reg` is provably overwritten before any read within the window.
bool IRBlock::IsRegisterClobbered(MIPSGPReg reg, int pos, int instrs) {
	const RegisterUsage usage = DetermineRegisterUsage(reg, pos, instrs);
	return usage == USAGE_CLOBBERED;
}
// Disassembles the opcode of the entry at `pos` (address printed as 0).
// Returns a pointer to a shared static buffer: NOT thread-safe, and the
// result is only valid until the next call. (See the header's TODO about
// changing this interface.)
const char *IRBlock::DisasmAt(int pos) {
	static char temp[256];
	MIPSDisAsm(entries[pos].op, 0, temp);
	return temp;
}
// Removes the most recently added IR entry.
// Guards against an empty list: pop_back() on an empty vector is undefined
// behavior, and callers may invoke this during error/rollback paths.
void IRBlock::RemoveLast() {
	if (!entries.empty())
		entries.pop_back();
}
} // namespace

120
Core/MIPS/IR.h Normal file
View file

@ -0,0 +1,120 @@
#pragma once
// Copyright (c) 2014- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <vector>
#include <string>
#include "Common/CommonTypes.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/JitCommon/JitState.h"
// MIPS IR
// A very pragmatic intermediate representation for the recompilers.
// It's essentially just the MIPS instructions, but with an option to also have
// pseudo operations. Some MIPS instructions decompose into these for easier optimization.
// The main goals are:
// * Compute accurate liveness information so that some redundant computations can be removed
// * Move instructions around to permit various peephole optimizations
// * Unfold delay slots into sequential instructions for ease of optimization
// *
// Flags for IREntry::flags.
enum {
	// Entry should be skipped by the compiler loop (e.g. it was folded into
	// or consumed by a neighboring entry).
	IR_FLAG_SKIP = 1,
	// To be used on unfolded branches
	IR_FLAG_NO_DELAY_SLOT = 2,
	// Compare operand was moved to a hidden register - presumably pairs with
	// MIPS_REG_COMP_LEFT/RIGHT for delay slot unfolding (TODO confirm usage).
	IR_FLAG_CMP_REPLACE_LEFT = 4,
	IR_FLAG_CMP_REPLACE_RIGHT = 8,
};
// Pseudo-instruction identifiers for IREntry::pseudoInstr.
enum {
	PSEUDO_NONE,
	PSEUDO_SAVE_RA,
};
// One IR entry: essentially a decoded MIPS instruction plus the analysis
// metadata (in/out register sets, liveness, clobber info) computed per block.
// Keep this as small as possible!
struct IREntry {
	u32 origAddress; // Note - doesn't have to be contiguous.
	MIPSInfo info; // not strictly needed as can be recomputed but speeds things up considerably so worth the space
	MIPSOpcode op;
	u32 flags; // Combination of IR_FLAG_* bits.
	int pseudoInstr; // 0 = no pseudo. Could be combined with flags?

	// Per-instruction register read/write sets, as bitfields.
	// We include LO, HI, VFPUCC, FPUFlag as mapped GPRs.
	u64 gprIn;
	u64 gprOut;
	u32 fprIn;
	u32 fprOut;

	// Register live state, as bitfields.
	u64 liveGPR; // Bigger than 32 to accommodate pseudo-GPRs like HI and LO
	u32 liveFPR;
	// Clobbered state. Can discard registers marked as clobbered later.
	u64 clobberedGPR;
	u32 clobberedFPR;
	// u32 liveVPR[4]; // TODO: For now we assume all VPRs are live at all times.

	// NOTE(review): `info = 0` goes through MIPSInfo::operator=(u64); verify
	// that operator actually assigns to `value` (it looks reversed in
	// MIPSTables.h) - otherwise these don't clear `info`.
	void MakeNOP() { op.encoding = 0; info = 0; }
	void MakePseudo(int pseudo) { pseudoInstr = pseudo; info = 0; }
	bool IsGPRAlive(int reg) const { return (liveGPR & (1ULL << reg)) != 0; }
	bool IsFPRAlive(int freg) const { return (liveFPR & (1UL << freg)) != 0; }
	bool IsGPRClobbered(int reg) const { return (clobberedGPR & (1ULL << reg)) != 0; }
	bool IsFPRClobbered(int freg) const { return (clobberedFPR & (1UL << freg)) != 0; }
};
namespace MIPSComp {

// A block of IR entries for one compilation unit, plus queries used by the
// backends for liveness-driven optimizations.
class IRBlock {
public:
	u32 address; // MIPS address of the first instruction in the block.
	std::vector<IREntry> entries;
	MIPSAnalyst::AnalysisResults analysis;

	enum RegisterUsage {
		USAGE_CLOBBERED, // Overwritten before any read within the window.
		USAGE_INPUT,     // Read before any write within the window.
		USAGE_UNKNOWN,   // Could not be determined.
	};

	RegisterUsage DetermineInOutUsage(u64 inFlag, u64 outFlag, int pos, int instrs);
	RegisterUsage DetermineRegisterUsage(MIPSGPReg reg, int pos, int instrs);

	// This tells us if the reg is used within instrs of pos (also includes likely delay slots.)
	bool IsRegisterUsed(MIPSGPReg reg, int pos, int instrs);
	bool IsRegisterClobbered(MIPSGPReg reg, int pos, int instrs);

	// TODO: Change this awful interface (returns a shared static buffer; not thread-safe.)
	const char *DisasmAt(int pos);

	std::vector<std::string> ToStringVector();

	IREntry &AddIREntry(u32 address);
	void RemoveLast();
};

// Builds the IR block for the MIPS code starting at `address`.
void ExtractIR(const JitOptions &jo, u32 address, IRBlock *block);

}

View file

@ -21,7 +21,11 @@
namespace MIPSComp {
JitOptions::JitOptions() {
// x86
#if defined(_M_IX86) || defined(_M_X64)
enableVFPUSIMD = true;
#else
enableVFPUSIMD = false;
#endif
// Set by Asm if needed.
reserveR15ForAsm = false;
@ -31,18 +35,23 @@ namespace MIPSComp {
cachePointers = true;
// ARM only
#ifdef ARM
downcountInRegister = true;
#else
downcountInRegister = false;
#endif
useNEONVFPU = false; // true
if (!cpu_info.bNEON)
useNEONVFPU = false;
//ARM64
// ARM64
useASIMDVFPU = false; // true
// Common
useClobberOpt = false;
enableBlocklink = true;
immBranches = false;
continueBranches = false;
continueBranches = false; // true
continueJumps = false;
continueMaxInstructions = 300;
}

View file

@ -24,6 +24,8 @@ struct JitBlock;
namespace MIPSComp {
class IRBlock;
enum CompileDelaySlotFlags
{
// Easy, nothing extra.
@ -62,7 +64,8 @@ namespace MIPSComp {
prefixTFlag(PREFIX_UNKNOWN),
prefixDFlag(PREFIX_UNKNOWN) {}
u32 compilerPC;
IRBlock *irBlock;
u32 irBlockPos;
u32 blockStart;
u32 lastContinuedPC;
u32 initialBlockSize;
@ -73,7 +76,6 @@ namespace MIPSComp {
int afterOp;
int downcountAmount;
int numInstructions;
bool compiling; // TODO: get rid of this in favor of using analysis results to determine end of block
JitBlock *curBlock;
u8 hasSetRounding;
@ -194,6 +196,7 @@ namespace MIPSComp {
bool useASIMDVFPU;
// Common
bool useClobberOpt;
bool enableBlocklink;
bool immBranches;
bool continueBranches;

View file

@ -46,3 +46,4 @@ namespace MIPSComp {
typedef void (NativeJit::*MIPSCompileFunc)(MIPSOpcode opcode);
typedef int (NativeJit::*MIPSReplaceFunc)();
}

View file

@ -91,6 +91,10 @@ enum MIPSGPReg {
MIPS_REG_FPCOND = 34,
MIPS_REG_VFPUCC = 35,
// For branch slot unfolding later. We will move conflicting comparands to these special hidden registers and replace the compare operation, instead of stashing the flags.
MIPS_REG_COMP_LEFT = 36,
MIPS_REG_COMP_RIGHT = 37,
MIPS_REG_INVALID=-1,
};

View file

@ -180,23 +180,24 @@ const u8 *MipsJit::DoJit(u32 em_address, JitBlock *b)
js.PrefixStart();
b->normalEntry = GetCodePtr();
js.numInstructions = 0;
while (js.compiling)
{
MIPSOpcode inst = Memory::Read_Opcode_JIT(js.compilerPC);
js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);
MIPSCompileOp(inst);
IROptions iropt;
ExtractIR(iropt, js.compilerPC, &irblock);
js.compilerPC += 4;
js.irBlock = &irblock;
while (js.irBlockPos < irblock.entries.size()) {
IREntry &entry = irblock.entries[js.irBlockPos];
if (entry.flags & IR_FLAG_SKIP)
goto skip_entry;
js.downcountAmount += MIPSGetInstructionCycleEstimate(entry.op);
MIPSCompileOp(entry.op);
skip_entry:
js.irBlockPos++;
js.numInstructions++;
// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
if (GetSpaceLeft() < 0x800 || js.numInstructions >= JitBlockCache::MAX_BLOCK_INSTRUCTIONS)
{
FlushAll();
WriteExit(js.compilerPC, js.nextExit++);
js.compiling = false;
}
}
b->codeSize = GetCodePtr() - b->normalEntry;

View file

@ -19,6 +19,7 @@
#include "Core/MIPS/JitCommon/JitState.h"
#include "Core/MIPS/JitCommon/JitBlockCache.h"
#include "Core/MIPS/IR.h"
#include "../MIPSVFPUUtils.h"
#ifndef offsetof
@ -156,7 +157,7 @@ private:
JitBlockCache blocks;
JitOptions jo;
JitState js;
IRBlock ir;
MIPSState *mips_;
int dontLogBlocks;

View file

@ -64,8 +64,6 @@ static std::set<HashMapFunc> hashMap;
static std::string hashmapFileName;
#define MIPSTABLE_IMM_MASK 0xFC000000
// Similar to HashMapFunc but has a char pointer for the name for efficiency.
struct HardHashTableEntry {
uint64_t hash;
@ -537,33 +535,6 @@ namespace MIPSAnalyst {
return (op >> 26) == 0 && (op & 0x3f) == 12;
}
static bool IsSWInstr(MIPSOpcode op) {
return (op & MIPSTABLE_IMM_MASK) == 0xAC000000;
}
static bool IsSBInstr(MIPSOpcode op) {
return (op & MIPSTABLE_IMM_MASK) == 0xA0000000;
}
static bool IsSHInstr(MIPSOpcode op) {
return (op & MIPSTABLE_IMM_MASK) == 0xA4000000;
}
static bool IsSWLInstr(MIPSOpcode op) {
return (op & MIPSTABLE_IMM_MASK) == 0xA8000000;
}
static bool IsSWRInstr(MIPSOpcode op) {
return (op & MIPSTABLE_IMM_MASK) == 0xB8000000;
}
static bool IsSWC1Instr(MIPSOpcode op) {
return (op & MIPSTABLE_IMM_MASK) == 0xE4000000;
}
static bool IsSVSInstr(MIPSOpcode op) {
return (op & MIPSTABLE_IMM_MASK) == 0xE8000000;
}
static bool IsSVQInstr(MIPSOpcode op) {
return (op & MIPSTABLE_IMM_MASK) == 0xF8000000;
}
bool OpWouldChangeMemory(u32 pc, u32 addr, u32 size) {
const auto op = Memory::Read_Instruction(pc, true);
@ -633,7 +604,7 @@ namespace MIPSAnalyst {
}
for (u32 addr = address, endAddr = address + MAX_ANALYZE; addr <= endAddr; addr += 4) {
MIPSOpcode op = Memory::Read_Instruction(addr, true);
MIPSOpcode op = Memory::Read_Opcode_JIT(addr);
MIPSInfo info = MIPSGetInfo(op);
MIPSGPReg rs = MIPS_GET_RS(op);
@ -698,113 +669,6 @@ namespace MIPSAnalyst {
}
}
enum RegisterUsage {
USAGE_CLOBBERED,
USAGE_INPUT,
USAGE_UNKNOWN,
};
static RegisterUsage DetermineInOutUsage(u64 inFlag, u64 outFlag, u32 addr, int instrs) {
const u32 start = addr;
u32 end = addr + instrs * sizeof(u32);
bool canClobber = true;
while (addr < end) {
const MIPSOpcode op = Memory::Read_Instruction(addr, true);
const MIPSInfo info = MIPSGetInfo(op);
// Yes, used.
if (info & inFlag)
return USAGE_INPUT;
// Clobbered, so not used.
if (info & outFlag)
return canClobber ? USAGE_CLOBBERED : USAGE_UNKNOWN;
// Bail early if we hit a branch (could follow each path for continuing?)
if ((info & IS_CONDBRANCH) || (info & IS_JUMP)) {
// Still need to check the delay slot (so end after it.)
// We'll assume likely are taken.
end = addr + 8;
// The reason for the start != addr check is that we compile delay slots before branches.
// That means if we're starting at the branch, it's not safe to allow the delay slot
// to clobber, since it might have already been compiled.
// As for LIKELY, we don't know if it'll run the branch or not.
canClobber = (info & LIKELY) == 0 && start != addr;
}
addr += 4;
}
return USAGE_UNKNOWN;
}
static RegisterUsage DetermineRegisterUsage(MIPSGPReg reg, u32 addr, int instrs) {
switch (reg) {
case MIPS_REG_HI:
return DetermineInOutUsage(IN_HI, OUT_HI, addr, instrs);
case MIPS_REG_LO:
return DetermineInOutUsage(IN_LO, OUT_LO, addr, instrs);
case MIPS_REG_FPCOND:
return DetermineInOutUsage(IN_FPUFLAG, OUT_FPUFLAG, addr, instrs);
case MIPS_REG_VFPUCC:
return DetermineInOutUsage(IN_VFPU_CC, OUT_VFPU_CC, addr, instrs);
default:
break;
}
if (reg > 32) {
return USAGE_UNKNOWN;
}
const u32 start = addr;
u32 end = addr + instrs * sizeof(u32);
bool canClobber = true;
while (addr < end) {
const MIPSOpcode op = Memory::Read_Instruction(addr, true);
const MIPSInfo info = MIPSGetInfo(op);
// Yes, used.
if ((info & IN_RS) && (MIPS_GET_RS(op) == reg))
return USAGE_INPUT;
if ((info & IN_RT) && (MIPS_GET_RT(op) == reg))
return USAGE_INPUT;
// Clobbered, so not used.
bool clobbered = false;
if ((info & OUT_RT) && (MIPS_GET_RT(op) == reg))
clobbered = true;
if ((info & OUT_RD) && (MIPS_GET_RD(op) == reg))
clobbered = true;
if ((info & OUT_RA) && (reg == MIPS_REG_RA))
clobbered = true;
if (clobbered) {
if (!canClobber || (info & IS_CONDMOVE))
return USAGE_UNKNOWN;
return USAGE_CLOBBERED;
}
// Bail early if we hit a branch (could follow each path for continuing?)
if ((info & IS_CONDBRANCH) || (info & IS_JUMP)) {
// Still need to check the delay slot (so end after it.)
// We'll assume likely are taken.
end = addr + 8;
// The reason for the start != addr check is that we compile delay slots before branches.
// That means if we're starting at the branch, it's not safe to allow the delay slot
// to clobber, since it might have already been compiled.
// As for LIKELY, we don't know if it'll run the branch or not.
canClobber = (info & LIKELY) == 0 && start != addr;
}
addr += 4;
}
return USAGE_UNKNOWN;
}
bool IsRegisterUsed(MIPSGPReg reg, u32 addr, int instrs) {
return DetermineRegisterUsage(reg, addr, instrs) == USAGE_INPUT;
}
bool IsRegisterClobbered(MIPSGPReg reg, u32 addr, int instrs) {
return DetermineRegisterUsage(reg, addr, instrs) == USAGE_CLOBBERED;
}
void HashFunctions() {
lock_guard guard(functions_lock);
std::vector<u32> buffer;

View file

@ -77,11 +77,6 @@ namespace MIPSAnalyst
AnalysisResults Analyze(u32 address);
// This tells us if the reg is used within intrs of addr (also includes likely delay slots.)
bool IsRegisterUsed(MIPSGPReg reg, u32 addr, int instrs);
// This tells us if the reg is clobbered within intrs of addr (e.g. it is surely not used.)
bool IsRegisterClobbered(MIPSGPReg reg, u32 addr, int instrs);
struct AnalyzedFunction {
u32 start;
u32 end;
@ -98,7 +93,6 @@ namespace MIPSAnalyst
void Reset();
bool IsRegisterUsed(u32 reg, u32 addr);
// This will not only create a database of "AnalyzedFunction" structs, it also
// will insert all the functions it finds into the symbol map, if insertSymbols is true.
@ -130,6 +124,27 @@ namespace MIPSAnalyst
bool IsDelaySlotNiceFPU(MIPSOpcode branchOp, MIPSOpcode op);
bool IsSyscall(MIPSOpcode op);
#define MIPSTABLE_IMM_MASK 0xFC000000
inline bool IsLBInstr(MIPSOpcode op) { return (op & MIPSTABLE_IMM_MASK) == 0x80000000; }
inline bool IsLWInstr(MIPSOpcode op) { return (op & MIPSTABLE_IMM_MASK) == 0x8C000000; }
inline bool IsSWInstr(MIPSOpcode op) { return (op & MIPSTABLE_IMM_MASK) == 0xAC000000; }
inline bool IsSBInstr(MIPSOpcode op) { return (op & MIPSTABLE_IMM_MASK) == 0xA0000000; }
inline bool IsSHInstr(MIPSOpcode op) { return (op & MIPSTABLE_IMM_MASK) == 0xA4000000; }
inline bool IsSWLInstr(MIPSOpcode op) { return (op & MIPSTABLE_IMM_MASK) == 0xA8000000; }
inline bool IsSWRInstr(MIPSOpcode op) { return (op & MIPSTABLE_IMM_MASK) == 0xB8000000; }
inline bool IsSWC1Instr(MIPSOpcode op) { return (op & MIPSTABLE_IMM_MASK) == 0xE4000000; }
inline bool IsSVSInstr(MIPSOpcode op) { return (op & MIPSTABLE_IMM_MASK) == 0xE8000000; }
inline bool IsSVQInstr(MIPSOpcode op) { return (op & MIPSTABLE_IMM_MASK) == 0xF8000000; }
inline bool IsLWC1Instr(MIPSOpcode op) { return (op & MIPSTABLE_IMM_MASK) == 0xC4000000; }
inline bool IsLVSInstr(MIPSOpcode op) { return (op & MIPSTABLE_IMM_MASK) == 0xC8000000; }
inline bool IsANDIInstr(MIPSOpcode op) { return (op & MIPSTABLE_IMM_MASK) == 0x30000000; }
bool OpWouldChangeMemory(u32 pc, u32 addr, u32 size);
void Shutdown();

View file

@ -51,11 +51,13 @@
#define MIPS_GET_VS(op) ((op>>8) & 0x7F)
#define MIPS_GET_VT(op) ((op>>16) & 0x7F)
#define MIPS_GET_IMM16(op) ((op) & 0xFFFF)
namespace MIPSCodeUtils
{
u32 GetCallTarget(u32 addr);
u32 GetBranchTarget(u32 addr);
// Ignores bltzal/etc. instructions that change RA.
u32 GetBranchTargetNoRA(u32 addr);
u32 GetBranchTargetNoRA(u32 addr, MIPSOpcode op);

View file

@ -28,10 +28,12 @@ struct MIPSInfo {
explicit MIPSInfo(u64 v) : value(v) {
}
u64 operator & (const u64 &arg) const {
u64 operator & (const u64 arg) const {
return value & arg;
}
void operator=(u64 val) {val = value;}
u64 value;
};

View file

@ -25,13 +25,11 @@
#include "stddef.h"
#endif
namespace MIPSComp
{
namespace MIPSComp {
typedef int FakeReg;
class FakeJit : public FakeGen::FakeXCodeBlock
{
class FakeJit : public FakeGen::FakeXCodeBlock {
public:
FakeJit(MIPSState *mips);

View file

@ -61,10 +61,15 @@ namespace MIPSComp
u32 uimm = (u16)(op & 0xFFFF);
MIPSGPReg rt = _RT;
MIPSGPReg rs = _RS;
gpr.Lock(rt, rs);
gpr.MapReg(rt, rt == rs, true);
if (rt != rs)
MOV(32, gpr.R(rt), gpr.R(rs));
if (gpr.R(rs).IsSimpleReg() && !GetIREntry().IsGPRAlive(rs)) {
// NOTICE_LOG(JIT,"immlogic remap at %08x : %08x", js.blockStart, GetCompilerPC());
gpr.FlushRemap(rs, rt, GetIREntry().IsGPRClobbered(rs));
} else {
gpr.Lock(rt, rs);
gpr.MapReg(rt, rt == rs, true);
if (rt != rs)
MOV(32, gpr.R(rt), gpr.R(rs));
}
(this->*arith)(32, gpr.R(rt), Imm32(uimm));
gpr.UnlockAll();
}
@ -101,6 +106,16 @@ namespace MIPSComp
} else if (simm < 0) {
SUB(32, gpr.R(rt), UImmAuto(-simm));
}
} else if (gpr.R(rs).IsSimpleReg() && !GetIREntry().IsGPRAlive(rs)) {
// NOTICE_LOG(JIT, "Reg remap at %08x", js.blockStart);
// Can avoid a MOV by taking over the dest register. It keeps its contents
// but is now reassigned to rt.
gpr.FlushRemap(rs, rt, GetIREntry().IsGPRClobbered(rs));
if (simm > 0)
ADD(32, gpr.R(rt), UImmAuto(simm));
else if (simm < 0) {
SUB(32, gpr.R(rt), UImmAuto(-simm));
}
} else if (gpr.R(rs).IsSimpleReg()) {
LEA(32, gpr.RX(rt), MDisp(gpr.RX(rs), simm));
} else {
@ -145,16 +160,22 @@ namespace MIPSComp
// This is often used before a branch. If rs is not already mapped, let's leave it.
gpr.MapReg(rt, rt == rs, true);
bool needsTemp = !HasLowSubregister(gpr.R(rt)) || rt == rs;
if (needsTemp) {
CMP(32, gpr.R(rs), Imm32(suimm));
SETcc(CC_B, R(TEMPREG));
MOVZX(32, 8, gpr.RX(rt), R(TEMPREG));
if (false && rt == rs) {
SUB(32, gpr.R(rt), Imm32(suimm));
SHR(32, gpr.R(rt), Imm8(31));
} else {
XOR(32, gpr.R(rt), gpr.R(rt));
CMP(32, gpr.R(rs), Imm32(suimm));
SETcc(CC_B, gpr.R(rt));
bool needsTemp = !HasLowSubregister(gpr.R(rt)) || rt == rs;
if (needsTemp) {
CMP(32, gpr.R(rs), Imm32(suimm));
SETcc(CC_B, R(TEMPREG));
MOVZX(32, 8, gpr.RX(rt), R(TEMPREG));
} else {
XOR(32, gpr.R(rt), gpr.R(rt));
CMP(32, gpr.R(rs), Imm32(suimm));
SETcc(CC_B, gpr.R(rt));
}
}
// TODO: If we can figure out that it's ok to replace the source register
gpr.UnlockAll();
}
break;
@ -358,6 +379,13 @@ namespace MIPSComp
if (invertResult) {
NOT(32, gpr.R(rd));
}
} else if (rd != rt && rd != rs && gpr.R(rs).IsSimpleReg() && !GetIREntry().IsGPRAlive(rs)) {
// NOTICE_LOG(JIT, "TriArith liveness at %08x", js.blockStart);
gpr.FlushRemap(rs, rd, GetIREntry().IsGPRClobbered(rs));
(this->*arith)(32, gpr.R(rd), gpr.R(rt));
if (invertResult) {
NOT(32, gpr.R(rd));
}
} else {
// Use TEMPREG as a temporary if we'd overwrite it.
if (rd == rt)
@ -485,7 +513,7 @@ namespace MIPSComp
cc = SwapCCFlag(cc);
} else if (!gpr.R(lhs).CanDoOpWith(gpr.R(rhs))) {
// Let's try to pick which makes more sense to load.
if (MIPSAnalyst::IsRegisterUsed(rhs, GetCompilerPC() + 4, 3)) {
if (irblock.IsRegisterUsed(rhs, js.irBlockPos + 1, 3)) {
std::swap(lhs, rhs);
cc = SwapCCFlag(cc);
}
@ -525,7 +553,7 @@ namespace MIPSComp
cc = SwapCCFlag(cc);
} else if (!gpr.R(lhs).CanDoOpWith(gpr.R(rhs))) {
// Let's try to pick which makes more sense to load.
if (MIPSAnalyst::IsRegisterUsed(rhs, GetCompilerPC() + 4, 3)) {
if (irblock.IsRegisterUsed(rhs, js.irBlockPos + 1, 3)) {
std::swap(lhs, rhs);
cc = SwapCCFlag(cc);
}

View file

@ -195,6 +195,7 @@ bool Jit::PredictTakeBranch(u32 targetAddr, bool likely) {
void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink) {
// We may want to try to continue along this branch a little while, to reduce reg flushing.
#if 0
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
if (CanContinueBranch(predictTakeBranch ? targetAddr : notTakenAddr))
{
@ -244,9 +245,7 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
GetCompilerPC() = targetAddr - 4;
}
else
{
@ -262,12 +261,11 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del
CONDITIONAL_LOG_EXIT(notTakenAddr);
// Account for the delay slot.
js.compilerPC += 4;
// In case the delay slot was a break or something.
js.compiling = true;
GetCompilerPC() += 4;
}
}
else
#endif
{
Gen::FixupBranch ptr;
if (!likely)
@ -295,7 +293,6 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(notTakenAddr);
WriteExit(notTakenAddr, js.nextExit++);
js.compiling = false;
}
}
@ -311,7 +308,6 @@ void Jit::CompBranchExit(bool taken, u32 targetAddr, u32 notTakenAddr, bool dela
const u32 destAddr = taken ? targetAddr : notTakenAddr;
CONDITIONAL_LOG_EXIT(destAddr);
WriteExit(destAddr, js.nextExit++);
js.compiling = false;
}
void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
@ -344,24 +340,13 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
immBranchTaken = !immBranchNotTaken;
}
if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions)
{
if (!immBranchTaken)
{
if (jo.immBranches && immBranch) {
if (!immBranchTaken) {
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
js.compilerPC += 4;
irblock.entries[js.irBlockPos + 1].flags |= IR_FLAG_SKIP;
return;
}
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
@ -426,7 +411,7 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
{
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
js.compilerPC += 4;
irblock.entries[js.irBlockPos + 1].flags |= IR_FLAG_SKIP;
return;
}
@ -435,12 +420,7 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
// AddContinuedBlock(targetAddr);
}
MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
@ -461,22 +441,21 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
}
}
void Jit::Comp_RelBranch(MIPSOpcode op)
{
switch (op>>26)
{
case 4: BranchRSRTComp(op, CC_NZ, false); break;//beq
case 5: BranchRSRTComp(op, CC_Z, false); break;//bne
case 4: BranchRSRTComp(op, CC_NZ, false); break; //beq
case 5: BranchRSRTComp(op, CC_Z, false); break; //bne
case 6: BranchRSZeroComp(op, CC_G, false, false); break;//blez
case 7: BranchRSZeroComp(op, CC_LE, false, false); break;//bgtz
case 6: BranchRSZeroComp(op, CC_G, false, false); break; //blez
case 7: BranchRSZeroComp(op, CC_LE, false, false); break; //bgtz
case 20: BranchRSRTComp(op, CC_NZ, true); break;//beql
case 21: BranchRSRTComp(op, CC_Z, true); break;//bnel
case 20: BranchRSRTComp(op, CC_NZ, true); break; //beql
case 21: BranchRSRTComp(op, CC_Z, true); break; //bnel
case 22: BranchRSZeroComp(op, CC_G, false, true); break;//blezl
case 23: BranchRSZeroComp(op, CC_LE, false, true); break;//bgtzl
case 22: BranchRSZeroComp(op, CC_G, false, true); break; //blezl
case 23: BranchRSZeroComp(op, CC_LE, false, true); break; //bgtzl
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
@ -603,8 +582,6 @@ void Jit::Comp_Jump(MIPSOpcode op) {
if (!Memory::IsValidAddress(targetAddr)) {
if (js.nextExit == 0) {
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x PC %08x LR %08x", targetAddr, GetCompilerPC(), currentMIPS->r[MIPS_REG_RA]);
} else {
js.compiling = false;
}
// TODO: Mark this block dirty or something? May be indication it will be changed by imports.
return;
@ -613,15 +590,6 @@ void Jit::Comp_Jump(MIPSOpcode op) {
switch (op >> 26) {
case 2: //j
CompileDelaySlot(DELAYSLOT_NICE);
if (CanContinueJump(targetAddr))
{
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
FlushAll();
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
@ -632,21 +600,9 @@ void Jit::Comp_Jump(MIPSOpcode op) {
if (ReplaceJalTo(targetAddr))
return;
// Check for small function inlining (future)
// Save return address - might be overwritten by delay slot.
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
CompileDelaySlot(DELAYSLOT_NICE);
if (CanContinueJump(targetAddr))
{
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
FlushAll();
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
@ -656,11 +612,11 @@ void Jit::Comp_Jump(MIPSOpcode op) {
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
js.compiling = false;
}
static u32 savedPC;
// jr ra << helpful for grep
void Jit::Comp_JumpReg(MIPSOpcode op)
{
CONDITIONAL_LOG;
@ -678,6 +634,7 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
delaySlotIsNice = false;
CONDITIONAL_NICE_DELAYSLOT;
X64Reg destReg = EAX;
if (IsSyscall(delaySlotOp))
{
// If this is a syscall, write the pc (for thread switching and other good reasons.)
@ -688,7 +645,7 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
CompileDelaySlot(DELAYSLOT_FLUSH);
// Syscalls write the exit code for us.
_dbg_assert_msg_(JIT, !js.compiling, "Expected syscall to write an exit code.");
// _dbg_assert_msg_(JIT, !js.compiling, "Expected syscall to write an exit code.");
return;
}
else if (delaySlotIsNice)
@ -709,17 +666,11 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
gpr.DiscardRegContentsIfCached(MIPS_REG_T9);
}
if (gpr.IsImm(rs) && CanContinueJump(gpr.GetImm(rs)))
{
AddContinuedBlock(gpr.GetImm(rs));
// Account for the increment in the loop.
js.compilerPC = gpr.GetImm(rs) - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
if (gpr.R(rs).IsSimpleReg()) {
destReg = gpr.R(rs).GetSimpleReg();
} else {
MOV(32, R(EAX), gpr.R(rs));
}
MOV(32, R(EAX), gpr.R(rs));
FlushAll();
}
else
@ -731,6 +682,7 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
gpr.SetImm(rd, GetCompilerPC() + 8);
CompileDelaySlot(DELAYSLOT_NICE);
MOV(32, R(EAX), M(&savedPC));
destReg = EAX;
FlushAll();
}
@ -746,8 +698,7 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
}
CONDITIONAL_LOG_EXIT_EAX();
WriteExitDestInEAX();
js.compiling = false;
WriteExitDestInReg(destReg);
}
void Jit::Comp_Syscall(MIPSOpcode op)
@ -784,14 +735,12 @@ void Jit::Comp_Syscall(MIPSOpcode op)
ApplyRoundingMode();
WriteSyscallExit();
js.compiling = false;
}
void Jit::Comp_Break(MIPSOpcode op)
{
Comp_Generic(op);
WriteSyscallExit();
js.compiling = false;
}
} // namespace Mipscomp

View file

@ -60,12 +60,17 @@ void Jit::CompFPTriArith(MIPSOpcode op, void (XEmitter::*arith)(X64Reg reg, OpAr
fpr.MapReg(fd, true, true);
(this->*arith)(fpr.RX(fd), fpr.R(fs));
} else if (ft != fd) {
// fs can't be fd (handled above.)
fpr.MapReg(fd, false, true);
MOVSS(fpr.RX(fd), fpr.R(fs));
if (fpr.R(fs).IsSimpleReg() && !GetIREntry().IsFPRAlive(fs) && fs != ft) {
// NOTICE_LOG(JIT, "tri mov eliminated at %08x", js.blockStart);
fpr.FlushRemap(fs, fd, GetIREntry().IsFPRClobbered(fs));
} else {
// fs can't be fd (handled above.)
fpr.MapReg(fd, false, true);
MOVSS(fpr.RX(fd), fpr.R(fs));
}
(this->*arith)(fpr.RX(fd), fpr.R(ft));
} else {
// fd must be ft.
// fd must be ft and order matters.
fpr.MapReg(fd, true, true);
MOVSS(XMM0, fpr.R(fs));
(this->*arith)(XMM0, fpr.R(ft));
@ -265,7 +270,7 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
};
switch (op & 0x3f) {
case 5: //F(fd) = fabsf(F(fs)); break; //abs
case 5: //F(fd) = fabsf(F(fs)); break; //abs.s
fpr.SpillLock(fd, fs);
fpr.MapReg(fd, fd == fs, true);
if (fd != fs) {
@ -274,11 +279,15 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
ANDPS(fpr.RX(fd), M(ssNoSignMask));
break;
case 6: //F(fd) = F(fs); break; //mov
case 6: //F(fd) = F(fs); break; //mov.s
if (fd != fs) {
fpr.SpillLock(fd, fs);
fpr.MapReg(fd, fd == fs, true);
MOVSS(fpr.RX(fd), fpr.R(fs));
if (fpr.R(fs).IsSimpleReg() && !GetIREntry().IsFPRAlive(fs)) {
fpr.FlushRemap(fs, fd, GetIREntry().IsFPRClobbered(fs));
} else {
fpr.SpillLock(fd, fs);
fpr.MapReg(fd, fd == fs, true);
MOVSS(fpr.RX(fd), fpr.R(fs));
}
}
break;
@ -286,7 +295,12 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
fpr.SpillLock(fd, fs);
fpr.MapReg(fd, fd == fs, true);
if (fd != fs) {
MOVSS(fpr.RX(fd), fpr.R(fs));
if (fpr.R(fs).IsSimpleReg() && !GetIREntry().IsFPRAlive(fs)) {
// NOTICE_LOG(JIT,"fneg mov eliminated at %08x", js.blockStart);
fpr.FlushRemap(fs, fd, GetIREntry().IsFPRClobbered(fs));
} else {
MOVSS(fpr.RX(fd), fpr.R(fs));
}
}
XORPS(fpr.RX(fd), M(ssSignBits2));
break;

View file

@ -308,7 +308,18 @@ namespace MIPSComp {
break;
case 32: //R(rt) = (u32)(s32)(s8) ReadMem8 (addr); break; //lb
CompITypeMemRead(op, 8, &XEmitter::MOVSX, safeMemFuncs.readU8);
{
IREntry &next = irblock.entries[js.irBlockPos + 1];
if (MIPSAnalyst::IsANDIInstr(next.op) && _RT == MIPS_GET_RT(next.op) && MIPS_GET_IMM16(next.op) == 0xFF) {
// Seen, but not very often.
// TODO: Move to an IR postprocess cleanly as an exercise :)
NOTICE_LOG(JIT, "Found lb+andi! Turning lb into lbu %08x", irblock.address);
CompITypeMemRead(op, 8, &XEmitter::MOVZX, safeMemFuncs.readU8);
next.flags |= IR_FLAG_SKIP;
} else {
CompITypeMemRead(op, 8, &XEmitter::MOVSX, safeMemFuncs.readU8);
}
}
break;
case 33: //R(rt) = (u32)(s32)(s16)ReadMem16(addr); break; //lh

View file

@ -2455,12 +2455,17 @@ void Jit::Comp_VMatrixInit(MIPSOpcode op) {
MatrixSize sz = GetMtxSize(op);
int n = GetMatrixSide(sz);
int vd = _VD;
if (n == 4) {
// Just remove the transposed-ness. All modes are transpose-invariant.
vd &= ~0x20;
}
// Not really about trying here, it will work if enabled.
if (jo.enableVFPUSIMD) {
VectorSize vsz = GetVectorSize(sz);
u8 vecs[4];
GetMatrixColumns(_VD, sz, vecs);
GetMatrixColumns(vd, sz, vecs);
for (int i = 0; i < n; i++) {
u8 vec[4];
GetVectorRegs(vec, vsz, vecs[i]);
@ -2482,7 +2487,7 @@ void Jit::Comp_VMatrixInit(MIPSOpcode op) {
}
u8 dregs[16];
GetMatrixRegs(dregs, sz, _VD);
GetMatrixRegs(dregs, sz, vd);
// Flush SIMD.
fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);
@ -3401,7 +3406,7 @@ void Jit::Comp_VRot(MIPSOpcode op) {
// Pair of vrot with the same angle argument. Let's join them (can share sin/cos results).
vd2 = MIPS_GET_VD(nextOp);
imm2 = (nextOp >> 16) & 0x1f;
// NOTICE_LOG(JIT, "Joint VFPU at %08x", js.blockStart);
// NOTICE_LOG(JIT, "Joint vrot at %08x", js.blockStart);
}
u8 sreg;
@ -3436,7 +3441,7 @@ void Jit::Comp_VRot(MIPSOpcode op) {
// If the negsin setting differs between the two joint invocations, we need to flip the second one.
bool negSin2 = (imm2 & 0x10) ? true : false;
CompVrotShuffle(dregs2, imm2, n, negSin1 != negSin2);
js.compilerPC += 4;
// TODO later in merge: irblock.entries[js.irBlockPos + 1].skipped = true
}
fpr.ReleaseSpillLocks();
}

View file

@ -32,6 +32,7 @@
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/IR.h"
#include "Core/HLE/ReplaceTables.h"
#include "RegCache.h"
@ -307,15 +308,18 @@ void Jit::InvalidateCache()
void Jit::CompileDelaySlot(int flags, RegCacheState *state)
{
IREntry &entry = irblock.entries[js.irBlockPos + 1];
// Need to offset the downcount which was already incremented for the branch + delay slot.
CheckJitBreakpoint(GetCompilerPC() + 4, -2);
CheckJitBreakpoint(entry.origAddress, -2);
if (flags & DELAYSLOT_SAFE)
SAVE_FLAGS; // preserve flag around the delay slot!
js.inDelaySlot = true;
MIPSOpcode op = GetOffsetInstruction(1);
MIPSOpcode op = entry.op;
MIPSCompileOp(op);
entry.flags |= IR_FLAG_SKIP;
js.inDelaySlot = false;
if (flags & DELAYSLOT_FLUSH)
@ -341,7 +345,7 @@ void Jit::EatInstruction(MIPSOpcode op)
CheckJitBreakpoint(GetCompilerPC() + 4, 0);
js.numInstructions++;
js.compilerPC += 4;
js.irBlockPos++;
js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
@ -389,23 +393,22 @@ void Jit::RunLoopUntil(u64 globalticks)
}
u32 Jit::GetCompilerPC() {
return js.compilerPC;
return irblock.entries[js.irBlockPos].origAddress;
}
MIPSOpcode Jit::GetOffsetInstruction(int offset) {
return Memory::Read_Instruction(GetCompilerPC() + 4 * offset);
return irblock.entries[js.irBlockPos + offset].op;
}
const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
{
js.cancel = false;
js.blockStart = js.compilerPC = mips_->pc;
js.blockStart = mips_->pc;
js.lastContinuedPC = 0;
js.initialBlockSize = 0;
js.nextExit = 0;
js.downcountAmount = 0;
js.curBlock = b;
js.compiling = true;
js.inDelaySlot = false;
js.afterOp = JitState::AFTER_NONE;
js.PrefixStart();
@ -420,21 +423,28 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
b->normalEntry = GetCodePtr();
MIPSAnalyst::AnalysisResults analysis = MIPSAnalyst::Analyze(em_address);
ExtractIR(jo, em_address, &irblock);
gpr.Start(mips_, &js, &jo, analysis);
fpr.Start(mips_, &js, &jo, analysis);
gpr.Start(mips_, &js, &jo, irblock.analysis);
fpr.Start(mips_, &js, &jo, irblock.analysis);
js.numInstructions = 0;
while (js.compiling) {
js.irBlockPos = 0;
js.irBlock = &irblock;
// - 1 to avoid the final delay slot.
while (js.irBlockPos < irblock.entries.size()) {
IREntry &entry = irblock.entries[js.irBlockPos];
if (entry.flags & IR_FLAG_SKIP)
goto skip_entry;
CheckJitBreakpoint(entry.origAddress, 0);
if (entry.pseudoInstr != PSEUDO_NONE) {
CompPseudoOp(entry.pseudoInstr, entry.op);
} else {
MIPSCompileOp(entry.op);
}
js.downcountAmount += MIPSGetInstructionCycleEstimate(entry.op);
// Jit breakpoints are quite fast, so let's do them in release too.
CheckJitBreakpoint(GetCompilerPC(), 0);
MIPSOpcode inst = Memory::Read_Opcode_JIT(GetCompilerPC());
js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);
MIPSCompileOp(inst);
if (js.afterOp & JitState::AFTER_CORE_STATE) {
// TODO: Save/restore?
FlushAll();
@ -445,9 +455,9 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
CMP(32, M(&coreState), Imm32(CORE_NEXTFRAME));
FixupBranch skipCheck = J_CC(CC_LE);
if (js.afterOp & JitState::AFTER_REWIND_PC_BAD_STATE)
MOV(32, M(&mips_->pc), Imm32(GetCompilerPC()));
MOV(32, M(&mips_->pc), Imm32(entry.origAddress));
else
MOV(32, M(&mips_->pc), Imm32(GetCompilerPC() + 4));
MOV(32, M(&mips_->pc), Imm32(irblock.entries[js.irBlockPos + 1].origAddress));
WriteSyscallExit();
SetJumpTarget(skipCheck);
@ -456,30 +466,23 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
if (js.afterOp & JitState::AFTER_MEMCHECK_CLEANUP) {
js.afterOp &= ~JitState::AFTER_MEMCHECK_CLEANUP;
}
js.compilerPC += 4;
js.numInstructions++;
// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
if (GetSpaceLeft() < 0x800 || js.numInstructions >= JitBlockCache::MAX_BLOCK_INSTRUCTIONS)
{
FlushAll();
WriteExit(GetCompilerPC(), js.nextExit++);
js.compiling = false;
}
skip_entry:
js.irBlockPos++;
}
b->codeSize = (u32)(GetCodePtr() - b->normalEntry);
NOP();
AlignCode4();
if (js.lastContinuedPC == 0)
b->originalSize = js.numInstructions;
else
b->originalSize = js.numInstructions;
/*
{
// We continued at least once. Add the last proxy and set the originalSize correctly.
blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (GetCompilerPC() - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
b->originalSize = js.initialBlockSize;
}
}*/
return b->normalEntry;
}
@ -560,11 +563,11 @@ bool Jit::ReplaceJalTo(u32 dest) {
ApplyRoundingMode();
}
js.compilerPC += 4;
js.irBlockPos++;
// No writing exits, keep going!
// Add a trigger so that if the inlined code changes, we invalidate this block.
blocks.ProxyBlock(js.blockStart, dest, funcSize / sizeof(u32), GetCodePtr());
blocks.ProxyBlock(js.blockStart, dest, sizeof(u32), GetCodePtr());
return true;
}
@ -595,20 +598,19 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op)
}
if (disabled) {
// We probably won't get here anymore.
MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true));
} else if (entry->jitReplaceFunc) {
MIPSReplaceFunc repl = entry->jitReplaceFunc;
int cycles = (this->*repl)();
if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) {
// Compile the original instruction at this address. We ignore cycles for hooks.
MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true));
} else {
FlushAll();
MOV(32, R(ECX), M(&mips_->r[MIPS_REG_RA]));
js.downcountAmount += cycles;
WriteExitDestInReg(ECX);
js.compiling = false;
}
} else if (entry->replaceFunc) {
FlushAll();
@ -630,7 +632,6 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op)
// Need to set flags again, ApplyRoundingMode destroyed them (and EAX.)
SUB(32, M(&mips_->downcount), Imm8(0));
WriteExitDestInReg(ECX);
js.compiling = false;
}
} else {
ERROR_LOG(HLE, "Replacement function %s has neither jit nor regular impl", entry->name);
@ -838,4 +839,19 @@ void Jit::CallProtectedFunction(const void *func, const OpArg &arg1, const u32 a
void Jit::Comp_DoNothing(MIPSOpcode op) { }
// Handler for the PSEUDO_SAVE_RA pseudo instruction emitted by the IR extractor
// (dispatched from CompPseudoOp). Sets the MIPS return-address register to a
// known immediate: the current entry's original address plus 8 — presumably
// the address just past the branch's delay slot, matching jal semantics;
// TODO(review): confirm against the IR extractor's use of this pseudo-op.
void Jit::Comp_IR_SaveRA(MIPSOpcode op) {
	gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
}
// Dispatch a pseudo instruction (produced by the IR extractor, not present in
// the original MIPS stream) to its dedicated compile handler. Unknown pseudo
// ids are logged and otherwise ignored.
void Jit::CompPseudoOp(int pseudo, MIPSOpcode op) {
	if (pseudo == PSEUDO_SAVE_RA) {
		Comp_IR_SaveRA(op);
	} else {
		ERROR_LOG(JIT, "Invalid pseudo op %i", pseudo);
	}
}
} // namespace

View file

@ -32,8 +32,11 @@
#include "Core/MIPS/x86/JitSafeMem.h"
#include "Core/MIPS/x86/RegCache.h"
#include "Core/MIPS/x86/RegCacheFPU.h"
#include "Core/MIPS/IR.h"
class PointerWrap;
struct ReplacementTableEntry;
namespace MIPSComp
{
@ -47,8 +50,7 @@ struct RegCacheState {
FPURegCacheState fpr;
};
class Jit : public Gen::XCodeBlock
{
class Jit : public Gen::XCodeBlock {
public:
Jit(MIPSState *mips);
virtual ~Jit();
@ -56,6 +58,8 @@ public:
void DoState(PointerWrap &p);
static void DoDummyState(PointerWrap &p);
const JitOptions &GetJitOptions() { return jo; }
// Compiled ops should ignore delay slots
// the compiler will take care of them by itself
// OR NOT
@ -66,6 +70,9 @@ public:
void Compile(u32 em_address); // Compiles a block at current MIPS PC
const u8 *DoJit(u32 em_address, JitBlock *b);
IRBlock *GetIRBlock() { return &irblock; }
IREntry &GetIREntry() { return irblock.entries[js.irBlockPos]; }
bool DescribeCodePtr(const u8 *ptr, std::string &name);
void Comp_RunBlock(MIPSOpcode op);
@ -139,6 +146,10 @@ public:
void Comp_DoNothing(MIPSOpcode op);
// Pseudo instructions emitted by the IR extractor
void Comp_IR_SaveRA(MIPSOpcode op);
void CompPseudoOp(int pseudo, MIPSOpcode op);
int Replace_fabsf();
int Replace_dl_write_matrix();
@ -192,7 +203,6 @@ private:
void WriteExit(u32 destination, int exit_num);
void WriteExitDestInReg(Gen::X64Reg reg);
void WriteExitDestInEAX() { WriteExitDestInReg(Gen::EAX); }
// void WriteRfiExitDestInEAX();
void WriteSyscallExit();
@ -258,6 +268,7 @@ private:
}
bool PredictTakeBranch(u32 targetAddr, bool likely);
/*
bool CanContinueBranch(u32 targetAddr) {
if (!jo.continueBranches || js.numInstructions >= jo.continueMaxInstructions) {
return false;
@ -280,18 +291,14 @@ private:
return false;
}
return true;
}
bool CanContinueImmBranch(u32 targetAddr) {
if (!jo.immBranches || js.numInstructions >= jo.continueMaxInstructions) {
return false;
}
return true;
}
}*/
JitBlockCache blocks;
JitOptions jo;
JitState js;
IRBlock irblock;
GPRRegCache gpr;
FPURegCache fpr;

View file

@ -61,7 +61,7 @@ JitSafeMem::JitSafeMem(Jit *jit, MIPSGPReg raddr, s32 offset, u32 alignMask)
// If raddr_ is going to get loaded soon, load it now for more optimal code.
// We assume that it was already locked.
const int LOOKAHEAD_OPS = 3;
if (!jit_->gpr.R(raddr_).IsImm() && MIPSAnalyst::IsRegisterUsed(raddr_, jit_->GetCompilerPC() + 4, LOOKAHEAD_OPS))
if (!jit_->gpr.R(raddr_).IsImm() && jit_->GetIRBlock()->IsRegisterUsed(raddr_, jit_->js.irBlockPos + 1, LOOKAHEAD_OPS))
jit_->gpr.MapReg(raddr_, true, false);
}

View file

@ -20,6 +20,7 @@
#include "Common/x64Emitter.h"
#include "Core/Reporting.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/IR.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/x86/Jit.h"
@ -102,7 +103,6 @@ void GPRRegCache::Start(MIPSState *mips, MIPSComp::JitState *js, MIPSComp::JitOp
jo_ = jo;
}
// these are MIPS reg indices
void GPRRegCache::Lock(MIPSGPReg p1, MIPSGPReg p2, MIPSGPReg p3, MIPSGPReg p4) {
regs[p1].locked = true;
@ -149,13 +149,13 @@ X64Reg GPRRegCache::FindBestToSpill(bool unusedOnly, bool *clobbered) {
continue;
// Awesome, a clobbered reg. Let's use it.
if (MIPSAnalyst::IsRegisterClobbered(xregs[reg].mipsReg, js_->compilerPC, UNUSED_LOOKAHEAD_OPS)) {
if (js_->irBlock->IsRegisterClobbered(xregs[reg].mipsReg, js_->irBlockPos, UNUSED_LOOKAHEAD_OPS)) {
*clobbered = true;
return reg;
}
// Not awesome. A used reg. Let's try to avoid spilling.
if (unusedOnly && MIPSAnalyst::IsRegisterUsed(xregs[reg].mipsReg, js_->compilerPC, UNUSED_LOOKAHEAD_OPS)) {
if (unusedOnly && js_->irBlock->IsRegisterUsed(xregs[reg].mipsReg, js_->irBlockPos, UNUSED_LOOKAHEAD_OPS)) {
continue;
}
@ -178,7 +178,7 @@ X64Reg GPRRegCache::GetFreeXReg()
}
}
//Okay, not found :( Force grab one
// Okay, not found :( Force grab one
bool clobbered;
X64Reg bestToSpill = FindBestToSpill(true, &clobbered);
if (bestToSpill == INVALID_REG) {
@ -208,7 +208,7 @@ void GPRRegCache::FlushR(X64Reg reg)
StoreFromRegister(xregs[reg].mipsReg);
}
void GPRRegCache::FlushRemap(MIPSGPReg oldreg, MIPSGPReg newreg) {
void GPRRegCache::FlushRemap(MIPSGPReg oldreg, MIPSGPReg newreg, bool clobbered) {
OpArg oldLocation = regs[oldreg].location;
if (!oldLocation.IsSimpleReg()) {
PanicAlert("FlushRemap: Must already be in an x86 register");
@ -221,7 +221,11 @@ void GPRRegCache::FlushRemap(MIPSGPReg oldreg, MIPSGPReg newreg) {
return;
}
StoreFromRegister(oldreg);
if (clobbered && jo_->useClobberOpt) {
DiscardRegContentsIfCached(oldreg);
} else {
StoreFromRegister(oldreg);
}
// Now, if newreg already was mapped somewhere, get rid of that.
DiscardRegContentsIfCached(newreg);

View file

@ -20,6 +20,7 @@
#include "Common/x64Emitter.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/IR.h"
namespace X64JitConstants {
#ifdef _M_X64
@ -88,7 +89,8 @@ public:
void FlushBeforeCall();
// Flushes one register and reuses the register for another one. Dirtyness is implied.
void FlushRemap(MIPSGPReg oldreg, MIPSGPReg newreg);
// If clobbered, just discards the first register instead of flushing it.
void FlushRemap(MIPSGPReg oldreg, MIPSGPReg newreg, bool clobbered);
int SanityCheck() const;
void KillImmediate(MIPSGPReg preg, bool doLoad, bool makeDirty);

View file

@ -108,7 +108,7 @@ void FPURegCache::ReduceSpillLockV(const u8 *vec, VectorSize sz) {
}
}
void FPURegCache::FlushRemap(int oldreg, int newreg) {
void FPURegCache::FlushRemap(int oldreg, int newreg, bool clobbered) {
OpArg oldLocation = regs[oldreg].location;
if (!oldLocation.IsSimpleReg()) {
PanicAlert("FlushRemap: Must already be in an x86 SSE register");
@ -124,7 +124,11 @@ void FPURegCache::FlushRemap(int oldreg, int newreg) {
return;
}
StoreFromRegister(oldreg);
if (clobbered && jo_->useClobberOpt) {
DiscardR(oldreg);
} else {
StoreFromRegister(oldreg);
}
// Now, if newreg already was mapped somewhere, get rid of that.
DiscardR(newreg);
@ -651,11 +655,11 @@ static int MMShuffleSwapTo0(int lane) {
void FPURegCache::StoreFromRegister(int i) {
_assert_msg_(JIT, !regs[i].location.IsImm(), "WTF - FPURegCache::StoreFromRegister - it's an imm");
_assert_msg_(JIT, i >= 0 && i < NUM_MIPS_FPRS, "WTF - FPURegCache::StoreFromRegister - invalid mipsreg %i PC=%08x", i, js_->compilerPC);
_assert_msg_(JIT, i >= 0 && i < NUM_MIPS_FPRS, "WTF - FPURegCache::StoreFromRegister - invalid mipsreg %i PC=%08x", i, mips->pc);
if (regs[i].away) {
X64Reg xr = regs[i].location.GetSimpleReg();
_assert_msg_(JIT, xr >= 0 && xr < NUM_X_FPREGS, "WTF - FPURegCache::StoreFromRegister - invalid reg: x %i (mr: %i). PC=%08x", (int)xr, i, js_->compilerPC);
_assert_msg_(JIT, xr >= 0 && xr < NUM_X_FPREGS, "WTF - FPURegCache::StoreFromRegister - invalid reg: x %i (mr: %i). PC=%08x", (int)xr, i, mips->pc);
if (regs[i].lane != 0) {
const int *mri = xregs[xr].mipsRegs;
int seq = 1;

View file

@ -123,6 +123,8 @@ public:
void FlushRemap(int oldreg, int newreg);
void Flush();
void FlushRemap(int oldreg, int newreg, bool clobbered);
int SanityCheck() const;
const Gen::OpArg &R(int freg) const {return regs[freg].location;}

View file

@ -32,6 +32,7 @@
#include "Core/Config.h"
#include "Core/System.h"
#include "Core/CoreParameter.h"
#include "Core/MIPS/IR.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/JitCommon/NativeJit.h"
#include "Core/MIPS/JitCommon/JitCommon.h"
@ -547,32 +548,46 @@ void JitCompareScreen::CreateViews() {
root_ = new LinearLayout(ORIENT_HORIZONTAL);
((LinearLayout *)root_)->SetSpacing(0.0f);
ScrollView *leftColumnScroll = root_->Add(new ScrollView(ORIENT_VERTICAL, new LinearLayoutParams(1.0f)));
LinearLayout *leftColumn = leftColumnScroll->Add(new LinearLayout(ORIENT_VERTICAL));
ScrollView *midColumnScroll = root_->Add(new ScrollView(ORIENT_VERTICAL, new LinearLayoutParams(2.0f)));
LinearLayout *midColumn = midColumnScroll->Add(new LinearLayout(ORIENT_VERTICAL));
midColumnScroll_ = root_->Add(new ScrollView(ORIENT_VERTICAL, new LinearLayoutParams(2.0f)));
LinearLayout *midColumn = midColumnScroll_->Add(new LinearLayout(ORIENT_VERTICAL));
leftDisasm_ = midColumn->Add(new LinearLayout(ORIENT_VERTICAL));
leftDisasm_->SetSpacing(0.0f);
ScrollView *rightColumnScroll = root_->Add(new ScrollView(ORIENT_VERTICAL, new LinearLayoutParams(2.0f)));
LinearLayout *rightColumn = rightColumnScroll->Add(new LinearLayout(ORIENT_VERTICAL));
rightColumnScroll_ = root_->Add(new ScrollView(ORIENT_VERTICAL, new LinearLayoutParams(2.0f)));
LinearLayout *rightColumn = rightColumnScroll_->Add(new LinearLayout(ORIENT_VERTICAL));
rightDisasm_ = rightColumn->Add(new LinearLayout(ORIENT_VERTICAL));
rightDisasm_->SetSpacing(0.0f);
irColumnScroll_ = root_->Add(new ScrollView(ORIENT_VERTICAL, new LinearLayoutParams(4.0f)));
LinearLayout *irColumn = irColumnScroll_->Add(new LinearLayout(ORIENT_VERTICAL));
irDisasm_ = irColumn->Add(new LinearLayout(ORIENT_VERTICAL));
irColumnScroll_->SetVisibility(V_GONE);
irDisasm_->SetSpacing(0.0f);
leftColumn->Add(new Choice(de->T("Current")))->OnClick.Handle(this, &JitCompareScreen::OnCurrentBlock);
#ifdef MOBILE_DEVICE
leftColumn->Add(new Choice(de->T("By Address")))->OnClick.Handle(this, &JitCompareScreen::OnSelectBlock);
leftColumn->Add(new Choice(de->T("Prev")))->OnClick.Handle(this, &JitCompareScreen::OnPrevBlock);
leftColumn->Add(new Choice(de->T("Next")))->OnClick.Handle(this, &JitCompareScreen::OnNextBlock);
leftColumn->Add(new Choice(de->T("Random")))->OnClick.Handle(this, &JitCompareScreen::OnRandomBlock);
leftColumn->Add(new Choice(de->T("FPU")))->OnClick.Handle(this, &JitCompareScreen::OnRandomFPUBlock);
leftColumn->Add(new Choice(de->T("VFPU")))->OnClick.Handle(this, &JitCompareScreen::OnRandomVFPUBlock);
leftColumn->Add(new Choice(de->T("Stats")))->OnClick.Handle(this, &JitCompareScreen::OnShowStats);
leftColumn->Add(new Choice(d->T("Back")))->OnClick.Handle<UIScreen>(this, &UIScreen::OnBack);
#endif
LinearLayout *lin = leftColumn->Add(new LinearLayout(ORIENT_HORIZONTAL, new LayoutParams(FILL_PARENT, WRAP_CONTENT)));
lin->Add(new Choice("<<", new LinearLayoutParams(1.0)))->OnClick.Handle(this, &JitCompareScreen::OnPrevBlock);
lin->Add(new Choice(">>", new LinearLayoutParams(1.0)))->OnClick.Handle(this, &JitCompareScreen::OnNextBlock);
lin = leftColumn->Add(new LinearLayout(ORIENT_HORIZONTAL, new LayoutParams(FILL_PARENT, WRAP_CONTENT)));
lin->Add(new Choice(de->T("Rn")))->OnClick.Handle(this, &JitCompareScreen::OnRandomBlock);
lin->Add(new Choice(de->T("FP")))->OnClick.Handle(this, &JitCompareScreen::OnRandomFPUBlock);
lin->Add(new Choice(de->T("VFP")))->OnClick.Handle(this, &JitCompareScreen::OnRandomVFPUBlock);
leftColumn->Add(new Choice(de->T("Show IR")))->OnClick.Handle(this, &JitCompareScreen::OnShowIR);
blockName_ = leftColumn->Add(new TextView(de->T("No block")));
blockAddr_ = leftColumn->Add(new TextEdit("", "", new LayoutParams(FILL_PARENT, WRAP_CONTENT)));
blockAddr_->OnTextChange.Handle(this, &JitCompareScreen::OnAddressChange);
blockStats_ = leftColumn->Add(new TextView(""));
leftColumn->Add(new Choice(de->T("Stats")))->OnClick.Handle(this, &JitCompareScreen::OnShowStats);
leftColumn->Add(new Choice(d->T("Back")))->OnClick.Handle<UIScreen>(this, &UIScreen::OnBack);
EventParams ignore = {0};
OnCurrentBlock(ignore);
@ -581,6 +596,7 @@ void JitCompareScreen::CreateViews() {
void JitCompareScreen::UpdateDisasm() {
leftDisasm_->Clear();
rightDisasm_->Clear();
irDisasm_->Clear();
using namespace UI;
@ -595,6 +611,7 @@ void JitCompareScreen::UpdateDisasm() {
if (currentBlock_ < 0 || currentBlock_ >= blockCache->GetNumBlocks()) {
leftDisasm_->Add(new TextView(de->T("No block")));
rightDisasm_->Add(new TextView(de->T("No block")));
irDisasm_->Add(new TextView(de->T("No block")));
blockStats_->SetText("");
return;
}
@ -625,6 +642,15 @@ void JitCompareScreen::UpdateDisasm() {
rightDisasm_->Add(new TextView(targetDis[i]))->SetFocusable(true);
}
if (showIR_) {
MIPSComp::IRBlock irblock;
MIPSComp::ExtractIR(MIPSComp::jit->GetJitOptions(), block->originalAddress, &irblock);
std::vector<std::string> irDis = irblock.ToStringVector();
for (size_t i = 0; i < irDis.size(); i++) {
irDisasm_->Add(new TextView(irDis[i]));
}
}
int numMips = leftDisasm_->GetNumSubviews();
int numHost = rightDisasm_->GetNumSubviews();
@ -654,7 +680,7 @@ UI::EventReturn JitCompareScreen::OnShowStats(UI::EventParams &e) {
BlockCacheStats bcStats;
blockCache->ComputeStats(bcStats);
NOTICE_LOG(JIT, "Num blocks: %i", bcStats.numBlocks);
NOTICE_LOG(JIT, "Average Bloat: %0.2f%%", 100 * bcStats.avgBloat);
NOTICE_LOG(JIT, "Average Bloat (bytes!): %0.2f%%", 100 * bcStats.avgBloat);
NOTICE_LOG(JIT, "Min Bloat: %0.2f%% (%08x)", 100 * bcStats.minBloat, bcStats.minBloatBlock);
NOTICE_LOG(JIT, "Max Bloat: %0.2f%% (%08x)", 100 * bcStats.maxBloat, bcStats.maxBloatBlock);
@ -681,6 +707,16 @@ UI::EventReturn JitCompareScreen::OnSelectBlock(UI::EventParams &e) {
return UI::EVENT_DONE;
}
// Toggle between the MIPS/host disassembly columns and the IR listing column,
// then refresh the disassembly views to match the new mode.
UI::EventReturn JitCompareScreen::OnShowIR(UI::EventParams &e) {
	using namespace UI;
	showIR_ = !showIR_;
	// When IR is shown, the two disassembly columns are hidden, and vice versa.
	const auto disasmVisibility = showIR_ ? V_GONE : V_VISIBLE;
	const auto irVisibility = showIR_ ? V_VISIBLE : V_GONE;
	midColumnScroll_->SetVisibility(disasmVisibility);
	rightColumnScroll_->SetVisibility(disasmVisibility);
	irColumnScroll_->SetVisibility(irVisibility);
	UpdateDisasm();
	return UI::EVENT_DONE;
}
UI::EventReturn JitCompareScreen::OnPrevBlock(UI::EventParams &e) {
currentBlock_--;
UpdateDisasm();
@ -722,26 +758,21 @@ UI::EventReturn JitCompareScreen::OnRandomBlock(UI::EventParams &e) {
return UI::EVENT_DONE;
}
UI::EventReturn JitCompareScreen::OnRandomVFPUBlock(UI::EventParams &e) {
OnRandomBlock(IS_VFPU);
return UI::EVENT_DONE;
}
UI::EventReturn JitCompareScreen::OnRandomFPUBlock(UI::EventParams &e) {
OnRandomBlock(IS_FPU);
return UI::EVENT_DONE;
return OnRandomBlockWithFlag(e, IS_FPU);
}
void JitCompareScreen::OnRandomBlock(int flag) {
if (!MIPSComp::jit) {
return;
}
UI::EventReturn JitCompareScreen::OnRandomVFPUBlock(UI::EventParams &e) {
return OnRandomBlockWithFlag(e, IS_VFPU);
}
UI::EventReturn JitCompareScreen::OnRandomBlockWithFlag(UI::EventParams &e, u64 flag) {
JitBlockCache *blockCache = MIPSComp::jit->GetBlockCache();
int numBlocks = blockCache->GetNumBlocks();
if (numBlocks > 0) {
bool anyWanted = false;
bool anyWithFlag = false;
int tries = 0;
while (!anyWanted && tries < 10000) {
while (!anyWithFlag && tries < 10000) {
currentBlock_ = rand() % numBlocks;
const JitBlock *b = blockCache->GetBlock(currentBlock_);
for (u32 addr = b->originalAddress; addr <= b->originalAddress + b->originalSize; addr += 4) {
@ -750,7 +781,7 @@ void JitCompareScreen::OnRandomBlock(int flag) {
char temp[256];
MIPSDisAsm(opcode, addr, temp);
// INFO_LOG(HLE, "Stopping VFPU instruction: %s", temp);
anyWanted = true;
anyWithFlag = true;
break;
}
}
@ -758,6 +789,7 @@ void JitCompareScreen::OnRandomBlock(int flag) {
}
}
UpdateDisasm();
return UI::EVENT_DONE;
}

View file

@ -114,7 +114,7 @@ private:
class JitCompareScreen : public UIDialogScreenWithBackground {
public:
JitCompareScreen() : currentBlock_(-1) {}
JitCompareScreen() : currentBlock_(-1), showIR_(false) {}
virtual void CreateViews();
private:
@ -122,14 +122,14 @@ private:
UI::EventReturn OnRandomBlock(UI::EventParams &e);
UI::EventReturn OnRandomFPUBlock(UI::EventParams &e);
UI::EventReturn OnRandomVFPUBlock(UI::EventParams &e);
void OnRandomBlock(int flag);
UI::EventReturn OnRandomBlockWithFlag(UI::EventParams &e, u64 flag);
UI::EventReturn OnCurrentBlock(UI::EventParams &e);
UI::EventReturn OnSelectBlock(UI::EventParams &e);
UI::EventReturn OnPrevBlock(UI::EventParams &e);
UI::EventReturn OnNextBlock(UI::EventParams &e);
UI::EventReturn OnBlockAddress(UI::EventParams &e);
UI::EventReturn OnAddressChange(UI::EventParams &e);
UI::EventReturn OnShowIR(UI::EventParams &e);
UI::EventReturn OnShowStats(UI::EventParams &e);
int currentBlock_;
@ -140,4 +140,11 @@ private:
UI::LinearLayout *leftDisasm_;
UI::LinearLayout *rightDisasm_;
UI::LinearLayout *irDisasm_;
UI::ScrollView *midColumnScroll_;
UI::ScrollView *rightColumnScroll_;
UI::ScrollView *irColumnScroll_;
bool showIR_;
};

View file

@ -120,6 +120,7 @@ endif
EXEC_AND_LIB_FILES := \
$(ARCH_FILES) \
TestRunner.cpp \
$(SRC)/Core/MIPS/IR.cpp \
$(SRC)/Core/MIPS/MIPS.cpp.arm \
$(SRC)/Core/MIPS/MIPSAnalyst.cpp \
$(SRC)/Core/MIPS/MIPSDis.cpp \