#include #include #include "ppsspp_config.h" #include "Common/BitScan.h" #include "Common/Common.h" #include "Common/Data/Convert/SmallDataConvert.h" #include "Common/Math/math_util.h" #ifdef _M_SSE #include #endif #if PPSSPP_ARCH(ARM_NEON) #if defined(_MSC_VER) && PPSSPP_ARCH(ARM64) #include #else #include #endif #endif #include "Core/Core.h" #include "Core/CoreTiming.h" #include "Core/Debugger/Breakpoints.h" #include "Core/HLE/HLE.h" #include "Core/HLE/ReplaceTables.h" #include "Core/MemMap.h" #include "Core/MIPS/MIPS.h" #include "Core/MIPS/MIPSTables.h" #include "Core/MIPS/MIPSVFPUUtils.h" #include "Core/MIPS/IR/IRInst.h" #include "Core/MIPS/IR/IRInterpreter.h" #include "Core/System.h" #ifdef mips // Why do MIPS compilers define something so generic? Try to keep defined, at least... #undef mips #define mips mips #endif alignas(16) static const float vec4InitValues[8][4] = { { 0.0f, 0.0f, 0.0f, 0.0f }, { 1.0f, 1.0f, 1.0f, 1.0f }, { -1.0f, -1.0f, -1.0f, -1.0f }, { 1.0f, 0.0f, 0.0f, 0.0f }, { 0.0f, 1.0f, 0.0f, 0.0f }, { 0.0f, 0.0f, 1.0f, 0.0f }, { 0.0f, 0.0f, 0.0f, 1.0f }, }; alignas(16) static const uint32_t signBits[4] = { 0x80000000, 0x80000000, 0x80000000, 0x80000000, }; alignas(16) static const uint32_t noSignMask[4] = { 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, }; alignas(16) static const uint32_t lowBytesMask[4] = { 0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF, }; u32 RunBreakpoint(u32 pc) { // Should we skip this breakpoint? if (CBreakPoints::CheckSkipFirst() == pc) return 0; CBreakPoints::ExecBreakPoint(currentMIPS->pc); return coreState != CORE_RUNNING ? 1 : 0; } u32 RunMemCheck(u32 pc, u32 addr) { // Should we skip this breakpoint? if (CBreakPoints::CheckSkipFirst() == pc) return 0; CBreakPoints::ExecOpMemCheck(addr, pc); return coreState != CORE_RUNNING ? 1 : 0; } template u32 RunValidateAddress(u32 pc, u32 addr, u32 isWrite) { const auto toss = [&](MemoryExceptionType t) { Core_MemoryException(addr, alignment, pc, t); return coreState != CORE_RUNNING ? 1 : 0; }; if (!Memory::IsValidRange(addr, alignment)) { MemoryExceptionType t = isWrite == 1 ? MemoryExceptionType::WRITE_WORD : MemoryExceptionType::READ_WORD; if constexpr (alignment > 4) t = isWrite ? MemoryExceptionType::WRITE_BLOCK : MemoryExceptionType::READ_BLOCK; return toss(t); } if constexpr (alignment > 1) if ((addr & (alignment - 1)) != 0) return toss(MemoryExceptionType::ALIGNMENT); return 0; } // We cannot use NEON on ARM32 here until we make it a hard dependency. We can, however, on ARM64. u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) { const IRInst *end = inst + count; while (inst != end) { switch (inst->op) { case IROp::Nop: _assert_(false); break; case IROp::SetConst: mips->r[inst->dest] = inst->constant; break; case IROp::SetConstF: memcpy(&mips->f[inst->dest], &inst->constant, 4); break; case IROp::Add: mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2]; break; case IROp::Sub: mips->r[inst->dest] = mips->r[inst->src1] - mips->r[inst->src2]; break; case IROp::And: mips->r[inst->dest] = mips->r[inst->src1] & mips->r[inst->src2]; break; case IROp::Or: mips->r[inst->dest] = mips->r[inst->src1] | mips->r[inst->src2]; break; case IROp::Xor: mips->r[inst->dest] = mips->r[inst->src1] ^ mips->r[inst->src2]; break; case IROp::Mov: mips->r[inst->dest] = mips->r[inst->src1]; break; case IROp::AddConst: mips->r[inst->dest] = mips->r[inst->src1] + inst->constant; break; case IROp::SubConst: mips->r[inst->dest] = mips->r[inst->src1] - inst->constant; break; case IROp::AndConst: mips->r[inst->dest] = mips->r[inst->src1] & inst->constant; break; case IROp::OrConst: mips->r[inst->dest] = mips->r[inst->src1] | inst->constant; break; case IROp::XorConst: mips->r[inst->dest] = mips->r[inst->src1] ^ inst->constant; break; case IROp::Neg: mips->r[inst->dest] = -(s32)mips->r[inst->src1]; break; case IROp::Not: mips->r[inst->dest] = ~mips->r[inst->src1]; break; case IROp::Ext8to32: mips->r[inst->dest] = SignExtend8ToU32(mips->r[inst->src1]); break; case IROp::Ext16to32: mips->r[inst->dest] = SignExtend16ToU32(mips->r[inst->src1]); break; case IROp::ReverseBits: mips->r[inst->dest] = ReverseBits32(mips->r[inst->src1]); break; case IROp::ValidateAddress8: if (RunValidateAddress<1>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) { CoreTiming::ForceCheck(); return mips->pc; } break; case IROp::ValidateAddress16: if (RunValidateAddress<2>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) { CoreTiming::ForceCheck(); return mips->pc; } break; case IROp::ValidateAddress32: if (RunValidateAddress<4>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) { CoreTiming::ForceCheck(); return mips->pc; } break; case IROp::ValidateAddress128: if (RunValidateAddress<16>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) { CoreTiming::ForceCheck(); return mips->pc; } break; case IROp::Load8: mips->r[inst->dest] = Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant); break; case IROp::Load8Ext: mips->r[inst->dest] = SignExtend8ToU32(Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant)); break; case IROp::Load16: mips->r[inst->dest] = Memory::ReadUnchecked_U16(mips->r[inst->src1] + inst->constant); break; case IROp::Load16Ext: mips->r[inst->dest] = SignExtend16ToU32(Memory::ReadUnchecked_U16(mips->r[inst->src1] + inst->constant)); break; case IROp::Load32: mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + inst->constant); break; case IROp::Load32Left: { u32 addr = mips->r[inst->src1] + inst->constant; u32 shift = (addr & 3) * 8; u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc); u32 destMask = 0x00ffffff >> shift; mips->r[inst->dest] = (mips->r[inst->dest] & destMask) | (mem << (24 - shift)); break; } case IROp::Load32Right: { u32 addr = mips->r[inst->src1] + inst->constant; u32 shift = (addr & 3) * 8; u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc); u32 destMask = 0xffffff00 << (24 - shift); mips->r[inst->dest] = (mips->r[inst->dest] & destMask) | (mem >> shift); break; } case IROp::Load32Linked: if (inst->dest != MIPS_REG_ZERO) mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + inst->constant); mips->llBit = 1; break; case IROp::LoadFloat: mips->f[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + inst->constant); break; case IROp::Store8: Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + inst->constant); break; case IROp::Store16: Memory::WriteUnchecked_U16(mips->r[inst->src3], mips->r[inst->src1] + inst->constant); break; case IROp::Store32: Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + inst->constant); break; case IROp::Store32Left: { u32 addr = mips->r[inst->src1] + inst->constant; u32 shift = (addr & 3) * 8; u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc); u32 memMask = 0xffffff00 << shift; u32 result = (mips->r[inst->src3] >> (24 - shift)) | (mem & memMask); Memory::WriteUnchecked_U32(result, addr & 0xfffffffc); break; } case IROp::Store32Right: { u32 addr = mips->r[inst->src1] + inst->constant; u32 shift = (addr & 3) * 8; u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc); u32 memMask = 0x00ffffff >> (24 - shift); u32 result = (mips->r[inst->src3] << shift) | (mem & memMask); Memory::WriteUnchecked_U32(result, addr & 0xfffffffc); break; } case IROp::Store32Conditional: if (mips->llBit) { Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + inst->constant); if (inst->dest != MIPS_REG_ZERO) { mips->r[inst->dest] = 1; } } else if (inst->dest != MIPS_REG_ZERO) { mips->r[inst->dest] = 0; } break; case IROp::StoreFloat: Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + inst->constant); break; case IROp::LoadVec4: { u32 base = mips->r[inst->src1] + inst->constant; #if defined(_M_SSE) _mm_store_ps(&mips->f[inst->dest], _mm_load_ps((const float *)Memory::GetPointerUnchecked(base))); #else for (int i = 0; i < 4; i++) mips->f[inst->dest + i] = Memory::ReadUnchecked_Float(base + 4 * i); #endif break; } case IROp::StoreVec4: { u32 base = mips->r[inst->src1] + inst->constant; #if defined(_M_SSE) _mm_store_ps((float *)Memory::GetPointerUnchecked(base), _mm_load_ps(&mips->f[inst->dest])); #else for (int i = 0; i < 4; i++) Memory::WriteUnchecked_Float(mips->f[inst->dest + i], base + 4 * i); #endif break; } case IROp::Vec4Init: { #if defined(_M_SSE) _mm_store_ps(&mips->f[inst->dest], _mm_load_ps(vec4InitValues[inst->src1])); #else memcpy(&mips->f[inst->dest], vec4InitValues[inst->src1], 4 * sizeof(float)); #endif break; } case IROp::Vec4Shuffle: { // Can't use the SSE shuffle here because it takes an immediate. pshufb with a table would work though, // or a big switch - there are only 256 shuffles possible (4^4) float temp[4]; for (int i = 0; i < 4; i++) temp[i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)]; for (int i = 0; i < 4; i++) mips->f[inst->dest + i] = temp[i]; break; } case IROp::Vec4Blend: // Could use _mm_blendv_ps (SSE4+BMI), vbslq_f32 (ARM), __riscv_vmerge_vvm (RISC-V) for (int i = 0; i < 4; i++) mips->f[inst->dest + i] = ((inst->constant >> i) & 1) ? mips->f[inst->src2 + i] : mips->f[inst->src1 + i]; break; case IROp::Vec4Mov: { #if defined(_M_SSE) _mm_store_ps(&mips->f[inst->dest], _mm_load_ps(&mips->f[inst->src1])); #elif PPSSPP_ARCH(ARM64_NEON) vst1q_f32(&mips->f[inst->dest], vld1q_f32(&mips->f[inst->src1])); #else memcpy(&mips->f[inst->dest], &mips->f[inst->src1], 4 * sizeof(float)); #endif break; } case IROp::Vec4Add: { #if defined(_M_SSE) _mm_store_ps(&mips->f[inst->dest], _mm_add_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2]))); #elif PPSSPP_ARCH(ARM64_NEON) vst1q_f32(&mips->f[inst->dest], vaddq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2]))); #else for (int i = 0; i < 4; i++) mips->f[inst->dest + i] = mips->f[inst->src1 + i] + mips->f[inst->src2 + i]; #endif break; } case IROp::Vec4Sub: { #if defined(_M_SSE) _mm_store_ps(&mips->f[inst->dest], _mm_sub_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2]))); #elif PPSSPP_ARCH(ARM64_NEON) vst1q_f32(&mips->f[inst->dest], vsubq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2]))); #else for (int i = 0; i < 4; i++) mips->f[inst->dest + i] = mips->f[inst->src1 + i] - mips->f[inst->src2 + i]; #endif break; } case IROp::Vec4Mul: { #if defined(_M_SSE) _mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2]))); #elif PPSSPP_ARCH(ARM64_NEON) vst1q_f32(&mips->f[inst->dest], vmulq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2]))); #else for (int i = 0; i < 4; i++) mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2 + i]; #endif break; } case IROp::Vec4Div: { #if defined(_M_SSE) _mm_store_ps(&mips->f[inst->dest], _mm_div_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2]))); #else for (int i = 0; i < 4; i++) mips->f[inst->dest + i] = mips->f[inst->src1 + i] / mips->f[inst->src2 + i]; #endif break; } case IROp::Vec4Scale: { #if defined(_M_SSE) _mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_set1_ps(mips->f[inst->src2]))); #else for (int i = 0; i < 4; i++) mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2]; #endif break; } case IROp::Vec4Neg: { #if defined(_M_SSE) _mm_store_ps(&mips->f[inst->dest], _mm_xor_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps((const float *)signBits))); #elif PPSSPP_ARCH(ARM64_NEON) vst1q_f32(&mips->f[inst->dest], vnegq_f32(vld1q_f32(&mips->f[inst->src1]))); #else for (int i = 0; i < 4; i++) mips->f[inst->dest + i] = -mips->f[inst->src1 + i]; #endif break; } case IROp::Vec4Abs: { #if defined(_M_SSE) _mm_store_ps(&mips->f[inst->dest], _mm_and_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps((const float *)noSignMask))); #elif PPSSPP_ARCH(ARM64_NEON) vst1q_f32(&mips->f[inst->dest], vabsq_f32(vld1q_f32(&mips->f[inst->src1]))); #else for (int i = 0; i < 4; i++) mips->f[inst->dest + i] = fabsf(mips->f[inst->src1 + i]); #endif break; } case IROp::Vec2Unpack16To31: { mips->fi[inst->dest] = (mips->fi[inst->src1] << 16) >> 1; mips->fi[inst->dest + 1] = (mips->fi[inst->src1] & 0xFFFF0000) >> 1; break; } case IROp::Vec2Unpack16To32: { mips->fi[inst->dest] = (mips->fi[inst->src1] << 16); mips->fi[inst->dest + 1] = (mips->fi[inst->src1] & 0xFFFF0000); break; } case IROp::Vec4Unpack8To32: { #if defined(_M_SSE) __m128i src = _mm_cvtsi32_si128(mips->fi[inst->src1]); src = _mm_unpacklo_epi8(src, _mm_setzero_si128()); src = _mm_unpacklo_epi16(src, _mm_setzero_si128()); _mm_store_si128((__m128i *)&mips->fi[inst->dest], _mm_slli_epi32(src, 24)); #else mips->fi[inst->dest] = (mips->fi[inst->src1] << 24); mips->fi[inst->dest + 1] = (mips->fi[inst->src1] << 16) & 0xFF000000; mips->fi[inst->dest + 2] = (mips->fi[inst->src1] << 8) & 0xFF000000; mips->fi[inst->dest + 3] = (mips->fi[inst->src1]) & 0xFF000000; #endif break; } case IROp::Vec2Pack32To16: { u32 val = mips->fi[inst->src1] >> 16; mips->fi[inst->dest] = (mips->fi[inst->src1 + 1] & 0xFFFF0000) | val; break; } case IROp::Vec2Pack31To16: { u32 val = (mips->fi[inst->src1] >> 15) & 0xFFFF; val |= (mips->fi[inst->src1 + 1] << 1) & 0xFFFF0000; mips->fi[inst->dest] = val; break; } case IROp::Vec4Pack32To8: { // Removed previous SSE code due to the need for unsigned 16-bit pack, which I'm too lazy to work around the lack of in SSE2. // pshufb or SSE4 instructions can be used instead. u32 val = mips->fi[inst->src1] >> 24; val |= (mips->fi[inst->src1 + 1] >> 16) & 0xFF00; val |= (mips->fi[inst->src1 + 2] >> 8) & 0xFF0000; val |= (mips->fi[inst->src1 + 3]) & 0xFF000000; mips->fi[inst->dest] = val; break; } case IROp::Vec4Pack31To8: { // Removed previous SSE code due to the need for unsigned 16-bit pack, which I'm too lazy to work around the lack of in SSE2. // pshufb or SSE4 instructions can be used instead. u32 val = (mips->fi[inst->src1] >> 23) & 0xFF; val |= (mips->fi[inst->src1 + 1] >> 15) & 0xFF00; val |= (mips->fi[inst->src1 + 2] >> 7) & 0xFF0000; val |= (mips->fi[inst->src1 + 3] << 1) & 0xFF000000; mips->fi[inst->dest] = val; break; } case IROp::Vec2ClampToZero: { for (int i = 0; i < 2; i++) { u32 val = mips->fi[inst->src1 + i]; mips->fi[inst->dest + i] = (int)val >= 0 ? val : 0; } break; } case IROp::Vec4ClampToZero: { #if defined(_M_SSE) // Trickery: Expand the sign bit, and use andnot to zero negative values. __m128i val = _mm_load_si128((const __m128i *)&mips->fi[inst->src1]); __m128i mask = _mm_srai_epi32(val, 31); val = _mm_andnot_si128(mask, val); _mm_store_si128((__m128i *)&mips->fi[inst->dest], val); #else for (int i = 0; i < 4; i++) { u32 val = mips->fi[inst->src1 + i]; mips->fi[inst->dest + i] = (int)val >= 0 ? val : 0; } #endif break; } case IROp::Vec4DuplicateUpperBitsAndShift1: // For vuc2i, the weird one. { for (int i = 0; i < 4; i++) { u32 val = mips->fi[inst->src1 + i]; val = val | (val >> 8); val = val | (val >> 16); val >>= 1; mips->fi[inst->dest + i] = val; } break; } case IROp::FCmpVfpuBit: { int op = inst->dest & 0xF; int bit = inst->dest >> 4; int result = 0; switch (op) { case VC_EQ: result = mips->f[inst->src1] == mips->f[inst->src2]; break; case VC_NE: result = mips->f[inst->src1] != mips->f[inst->src2]; break; case VC_LT: result = mips->f[inst->src1] < mips->f[inst->src2]; break; case VC_LE: result = mips->f[inst->src1] <= mips->f[inst->src2]; break; case VC_GT: result = mips->f[inst->src1] > mips->f[inst->src2]; break; case VC_GE: result = mips->f[inst->src1] >= mips->f[inst->src2]; break; case VC_EZ: result = mips->f[inst->src1] == 0.0f; break; case VC_NZ: result = mips->f[inst->src1] != 0.0f; break; case VC_EN: result = my_isnan(mips->f[inst->src1]); break; case VC_NN: result = !my_isnan(mips->f[inst->src1]); break; case VC_EI: result = my_isinf(mips->f[inst->src1]); break; case VC_NI: result = !my_isinf(mips->f[inst->src1]); break; case VC_ES: result = my_isnanorinf(mips->f[inst->src1]); break; case VC_NS: result = !my_isnanorinf(mips->f[inst->src1]); break; case VC_TR: result = 1; break; case VC_FL: result = 0; break; default: result = 0; } if (result != 0) { mips->vfpuCtrl[VFPU_CTRL_CC] |= (1 << bit); } else { mips->vfpuCtrl[VFPU_CTRL_CC] &= ~(1 << bit); } break; } case IROp::FCmpVfpuAggregate: { u32 mask = inst->dest; u32 cc = mips->vfpuCtrl[VFPU_CTRL_CC]; int anyBit = (cc & mask) ? 0x10 : 0x00; int allBit = (cc & mask) == mask ? 0x20 : 0x00; mips->vfpuCtrl[VFPU_CTRL_CC] = (cc & ~0x30) | anyBit | allBit; break; } case IROp::FCmovVfpuCC: if (((mips->vfpuCtrl[VFPU_CTRL_CC] >> (inst->src2 & 0xf)) & 1) == ((u32)inst->src2 >> 7)) { mips->f[inst->dest] = mips->f[inst->src1]; } break; // Not quickly implementable on all platforms, unfortunately. case IROp::Vec4Dot: { float dot = mips->f[inst->src1] * mips->f[inst->src2]; for (int i = 1; i < 4; i++) dot += mips->f[inst->src1 + i] * mips->f[inst->src2 + i]; mips->f[inst->dest] = dot; break; } case IROp::FSin: mips->f[inst->dest] = vfpu_sin(mips->f[inst->src1]); break; case IROp::FCos: mips->f[inst->dest] = vfpu_cos(mips->f[inst->src1]); break; case IROp::FRSqrt: mips->f[inst->dest] = 1.0f / sqrtf(mips->f[inst->src1]); break; case IROp::FRecip: mips->f[inst->dest] = 1.0f / mips->f[inst->src1]; break; case IROp::FAsin: mips->f[inst->dest] = vfpu_asin(mips->f[inst->src1]); break; case IROp::ShlImm: mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2; break; case IROp::ShrImm: mips->r[inst->dest] = mips->r[inst->src1] >> (int)inst->src2; break; case IROp::SarImm: mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (int)inst->src2; break; case IROp::RorImm: { u32 x = mips->r[inst->src1]; int sa = inst->src2; mips->r[inst->dest] = (x >> sa) | (x << (32 - sa)); } break; case IROp::Shl: mips->r[inst->dest] = mips->r[inst->src1] << (mips->r[inst->src2] & 31); break; case IROp::Shr: mips->r[inst->dest] = mips->r[inst->src1] >> (mips->r[inst->src2] & 31); break; case IROp::Sar: mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (mips->r[inst->src2] & 31); break; case IROp::Ror: { u32 x = mips->r[inst->src1]; int sa = mips->r[inst->src2] & 31; mips->r[inst->dest] = (x >> sa) | (x << (32 - sa)); break; } case IROp::Clz: { mips->r[inst->dest] = clz32(mips->r[inst->src1]); break; } case IROp::Slt: mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2]; break; case IROp::SltU: mips->r[inst->dest] = mips->r[inst->src1] < mips->r[inst->src2]; break; case IROp::SltConst: mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)inst->constant; break; case IROp::SltUConst: mips->r[inst->dest] = mips->r[inst->src1] < inst->constant; break; case IROp::MovZ: if (mips->r[inst->src1] == 0) mips->r[inst->dest] = mips->r[inst->src2]; break; case IROp::MovNZ: if (mips->r[inst->src1] != 0) mips->r[inst->dest] = mips->r[inst->src2]; break; case IROp::Max: mips->r[inst->dest] = (s32)mips->r[inst->src1] > (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2]; break; case IROp::Min: mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2]; break; case IROp::MtLo: mips->lo = mips->r[inst->src1]; break; case IROp::MtHi: mips->hi = mips->r[inst->src1]; break; case IROp::MfLo: mips->r[inst->dest] = mips->lo; break; case IROp::MfHi: mips->r[inst->dest] = mips->hi; break; case IROp::Mult: { s64 result = (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2]; memcpy(&mips->lo, &result, 8); break; } case IROp::MultU: { u64 result = (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2]; memcpy(&mips->lo, &result, 8); break; } case IROp::Madd: { s64 result; memcpy(&result, &mips->lo, 8); result += (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2]; memcpy(&mips->lo, &result, 8); break; } case IROp::MaddU: { s64 result; memcpy(&result, &mips->lo, 8); result += (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2]; memcpy(&mips->lo, &result, 8); break; } case IROp::Msub: { s64 result; memcpy(&result, &mips->lo, 8); result -= (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2]; memcpy(&mips->lo, &result, 8); break; } case IROp::MsubU: { s64 result; memcpy(&result, &mips->lo, 8); result -= (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2]; memcpy(&mips->lo, &result, 8); break; } case IROp::Div: { s32 numerator = (s32)mips->r[inst->src1]; s32 denominator = (s32)mips->r[inst->src2]; if (numerator == (s32)0x80000000 && denominator == -1) { mips->lo = 0x80000000; mips->hi = -1; } else if (denominator != 0) { mips->lo = (u32)(numerator / denominator); mips->hi = (u32)(numerator % denominator); } else { mips->lo = numerator < 0 ? 1 : -1; mips->hi = numerator; } break; } case IROp::DivU: { u32 numerator = mips->r[inst->src1]; u32 denominator = mips->r[inst->src2]; if (denominator != 0) { mips->lo = numerator / denominator; mips->hi = numerator % denominator; } else { mips->lo = numerator <= 0xFFFF ? 0xFFFF : -1; mips->hi = numerator; } break; } case IROp::BSwap16: { u32 x = mips->r[inst->src1]; mips->r[inst->dest] = ((x & 0xFF00FF00) >> 8) | ((x & 0x00FF00FF) << 8); break; } case IROp::BSwap32: { u32 x = mips->r[inst->src1]; mips->r[inst->dest] = ((x & 0xFF000000) >> 24) | ((x & 0x00FF0000) >> 8) | ((x & 0x0000FF00) << 8) | ((x & 0x000000FF) << 24); break; } case IROp::FAdd: mips->f[inst->dest] = mips->f[inst->src1] + mips->f[inst->src2]; break; case IROp::FSub: mips->f[inst->dest] = mips->f[inst->src1] - mips->f[inst->src2]; break; case IROp::FMul: if ((my_isinf(mips->f[inst->src1]) && mips->f[inst->src2] == 0.0f) || (my_isinf(mips->f[inst->src2]) && mips->f[inst->src1] == 0.0f)) { mips->fi[inst->dest] = 0x7fc00000; } else { mips->f[inst->dest] = mips->f[inst->src1] * mips->f[inst->src2]; } break; case IROp::FDiv: mips->f[inst->dest] = mips->f[inst->src1] / mips->f[inst->src2]; break; case IROp::FMin: if (my_isnan(mips->f[inst->src1]) || my_isnan(mips->f[inst->src2])) { // See interpreter for this logic: this is for vmin, we're comparing mantissa+exp. if (mips->fi[inst->src1] < 0 && mips->fi[inst->src2] < 0) { mips->fi[inst->dest] = std::max(mips->fi[inst->src1], mips->fi[inst->src2]); } else { mips->fi[inst->dest] = std::min(mips->fi[inst->src1], mips->fi[inst->src2]); } } else { mips->f[inst->dest] = std::min(mips->f[inst->src1], mips->f[inst->src2]); } break; case IROp::FMax: if (my_isnan(mips->f[inst->src1]) || my_isnan(mips->f[inst->src2])) { // See interpreter for this logic: this is for vmax, we're comparing mantissa+exp. if (mips->fi[inst->src1] < 0 && mips->fi[inst->src2] < 0) { mips->fi[inst->dest] = std::min(mips->fi[inst->src1], mips->fi[inst->src2]); } else { mips->fi[inst->dest] = std::max(mips->fi[inst->src1], mips->fi[inst->src2]); } } else { mips->f[inst->dest] = std::max(mips->f[inst->src1], mips->f[inst->src2]); } break; case IROp::FMov: mips->f[inst->dest] = mips->f[inst->src1]; break; case IROp::FAbs: mips->f[inst->dest] = fabsf(mips->f[inst->src1]); break; case IROp::FSqrt: mips->f[inst->dest] = sqrtf(mips->f[inst->src1]); break; case IROp::FNeg: mips->f[inst->dest] = -mips->f[inst->src1]; break; case IROp::FSat0_1: // We have to do this carefully to handle NAN and -0.0f. mips->f[inst->dest] = vfpu_clamp(mips->f[inst->src1], 0.0f, 1.0f); break; case IROp::FSatMinus1_1: mips->f[inst->dest] = vfpu_clamp(mips->f[inst->src1], -1.0f, 1.0f); break; // Bitwise trickery case IROp::FSign: { u32 val; memcpy(&val, &mips->f[inst->src1], sizeof(u32)); if (val == 0 || val == 0x80000000) mips->f[inst->dest] = 0.0f; else if ((val >> 31) == 0) mips->f[inst->dest] = 1.0f; else mips->f[inst->dest] = -1.0f; break; } case IROp::FpCondToReg: mips->r[inst->dest] = mips->fpcond; break; case IROp::FpCtrlFromReg: mips->fcr31 = mips->r[inst->src1] & 0x0181FFFF; // Extract the new fpcond value. // TODO: Is it really helping us to keep it separate? mips->fpcond = (mips->fcr31 >> 23) & 1; break; case IROp::FpCtrlToReg: // Update the fpcond bit first. mips->fcr31 = (mips->fcr31 & ~(1 << 23)) | ((mips->fpcond & 1) << 23); mips->r[inst->dest] = mips->fcr31; break; case IROp::VfpuCtrlToReg: mips->r[inst->dest] = mips->vfpuCtrl[inst->src1]; break; case IROp::FRound: { float value = mips->f[inst->src1]; if (my_isnanorinf(value)) { mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL; break; } else { mips->fs[inst->dest] = (int)round_ieee_754(value); } break; } case IROp::FTrunc: { float value = mips->f[inst->src1]; if (my_isnanorinf(value)) { mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL; break; } else { if (value >= 0.0f) { mips->fs[inst->dest] = (int)floorf(value); // Overflow, but it was positive. if (mips->fs[inst->dest] == -2147483648LL) { mips->fs[inst->dest] = 2147483647LL; } } else { // Overflow happens to be the right value anyway. mips->fs[inst->dest] = (int)ceilf(value); } break; } } case IROp::FCeil: { float value = mips->f[inst->src1]; if (my_isnanorinf(value)) { mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL; break; } else { mips->fs[inst->dest] = (int)ceilf(value); } break; } case IROp::FFloor: { float value = mips->f[inst->src1]; if (my_isnanorinf(value)) { mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL; break; } else { mips->fs[inst->dest] = (int)floorf(value); } break; } case IROp::FCmp: switch (inst->dest) { case IRFpCompareMode::False: mips->fpcond = 0; break; case IRFpCompareMode::EitherUnordered: { float a = mips->f[inst->src1]; float b = mips->f[inst->src2]; mips->fpcond = !(a > b || a < b || a == b); break; } case IRFpCompareMode::EqualOrdered: mips->fpcond = mips->f[inst->src1] == mips->f[inst->src2]; break; case IRFpCompareMode::EqualUnordered: mips->fpcond = mips->f[inst->src1] == mips->f[inst->src2] || my_isnan(mips->f[inst->src1]) || my_isnan(mips->f[inst->src2]); break; case IRFpCompareMode::LessEqualOrdered: mips->fpcond = mips->f[inst->src1] <= mips->f[inst->src2]; break; case IRFpCompareMode::LessEqualUnordered: mips->fpcond = !(mips->f[inst->src1] > mips->f[inst->src2]); break; case IRFpCompareMode::LessOrdered: mips->fpcond = mips->f[inst->src1] < mips->f[inst->src2]; break; case IRFpCompareMode::LessUnordered: mips->fpcond = !(mips->f[inst->src1] >= mips->f[inst->src2]); break; } break; case IROp::FCvtSW: mips->f[inst->dest] = (float)mips->fs[inst->src1]; break; case IROp::FCvtWS: { float src = mips->f[inst->src1]; if (my_isnanorinf(src)) { mips->fs[inst->dest] = my_isinf(src) && src < 0.0f ? -2147483648LL : 2147483647LL; break; } switch (mips->fcr31 & 3) { case 0: mips->fs[inst->dest] = (int)round_ieee_754(src); break; // RINT_0 case 1: mips->fs[inst->dest] = (int)src; break; // CAST_1 case 2: mips->fs[inst->dest] = (int)ceilf(src); break; // CEIL_2 case 3: mips->fs[inst->dest] = (int)floorf(src); break; // FLOOR_3 } break; //cvt.w.s } case IROp::FCvtScaledSW: mips->f[inst->dest] = (float)mips->fs[inst->src1] * (1.0f / (1UL << (inst->src2 & 0x1F))); break; case IROp::FCvtScaledWS: { float src = mips->f[inst->src1]; if (my_isnan(src)) { // TODO: True for negatives too? mips->fs[inst->dest] = 2147483647L; break; } float mult = (float)(1UL << (inst->src2 & 0x1F)); double sv = src * mult; // (float)0x7fffffff == (float)0x80000000 // Cap/floor it to 0x7fffffff / 0x80000000 if (sv > (double)0x7fffffff) { mips->fs[inst->dest] = 0x7fffffff; } else if (sv <= (double)(int)0x80000000) { mips->fs[inst->dest] = 0x80000000; } else { switch (inst->src2 >> 6) { case 0: mips->fs[inst->dest] = (int)round_ieee_754(sv); break; case 1: mips->fs[inst->dest] = src >= 0 ? (int)floor(sv) : (int)ceil(sv); break; case 2: mips->fs[inst->dest] = (int)ceil(sv); break; case 3: mips->fs[inst->dest] = (int)floor(sv); break; } } break; } case IROp::ZeroFpCond: mips->fpcond = 0; break; case IROp::FMovFromGPR: memcpy(&mips->f[inst->dest], &mips->r[inst->src1], 4); break; case IROp::FMovToGPR: memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4); break; case IROp::ExitToConst: return inst->constant; case IROp::ExitToReg: return mips->r[inst->src1]; case IROp::ExitToConstIfEq: if (mips->r[inst->src1] == mips->r[inst->src2]) return inst->constant; break; case IROp::ExitToConstIfNeq: if (mips->r[inst->src1] != mips->r[inst->src2]) return inst->constant; break; case IROp::ExitToConstIfGtZ: if ((s32)mips->r[inst->src1] > 0) return inst->constant; break; case IROp::ExitToConstIfGeZ: if ((s32)mips->r[inst->src1] >= 0) return inst->constant; break; case IROp::ExitToConstIfLtZ: if ((s32)mips->r[inst->src1] < 0) return inst->constant; break; case IROp::ExitToConstIfLeZ: if ((s32)mips->r[inst->src1] <= 0) return inst->constant; break; case IROp::Downcount: mips->downcount -= inst->constant; break; case IROp::SetPC: mips->pc = mips->r[inst->src1]; break; case IROp::SetPCConst: mips->pc = inst->constant; break; case IROp::Syscall: // IROp::SetPC was (hopefully) executed before. { MIPSOpcode op(inst->constant); CallSyscall(op); if (coreState != CORE_RUNNING) CoreTiming::ForceCheck(); break; } case IROp::ExitToPC: return mips->pc; case IROp::Interpret: // SLOW fallback. Can be made faster. Ideally should be removed but may be useful for debugging. { MIPSOpcode op(inst->constant); MIPSInterpret(op); break; } case IROp::CallReplacement: { int funcIndex = inst->constant; const ReplacementTableEntry *f = GetReplacementFunc(funcIndex); int cycles = f->replaceFunc(); mips->downcount -= cycles; break; } case IROp::Break: Core_Break(mips->pc); return mips->pc + 4; case IROp::SetCtrlVFPU: mips->vfpuCtrl[inst->dest] = inst->constant; break; case IROp::SetCtrlVFPUReg: mips->vfpuCtrl[inst->dest] = mips->r[inst->src1]; break; case IROp::SetCtrlVFPUFReg: memcpy(&mips->vfpuCtrl[inst->dest], &mips->f[inst->src1], 4); break; case IROp::Breakpoint: if (RunBreakpoint(mips->pc)) { CoreTiming::ForceCheck(); return mips->pc; } break; case IROp::MemoryCheck: if (RunMemCheck(mips->pc, mips->r[inst->src1] + inst->constant)) { CoreTiming::ForceCheck(); return mips->pc; } break; case IROp::ApplyRoundingMode: // TODO: Implement break; case IROp::RestoreRoundingMode: // TODO: Implement break; case IROp::UpdateRoundingMode: // TODO: Implement break; default: // Unimplemented IR op. Bad. Crash(); } #ifdef _DEBUG if (mips->r[0] != 0) Crash(); #endif inst++; } // We hit count. If this is a full block, it was badly constructed. return 0; }