diff --git a/Common/Arm64Emitter.cpp b/Common/Arm64Emitter.cpp
index cb58a7193..8865ba3bb 100644
--- a/Common/Arm64Emitter.cpp
+++ b/Common/Arm64Emitter.cpp
@@ -2928,10 +2928,29 @@ void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale)
 	int imm = size * 2 - scale;
 	EmitShiftImm(IsQuad(Rd), 1, imm >> 3, imm & 7, 0x1C, Rd, Rn);
 }
-
+void ARM64FloatEmitter::SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(false, 0, dest_size >> 4, 0x14, Rd, Rn);
+}
+void ARM64FloatEmitter::SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(true, 0, dest_size >> 4, 0x14, Rd, Rn);
+}
+void ARM64FloatEmitter::UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(false, 1, dest_size >> 4, 0x14, Rd, Rn);
+}
+void ARM64FloatEmitter::UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(true, 1, dest_size >> 4, 0x14, Rd, Rn);
+}
 void ARM64FloatEmitter::XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
 {
-	Emit2RegMisc(IsQuad(Rd), 0, dest_size >> 4, 0x12, Rd, Rn);
+	Emit2RegMisc(false, 0, dest_size >> 4, 0x12, Rd, Rn);
+}
+void ARM64FloatEmitter::XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
+{
+	Emit2RegMisc(true, 0, dest_size >> 4, 0x12, Rd, Rn);
 }
 
 // Move
@@ -3341,12 +3360,13 @@ void ARM64FloatEmitter::STP(IndexType index_type, ARM64Reg Rt, ARM64Reg Rt2, ARM
 	m_emit->EncodeLoadStorePair(0, true, 0, index_type, Rt, Rt2, Rn, imm);
 }
 
+// TODO: According to the ABI, we really only need to save the bottom 64 bits of D8-D15.
 void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
 {
 	for (auto it : registers)
 		STR(128, INDEX_PRE, (ARM64Reg)(Q0 + it), SP, -16);
-
 }
+
 void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask)
 {
 	for (int i = 31; i >= 0; --i)
diff --git a/Common/Arm64Emitter.h b/Common/Arm64Emitter.h
index d3d2a118a..f1c41d1fc 100644
--- a/Common/Arm64Emitter.h
+++ b/Common/Arm64Emitter.h
@@ -89,6 +89,10 @@ enum ARM64Reg
 	INVALID_REG = 0xFFFFFFFF
 };
 
+// R19-R28, R29 (FP), R30 (LR). FP seems questionable?
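+// Note: each set bit in these masks selects the register whose index matches the bit position (bits 19-30 here => x19-x30).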
+const u32 ALL_CALLEE_SAVED = 0x7FF80000;
+const u32 ALL_CALLEE_SAVED_FP = 0x0000FF00;  // d8-d15
+
 inline bool Is64Bit(ARM64Reg reg) { return (reg & 0x20) != 0; }
 inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
 inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; }
@@ -827,6 +831,11 @@ public:
 	void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
 	void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
 	void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
+	void SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
+	void SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
+	void UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
+	void UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
+	void XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
 
 	// Move
 	void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);
diff --git a/Core/MIPS/ARM64/Arm64Asm.cpp b/Core/MIPS/ARM64/Arm64Asm.cpp
index cb1380ff7..12b903ef0 100644
--- a/Core/MIPS/ARM64/Arm64Asm.cpp
+++ b/Core/MIPS/ARM64/Arm64Asm.cpp
@@ -95,8 +95,7 @@ using namespace Arm64JitConstants;
 
 void Arm64Jit::GenerateFixedCode() {
 	enterCode = AlignCode16();
-	const u32 ALL_CALLEE_SAVED = 0x7FF80000;
-	BitSet32 regs_to_save(ALL_CALLEE_SAVED);
+	BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
 	enterCode = GetCodePtr();
 
 	ABI_PushRegisters(regs_to_save);
diff --git a/Core/MIPS/ARM64/Arm64CompLoadStore.cpp b/Core/MIPS/ARM64/Arm64CompLoadStore.cpp
index 6e8719727..0aa1a0e68 100644
--- a/Core/MIPS/ARM64/Arm64CompLoadStore.cpp
+++ b/Core/MIPS/ARM64/Arm64CompLoadStore.cpp
@@ -330,11 +330,9 @@ namespace MIPSComp
 			}
 		} else {
 			_dbg_assert_msg_(JIT, !gpr.IsImm(rs), "Invalid immediate address? CPU bug?");
-			_dbg_assert_msg_(JIT, g_Config.bFastMemory, "Slow mem doesn't work yet in ARM64! Turn on Fast Memory in system settings");
 			load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);
 
 			if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
-				// TODO: This doesn't work!
 				SetCCAndSCRATCH1ForSafeAddress(rs, offset, SCRATCH2);
 				doCheck = true;
 			} else {
diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp
index 453bb6848..34d987d7c 100644
--- a/GPU/Common/VertexDecoderArm64.cpp
+++ b/GPU/Common/VertexDecoderArm64.cpp
@@ -24,7 +24,7 @@
 #include "GPU/GPUState.h"
 #include "GPU/Common/VertexDecoderCommon.h"
 
-static float MEMORY_ALIGNED16(bones[16 * 8]);  // First two are kept in registers
+static float MEMORY_ALIGNED16(bones[16 * 8]);  // First four are kept in registers
 static float MEMORY_ALIGNED16(boneMask[4]) = {1.0f, 1.0f, 1.0f, 0.0f};
 
 static const float by128 = 1.0f / 128.0f;
@@ -135,8 +135,6 @@ static const JitLookup jitLookup[] = {
 
 JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
 	dec_ = &dec;
-	const u32 ALL_CALLEE_SAVED = 0x7FF80000;
-	BitSet32 regs_to_save(ALL_CALLEE_SAVED);
 
 	const u8 *start = AlignCode16();
 
@@ -145,7 +143,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
 
 	bool prescaleStep = false;
 	bool skinning = false;
-	bool log = false;
+	bool log = true;
 
 	// Look for prescaled texcoord steps
 	for (int i = 0; i < dec.numSteps_; i++) {
@@ -163,7 +161,10 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
 
 	// if (skinning) log = true;
 
+	BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
+	BitSet32 regs_to_save_fp(Arm64Gen::ALL_CALLEE_SAVED_FP);
 	ABI_PushRegisters(regs_to_save);
+	fp.ABI_PushRegisters(regs_to_save_fp);
 
 	// Keep the scale/offset in a few fp registers if we need it.
 	if (prescaleStep) {
@@ -245,6 +246,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
 		SetJumpTarget(skip);
 	}
 
+	fp.ABI_PopRegisters(regs_to_save_fp);
 	ABI_PopRegisters(regs_to_save);
 	RET();
 
diff --git a/android/jni/Arm64EmitterTest.cpp b/android/jni/Arm64EmitterTest.cpp
index 7e15731e5..4005065d7 100644
--- a/android/jni/Arm64EmitterTest.cpp
+++ b/android/jni/Arm64EmitterTest.cpp
@@ -37,8 +37,7 @@ void TestCode::Generate() {
 	testCodePtr = this->GetCodePtr();
 
-	const u32 ALL_CALLEE_SAVED = 0x7FF80000;
-	BitSet32 regs_to_save(ALL_CALLEE_SAVED);
+	BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
 
 	const u8 *start = AlignCode16();
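
A minimal usage sketch of the new narrowing emitters, for illustration only (not part of the patch). It assumes the conventions visible above: fp is the ARM64FloatEmitter member as used in VertexDecoderJitCache, dest_size is the destination element width in bits (as in XTN), and Emit2RegMisc only consumes the register index, so a D name and a Q name with the same index encode the same register.

// Sketch: narrow eight signed 32-bit lanes from Q0 and Q1 into eight
// saturated 16-bit lanes in Q2 using the new SQXTN/SQXTN2 pair.
fp.SQXTN(16, D2, Q0);   // low half:  Q2.4H = saturate16(Q0.4S), upper 64 bits cleared
fp.SQXTN2(16, Q2, Q1);  // high half: Q2.8H[4-7] = saturate16(Q1.4S), low half preserved
// UQXTN/UQXTN2 saturate as unsigned; XTN/XTN2 truncate without saturation.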