ARM64: Fix bug where we didn't save the FP registers correctly in the vertex decoder.
Also port a few ops from Dolphin's ARM64 emitter.
This commit is contained in:
parent
4f3d18fcb7
commit
2c05334d47
6 changed files with 40 additions and 13 deletions
|
@ -2928,10 +2928,29 @@ void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale)
|
|||
int imm = size * 2 - scale;
|
||||
EmitShiftImm(IsQuad(Rd), 1, imm >> 3, imm & 7, 0x1C, Rd, Rn);
|
||||
}
|
||||
|
||||
// Emits the SQXTN instruction for destination Rd and source Rn.
// Forwards to Emit2RegMisc with a false first argument, a second argument of 0,
// and opcode 0x14. dest_size >> 4 turns 8/16/32 into 0/1/2 — presumably the
// element-size field of the encoding; confirm against Emit2RegMisc.
void ARM64FloatEmitter::SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
Emit2RegMisc(false, 0, dest_size >> 4, 0x14, Rd, Rn);
|
||||
}
|
||||
// Emits the SQXTN2 instruction for destination Rd and source Rn.
// Same arguments as SQXTN except that the first Emit2RegMisc argument is true
// (presumably the Q bit selecting the upper-half form — confirm against Emit2RegMisc).
void ARM64FloatEmitter::SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
Emit2RegMisc(true, 0, dest_size >> 4, 0x14, Rd, Rn);
|
||||
}
|
||||
// Emits the UQXTN instruction for destination Rd and source Rn.
// Uses opcode 0x14 like SQXTN, but passes 1 as the second Emit2RegMisc argument
// (presumably the U bit marking the unsigned variant — confirm against Emit2RegMisc).
// dest_size >> 4 turns 8/16/32 into 0/1/2.
void ARM64FloatEmitter::UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
Emit2RegMisc(false, 1, dest_size >> 4, 0x14, Rd, Rn);
|
||||
}
|
||||
// Emits the UQXTN2 instruction for destination Rd and source Rn.
// Same arguments as UQXTN except that the first Emit2RegMisc argument is true
// (presumably the Q bit selecting the upper-half form — confirm against Emit2RegMisc).
void ARM64FloatEmitter::UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
Emit2RegMisc(true, 1, dest_size >> 4, 0x14, Rd, Rn);
|
||||
}
|
||||
// Emits the XTN instruction for destination Rd and source Rn, using opcode 0x12.
// dest_size >> 4 turns 8/16/32 into 0/1/2.
// NOTE(review): two Emit2RegMisc calls appear below. This text is a rendered
// diff, so they are presumably the removed (IsQuad(Rd)) and the added (false)
// version of a single line rather than two emitted instructions — confirm
// against the real source file before relying on either.
void ARM64FloatEmitter::XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
Emit2RegMisc(IsQuad(Rd), 0, dest_size >> 4, 0x12, Rd, Rn);
|
||||
Emit2RegMisc(false, 0, dest_size >> 4, 0x12, Rd, Rn);
|
||||
}
|
||||
// Emits the XTN2 instruction for destination Rd and source Rn.
// Uses opcode 0x12 with a true first Emit2RegMisc argument
// (presumably the Q bit selecting the upper-half form — confirm against Emit2RegMisc).
// dest_size >> 4 turns 8/16/32 into 0/1/2.
void ARM64FloatEmitter::XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
|
||||
{
|
||||
Emit2RegMisc(true, 0, dest_size >> 4, 0x12, Rd, Rn);
|
||||
}
|
||||
|
||||
// Move
|
||||
|
@ -3341,12 +3360,13 @@ void ARM64FloatEmitter::STP(IndexType index_type, ARM64Reg Rt, ARM64Reg Rt2, ARM
|
|||
m_emit->EncodeLoadStorePair(0, true, 0, index_type, Rt, Rt2, Rn, imm);
|
||||
}
|
||||
|
||||
// TODO: According to the ABI, we really only need to save the bottom 64 bits of D8-D15.
|
||||
// Emits one store per entry of `registers`: a 128-bit, pre-indexed STR of
// register Q0 + entry to SP with an offset of -16.
// Presumably iterating a BitSet32 yields the indices of its set bits — confirm
// against the BitSet32 implementation.
void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
|
||||
{
|
||||
for (auto it : registers)
|
||||
STR(128, INDEX_PRE, (ARM64Reg)(Q0 + it), SP, -16);
|
||||

||||
}
|
||||
|
||||
void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask)
|
||||
{
|
||||
for (int i = 31; i >= 0; --i)
|
||||
|
|
|
@ -89,6 +89,10 @@ enum ARM64Reg
|
|||
INVALID_REG = 0xFFFFFFFF
|
||||
};
|
||||
|
||||
// R19-R28, R29 (FP), R30 (LR). FP seems questionable?
|
||||
// 0x7FF80000 has bits 19 through 30 set, i.e. one bit per register listed above.
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
|
||||
// 0x0000FF00 has bits 8 through 15 set, one bit per FP register index.
const u32 ALL_CALLEE_SAVED_FP = 0x0000FF00; // d8-d15
|
||||
|
||||
// Register-kind predicates that test flag bits of the ARM64Reg value.
// True when bit 5 (mask 0x20) of reg is set.
inline bool Is64Bit(ARM64Reg reg) { return (reg & 0x20) != 0; }
|
||||
// True when the bits selected by mask 0xC0 equal 0x40.
inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
|
||||
// True when the bits selected by mask 0xC0 equal 0x80.
inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; }
|
||||
|
@ -827,6 +831,11 @@ public:
|
|||
void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
|
||||
void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
|
||||
void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
void XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
|
||||
// Move
|
||||
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);
|
||||
|
|
|
@ -95,8 +95,7 @@ using namespace Arm64JitConstants;
|
|||
void Arm64Jit::GenerateFixedCode() {
|
||||
enterCode = AlignCode16();
|
||||
|
||||
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
|
||||
BitSet32 regs_to_save(ALL_CALLEE_SAVED);
|
||||
BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
|
||||
enterCode = GetCodePtr();
|
||||
|
||||
ABI_PushRegisters(regs_to_save);
|
||||
|
|
|
@ -330,11 +330,9 @@ namespace MIPSComp
|
|||
}
|
||||
} else {
|
||||
_dbg_assert_msg_(JIT, !gpr.IsImm(rs), "Invalid immediate address? CPU bug?");
|
||||
_dbg_assert_msg_(JIT, g_Config.bFastMemory, "Slow mem doesn't work yet in ARM64! Turn on Fast Memory in system settings");
|
||||
load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);
|
||||
|
||||
if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
|
||||
// TODO: This doesn't work!
|
||||
SetCCAndSCRATCH1ForSafeAddress(rs, offset, SCRATCH2);
|
||||
doCheck = true;
|
||||
} else {
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#include "GPU/GPUState.h"
|
||||
#include "GPU/Common/VertexDecoderCommon.h"
|
||||
|
||||
static float MEMORY_ALIGNED16(bones[16 * 8]); // First two are kept in registers
|
||||
static float MEMORY_ALIGNED16(bones[16 * 8]); // First four are kept in registers
|
||||
static float MEMORY_ALIGNED16(boneMask[4]) = {1.0f, 1.0f, 1.0f, 0.0f};
|
||||
|
||||
static const float by128 = 1.0f / 128.0f;
|
||||
|
@ -135,8 +135,6 @@ static const JitLookup jitLookup[] = {
|
|||
JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
|
||||
dec_ = &dec;
|
||||
|
||||
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
|
||||
BitSet32 regs_to_save(ALL_CALLEE_SAVED);
|
||||
|
||||
const u8 *start = AlignCode16();
|
||||
|
||||
|
@ -145,7 +143,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
|
|||
bool prescaleStep = false;
|
||||
bool skinning = false;
|
||||
|
||||
bool log = false;
|
||||
bool log = true;
|
||||
|
||||
// Look for prescaled texcoord steps
|
||||
for (int i = 0; i < dec.numSteps_; i++) {
|
||||
|
@ -163,7 +161,10 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
|
|||
|
||||
// if (skinning) log = true;
|
||||
|
||||
BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
|
||||
BitSet32 regs_to_save_fp(Arm64Gen::ALL_CALLEE_SAVED_FP);
|
||||
ABI_PushRegisters(regs_to_save);
|
||||
fp.ABI_PushRegisters(regs_to_save_fp);
|
||||
|
||||
// Keep the scale/offset in a few fp registers if we need it.
|
||||
if (prescaleStep) {
|
||||
|
@ -245,6 +246,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
|
|||
SetJumpTarget(skip);
|
||||
}
|
||||
|
||||
fp.ABI_PopRegisters(regs_to_save_fp);
|
||||
ABI_PopRegisters(regs_to_save);
|
||||
|
||||
RET();
|
||||
|
|
|
@ -37,8 +37,7 @@ void TestCode::Generate()
|
|||
{
|
||||
testCodePtr = this->GetCodePtr();
|
||||
|
||||
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
|
||||
BitSet32 regs_to_save(ALL_CALLEE_SAVED);
|
||||
BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
|
||||
|
||||
const u8 *start = AlignCode16();
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue