ARM64: Fix bug where we didn't save the FP registers correctly in the vertex decoder.

Also port a few ops from Dolphin's ARM64 emitter.
This commit is contained in:
Henrik Rydgard 2015-06-14 12:56:44 +02:00
parent 4f3d18fcb7
commit 2c05334d47
6 changed files with 40 additions and 13 deletions

View file

@ -2928,10 +2928,29 @@ void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale)
int imm = size * 2 - scale; int imm = size * 2 - scale;
EmitShiftImm(IsQuad(Rd), 1, imm >> 3, imm & 7, 0x1C, Rd, Rn); EmitShiftImm(IsQuad(Rd), 1, imm >> 3, imm & 7, 0x1C, Rd, Rn);
} }
// Emits SQXTN through Emit2RegMisc with opcode 0x14, the first (quad) argument
// false and the second argument 0.
// dest_size >> 4 yields the size field passed to the encoder (8 -> 0, 16 -> 1, 32 -> 2).
// NOTE(review): presumably the AArch64 "signed saturating extract narrow"
// instruction writing the lower half of Rd - confirm against the ARM ARM.
void ARM64FloatEmitter::SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
{
Emit2RegMisc(false, 0, dest_size >> 4, 0x14, Rd, Rn);
}
// Emits SQXTN2 through Emit2RegMisc with opcode 0x14, the first (quad) argument
// true and the second argument 0; it differs from SQXTN only in that first argument.
// dest_size >> 4 yields the size field passed to the encoder (8 -> 0, 16 -> 1, 32 -> 2).
// NOTE(review): presumably the "second half" form of signed saturating extract
// narrow, writing the upper half of Rd - confirm against the ARM ARM.
void ARM64FloatEmitter::SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
{
Emit2RegMisc(true, 0, dest_size >> 4, 0x14, Rd, Rn);
}
// Emits UQXTN through Emit2RegMisc with opcode 0x14, the first (quad) argument
// false and the second argument 1; it differs from SQXTN only in that second argument.
// dest_size >> 4 yields the size field passed to the encoder (8 -> 0, 16 -> 1, 32 -> 2).
// NOTE(review): presumably the AArch64 "unsigned saturating extract narrow"
// instruction writing the lower half of Rd - confirm against the ARM ARM.
void ARM64FloatEmitter::UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
{
Emit2RegMisc(false, 1, dest_size >> 4, 0x14, Rd, Rn);
}
// Emits UQXTN2 through Emit2RegMisc with opcode 0x14, the first (quad) argument
// true and the second argument 1; it differs from UQXTN only in that first argument.
// dest_size >> 4 yields the size field passed to the encoder (8 -> 0, 16 -> 1, 32 -> 2).
// NOTE(review): presumably the "second half" form of unsigned saturating extract
// narrow, writing the upper half of Rd - confirm against the ARM ARM.
void ARM64FloatEmitter::UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
{
Emit2RegMisc(true, 1, dest_size >> 4, 0x14, Rd, Rn);
}
void ARM64FloatEmitter::XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn) void ARM64FloatEmitter::XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
{ {
Emit2RegMisc(IsQuad(Rd), 0, dest_size >> 4, 0x12, Rd, Rn); Emit2RegMisc(false, 0, dest_size >> 4, 0x12, Rd, Rn);
}
void ARM64FloatEmitter::XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
{
Emit2RegMisc(true, 0, dest_size >> 4, 0x12, Rd, Rn);
} }
// Move // Move
@ -3341,12 +3360,13 @@ void ARM64FloatEmitter::STP(IndexType index_type, ARM64Reg Rt, ARM64Reg Rt2, ARM
m_emit->EncodeLoadStorePair(0, true, 0, index_type, Rt, Rt2, Rn, imm); m_emit->EncodeLoadStorePair(0, true, 0, index_type, Rt, Rt2, Rn, imm);
} }
// TODO: According to the ABI, we really only need to save the bottom 64 bits of D8-D15.
void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers) void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
{ {
for (auto it : registers) for (auto it : registers)
STR(128, INDEX_PRE, (ARM64Reg)(Q0 + it), SP, -16); STR(128, INDEX_PRE, (ARM64Reg)(Q0 + it), SP, -16);
} }
void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask) void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask)
{ {
for (int i = 31; i >= 0; --i) for (int i = 31; i >= 0; --i)

View file

@ -89,6 +89,10 @@ enum ARM64Reg
INVALID_REG = 0xFFFFFFFF INVALID_REG = 0xFFFFFFFF
}; };
// R19-R28, R29 (FP), R30 (LR). FP seems questionable?
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
const u32 ALL_CALLEE_SAVED_FP = 0x0000FF00; // d8-d15
inline bool Is64Bit(ARM64Reg reg) { return (reg & 0x20) != 0; } inline bool Is64Bit(ARM64Reg reg) { return (reg & 0x20) != 0; }
inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; } inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; } inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; }
@ -827,6 +831,11 @@ public:
void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale); void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale); void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn); void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
// Move // Move
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn); void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);

View file

@ -95,8 +95,7 @@ using namespace Arm64JitConstants;
void Arm64Jit::GenerateFixedCode() { void Arm64Jit::GenerateFixedCode() {
enterCode = AlignCode16(); enterCode = AlignCode16();
const u32 ALL_CALLEE_SAVED = 0x7FF80000; BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
BitSet32 regs_to_save(ALL_CALLEE_SAVED);
enterCode = GetCodePtr(); enterCode = GetCodePtr();
ABI_PushRegisters(regs_to_save); ABI_PushRegisters(regs_to_save);

View file

@ -330,11 +330,9 @@ namespace MIPSComp
} }
} else { } else {
_dbg_assert_msg_(JIT, !gpr.IsImm(rs), "Invalid immediate address? CPU bug?"); _dbg_assert_msg_(JIT, !gpr.IsImm(rs), "Invalid immediate address? CPU bug?");
_dbg_assert_msg_(JIT, g_Config.bFastMemory, "Slow mem doesn't work yet in ARM64! Turn on Fast Memory in system settings");
load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs); load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);
if (!g_Config.bFastMemory && rs != MIPS_REG_SP) { if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
// TODO: This doesn't work!
SetCCAndSCRATCH1ForSafeAddress(rs, offset, SCRATCH2); SetCCAndSCRATCH1ForSafeAddress(rs, offset, SCRATCH2);
doCheck = true; doCheck = true;
} else { } else {

View file

@ -24,7 +24,7 @@
#include "GPU/GPUState.h" #include "GPU/GPUState.h"
#include "GPU/Common/VertexDecoderCommon.h" #include "GPU/Common/VertexDecoderCommon.h"
static float MEMORY_ALIGNED16(bones[16 * 8]); // First two are kept in registers static float MEMORY_ALIGNED16(bones[16 * 8]); // First four are kept in registers
static float MEMORY_ALIGNED16(boneMask[4]) = {1.0f, 1.0f, 1.0f, 0.0f}; static float MEMORY_ALIGNED16(boneMask[4]) = {1.0f, 1.0f, 1.0f, 0.0f};
static const float by128 = 1.0f / 128.0f; static const float by128 = 1.0f / 128.0f;
@ -135,8 +135,6 @@ static const JitLookup jitLookup[] = {
JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) { JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
dec_ = &dec; dec_ = &dec;
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
BitSet32 regs_to_save(ALL_CALLEE_SAVED);
const u8 *start = AlignCode16(); const u8 *start = AlignCode16();
@ -145,7 +143,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
bool prescaleStep = false; bool prescaleStep = false;
bool skinning = false; bool skinning = false;
bool log = false; bool log = true;
// Look for prescaled texcoord steps // Look for prescaled texcoord steps
for (int i = 0; i < dec.numSteps_; i++) { for (int i = 0; i < dec.numSteps_; i++) {
@ -163,7 +161,10 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
// if (skinning) log = true; // if (skinning) log = true;
BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
BitSet32 regs_to_save_fp(Arm64Gen::ALL_CALLEE_SAVED_FP);
ABI_PushRegisters(regs_to_save); ABI_PushRegisters(regs_to_save);
fp.ABI_PushRegisters(regs_to_save_fp);
// Keep the scale/offset in a few fp registers if we need it. // Keep the scale/offset in a few fp registers if we need it.
if (prescaleStep) { if (prescaleStep) {
@ -245,6 +246,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
SetJumpTarget(skip); SetJumpTarget(skip);
} }
fp.ABI_PopRegisters(regs_to_save_fp);
ABI_PopRegisters(regs_to_save); ABI_PopRegisters(regs_to_save);
RET(); RET();

View file

@ -37,8 +37,7 @@ void TestCode::Generate()
{ {
testCodePtr = this->GetCodePtr(); testCodePtr = this->GetCodePtr();
const u32 ALL_CALLEE_SAVED = 0x7FF80000; BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
BitSet32 regs_to_save(ALL_CALLEE_SAVED);
const u8 *start = AlignCode16(); const u8 *start = AlignCode16();