ARM64: Fix bug where we didn't save the FP registers correctly in the vertex decoder.

Also port a few ops (SQXTN, SQXTN2, UQXTN, UQXTN2, XTN2) from Dolphin's ARM64 emitter.
This commit is contained in:
Henrik Rydgard 2015-06-14 12:56:44 +02:00
parent 4f3d18fcb7
commit 2c05334d47
6 changed files with 40 additions and 13 deletions

View file

@ -2928,10 +2928,29 @@ void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale)
int imm = size * 2 - scale;
EmitShiftImm(IsQuad(Rd), 1, imm >> 3, imm & 7, 0x1C, Rd, Rn);
}
// Emits the SQXTN encoding: a two-register-misc instruction with opcode 0x14.
//   dest_size - reduced to the encoded size field via dest_size >> 4
//               (8 -> 0, 16 -> 1, 32 -> 2).
//   Rd, Rn    - destination and source registers, passed through unchanged.
// The two leading arguments are false and 0 here; presumably these are the
// Q (upper-half) and U (unsigned) bits -- confirm against Emit2RegMisc.
void ARM64FloatEmitter::SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
{
	const int size_field = dest_size >> 4;
	Emit2RegMisc(false, 0, size_field, 0x14, Rd, Rn);
}
// Emits the SQXTN2 encoding: same opcode (0x14) and second flag (0) as SQXTN,
// but with the first Emit2RegMisc argument set to true.
//   dest_size - reduced to the encoded size field via dest_size >> 4
//               (8 -> 0, 16 -> 1, 32 -> 2).
//   Rd, Rn    - destination and source registers, passed through unchanged.
// NOTE(review): the first argument is presumably the Q bit that selects the
// upper half of Rd -- confirm against Emit2RegMisc.
void ARM64FloatEmitter::SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
{
	const int size_field = dest_size >> 4;
	Emit2RegMisc(true, 0, size_field, 0x14, Rd, Rn);
}
// Emits the UQXTN encoding: opcode 0x14 like SQXTN, but with the second
// Emit2RegMisc argument set to 1 instead of 0.
//   dest_size - reduced to the encoded size field via dest_size >> 4
//               (8 -> 0, 16 -> 1, 32 -> 2).
//   Rd, Rn    - destination and source registers, passed through unchanged.
// NOTE(review): the second argument is presumably the U (unsigned) bit --
// confirm against Emit2RegMisc.
void ARM64FloatEmitter::UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
{
	const int size_field = dest_size >> 4;
	Emit2RegMisc(false, 1, size_field, 0x14, Rd, Rn);
}
// Emits the UQXTN2 encoding: opcode 0x14 with both leading Emit2RegMisc
// arguments set (true, 1).
//   dest_size - reduced to the encoded size field via dest_size >> 4
//               (8 -> 0, 16 -> 1, 32 -> 2).
//   Rd, Rn    - destination and source registers, passed through unchanged.
// NOTE(review): the leading arguments are presumably the Q (upper-half) and
// U (unsigned) bits -- confirm against Emit2RegMisc.
void ARM64FloatEmitter::UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
{
	const int size_field = dest_size >> 4;
	Emit2RegMisc(true, 1, size_field, 0x14, Rd, Rn);
}
// Emits the XTN encoding: a two-register-misc instruction with opcode 0x12.
// dest_size is reduced to the encoded size field via dest_size >> 4
// (8 -> 0, 16 -> 1, 32 -> 2); Rd and Rn are passed through unchanged.
void ARM64FloatEmitter::XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
{
// NOTE(review): as written, two instructions are emitted back to back. The
// calls differ only in the first argument (IsQuad(Rd) vs. false), which looks
// like the removed and added lines of a diff hunk whose +/- markers were lost.
// Presumably only the second (false) call is intended -- confirm before use.
Emit2RegMisc(IsQuad(Rd), 0, dest_size >> 4, 0x12, Rd, Rn);
Emit2RegMisc(false, 0, dest_size >> 4, 0x12, Rd, Rn);
}
// Emits the XTN2 encoding: opcode 0x12 with the first Emit2RegMisc argument
// set to true and the second set to 0.
//   dest_size - reduced to the encoded size field via dest_size >> 4
//               (8 -> 0, 16 -> 1, 32 -> 2).
//   Rd, Rn    - destination and source registers, passed through unchanged.
// NOTE(review): the first argument is presumably the Q bit that selects the
// upper half of Rd -- confirm against Emit2RegMisc.
void ARM64FloatEmitter::XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
{
	const int size_field = dest_size >> 4;
	Emit2RegMisc(true, 0, size_field, 0x12, Rd, Rn);
}
// Move
@ -3341,12 +3360,13 @@ void ARM64FloatEmitter::STP(IndexType index_type, ARM64Reg Rt, ARM64Reg Rt2, ARM
m_emit->EncodeLoadStorePair(0, true, 0, index_type, Rt, Rt2, Rn, imm);
}
// TODO: According to the ABI, we really only need to save the bottom 64 bits of D8-D15.
// Pushes every register selected in `registers` onto the stack.
// For each index yielded by iterating the bit set, one 128-bit store of
// Q<index> is emitted with pre-indexed addressing on SP and an offset of -16,
// so each pushed register takes 16 bytes of stack.
void ARM64FloatEmitter::ABI_PushRegisters(BitSet32 registers)
{
	for (auto reg_index : registers)
	{
		const ARM64Reg quad_reg = (ARM64Reg)(Q0 + reg_index);
		STR(128, INDEX_PRE, quad_reg, SP, -16);
	}
}
void ARM64FloatEmitter::ABI_PopRegisters(BitSet32 registers, BitSet32 ignore_mask)
{
for (int i = 31; i >= 0; --i)

View file

@ -89,6 +89,10 @@ enum ARM64Reg
INVALID_REG = 0xFFFFFFFF
};
// R19-R28, R29 (FP), R30 (LR). FP seems questionable?
// Register bitmask: 0x7FF80000 has bits 19 through 30 set, one bit per
// general-purpose register number listed above.
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
// Register bitmask: 0x0000FF00 has bits 8 through 15 set, one bit per
// floating-point register number.
const u32 ALL_CALLEE_SAVED_FP = 0x0000FF00; // d8-d15
// Returns true when bit 0x20 of the register enum value is set.
inline bool Is64Bit(ARM64Reg reg)
{
	return (reg & 0x20) == 0x20;
}
// Returns true when the two bits masked by 0xC0 equal 0x40
// (bit 6 set, bit 7 clear).
inline bool IsSingle(ARM64Reg reg)
{
	const auto kind_bits = reg & 0xC0;
	return kind_bits == 0x40;
}
// Returns true when the two bits masked by 0xC0 equal 0x80
// (bit 7 set, bit 6 clear).
inline bool IsDouble(ARM64Reg reg)
{
	const auto kind_bits = reg & 0xC0;
	return kind_bits == 0x80;
}
@ -827,6 +831,11 @@ public:
void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
// Narrowing ops. The definitions encode the size field as dest_size >> 4.
// NOTE(review): dest_size looks like the destination element width in bits
// (8/16/32) -- confirm against callers.
void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
void XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
// Move
void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);

View file

@ -95,8 +95,7 @@ using namespace Arm64JitConstants;
void Arm64Jit::GenerateFixedCode() {
enterCode = AlignCode16();
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
BitSet32 regs_to_save(ALL_CALLEE_SAVED);
BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
enterCode = GetCodePtr();
ABI_PushRegisters(regs_to_save);

View file

@ -330,11 +330,9 @@ namespace MIPSComp
}
} else {
_dbg_assert_msg_(JIT, !gpr.IsImm(rs), "Invalid immediate address? CPU bug?");
_dbg_assert_msg_(JIT, g_Config.bFastMemory, "Slow mem doesn't work yet in ARM64! Turn on Fast Memory in system settings");
load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);
if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
// TODO: This doesn't work!
SetCCAndSCRATCH1ForSafeAddress(rs, offset, SCRATCH2);
doCheck = true;
} else {

View file

@ -24,7 +24,7 @@
#include "GPU/GPUState.h"
#include "GPU/Common/VertexDecoderCommon.h"
static float MEMORY_ALIGNED16(bones[16 * 8]); // First two are kept in registers
static float MEMORY_ALIGNED16(bones[16 * 8]); // First four are kept in registers
static float MEMORY_ALIGNED16(boneMask[4]) = {1.0f, 1.0f, 1.0f, 0.0f};
static const float by128 = 1.0f / 128.0f;
@ -135,8 +135,6 @@ static const JitLookup jitLookup[] = {
JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
dec_ = &dec;
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
BitSet32 regs_to_save(ALL_CALLEE_SAVED);
const u8 *start = AlignCode16();
@ -145,7 +143,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
bool prescaleStep = false;
bool skinning = false;
bool log = false;
bool log = true;
// Look for prescaled texcoord steps
for (int i = 0; i < dec.numSteps_; i++) {
@ -163,7 +161,10 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
// if (skinning) log = true;
BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
BitSet32 regs_to_save_fp(Arm64Gen::ALL_CALLEE_SAVED_FP);
ABI_PushRegisters(regs_to_save);
fp.ABI_PushRegisters(regs_to_save_fp);
// Keep the scale/offset in a few fp registers if we need it.
if (prescaleStep) {
@ -245,6 +246,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec) {
SetJumpTarget(skip);
}
fp.ABI_PopRegisters(regs_to_save_fp);
ABI_PopRegisters(regs_to_save);
RET();

View file

@ -37,8 +37,7 @@ void TestCode::Generate()
{
testCodePtr = this->GetCodePtr();
const u32 ALL_CALLEE_SAVED = 0x7FF80000;
BitSet32 regs_to_save(ALL_CALLEE_SAVED);
BitSet32 regs_to_save(Arm64Gen::ALL_CALLEE_SAVED);
const u8 *start = AlignCode16();