instead of using a literal constant, so the hook works for other games using the same function but a different offset (e.g. Zero no Kiseki). Also remove the memcpy() since the original function will do it anyway (and may do it differently if other games call the function with different arguments).
783 lines
24 KiB
C++
783 lines
24 KiB
C++
// Copyright (c) 2013- PPSSPP Project.
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, version 2.0 or later versions.
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
// Official git repository and contact information can be found at
|
|
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
|
|
|
#include <algorithm>
|
|
#include <map>
|
|
|
|
#include "base/basictypes.h"
|
|
#include "base/logging.h"
|
|
#include "Core/Config.h"
|
|
#include "Core/Debugger/Breakpoints.h"
|
|
#include "Core/MemMap.h"
|
|
#include "Core/MIPS/JitCommon/JitCommon.h"
|
|
#include "Core/MIPS/MIPSCodeUtils.h"
|
|
#include "Core/MIPS/MIPSAnalyst.h"
|
|
#include "Core/HLE/ReplaceTables.h"
|
|
#include "Core/HLE/FunctionWrappers.h"
|
|
|
|
#include "GPU/Math3D.h"
|
|
#include "GPU/GPUInterface.h"
|
|
|
|
#if defined(_M_IX86) || defined(_M_X64)
|
|
#include <emmintrin.h>
|
|
#endif
|
|
|
|
// I think these have to be pretty accurate as these are libc replacements,
|
|
// but we can probably get away with approximating the VFPU vsin/vcos and vrot
|
|
// pretty roughly.
|
|
static int Replace_sinf() {
|
|
float f = PARAMF(0);
|
|
RETURNF(sinf(f));
|
|
return 80; // guess number of cycles
|
|
}
|
|
|
|
static int Replace_cosf() {
|
|
float f = PARAMF(0);
|
|
RETURNF(cosf(f));
|
|
return 80; // guess number of cycles
|
|
}
|
|
|
|
static int Replace_tanf() {
|
|
float f = PARAMF(0);
|
|
RETURNF(tanf(f));
|
|
return 80; // guess number of cycles
|
|
}
|
|
|
|
static int Replace_acosf() {
|
|
float f = PARAMF(0);
|
|
RETURNF(acosf(f));
|
|
return 80; // guess number of cycles
|
|
}
|
|
|
|
static int Replace_asinf() {
|
|
float f = PARAMF(0);
|
|
RETURNF(asinf(f));
|
|
return 80; // guess number of cycles
|
|
}
|
|
|
|
static int Replace_atanf() {
|
|
float f = PARAMF(0);
|
|
RETURNF(atanf(f));
|
|
return 80; // guess number of cycles
|
|
}
|
|
|
|
static int Replace_sqrtf() {
|
|
float f = PARAMF(0);
|
|
RETURNF(sqrtf(f));
|
|
return 80; // guess number of cycles
|
|
}
|
|
|
|
static int Replace_atan2f() {
|
|
float f1 = PARAMF(0);
|
|
float f2 = PARAMF(1);
|
|
RETURNF(atan2f(f1, f2));
|
|
return 120; // guess number of cycles
|
|
}
|
|
|
|
static int Replace_floorf() {
|
|
float f1 = PARAMF(0);
|
|
RETURNF(floorf(f1));
|
|
return 30; // guess number of cycles
|
|
}
|
|
|
|
static int Replace_ceilf() {
|
|
float f1 = PARAMF(0);
|
|
RETURNF(ceilf(f1));
|
|
return 30; // guess number of cycles
|
|
}
|
|
|
|
// Should probably do JIT versions of this, possibly ones that only delegate
|
|
// large copies to a C function.
|
|
static int Replace_memcpy() {
|
|
u32 destPtr = PARAM(0);
|
|
u32 srcPtr = PARAM(1);
|
|
u32 bytes = PARAM(2);
|
|
bool skip = false;
|
|
|
|
// Some games use memcpy on executable code. We need to flush emuhack ops.
|
|
currentMIPS->InvalidateICache(srcPtr, bytes);
|
|
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
|
|
skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes);
|
|
}
|
|
if (!skip && bytes != 0) {
|
|
u8 *dst = Memory::GetPointer(destPtr);
|
|
const u8 *src = Memory::GetPointer(srcPtr);
|
|
|
|
if (!dst || !src) {
|
|
// Already logged.
|
|
} else if (std::min(destPtr, srcPtr) + bytes > std::max(destPtr, srcPtr)) {
|
|
// Overlap. Star Ocean breaks if it's not handled in 16 bytes blocks.
|
|
const u32 blocks = bytes & ~0x0f;
|
|
for (u32 offset = 0; offset < blocks; offset += 0x10) {
|
|
memcpy(dst + offset, src + offset, 0x10);
|
|
}
|
|
for (u32 offset = blocks; offset < bytes; ++offset) {
|
|
dst[offset] = src[offset];
|
|
}
|
|
} else {
|
|
memmove(dst, src, bytes);
|
|
}
|
|
}
|
|
RETURN(destPtr);
|
|
#ifndef MOBILE_DEVICE
|
|
CBreakPoints::ExecMemCheck(srcPtr, false, bytes, currentMIPS->pc);
|
|
CBreakPoints::ExecMemCheck(destPtr, true, bytes, currentMIPS->pc);
|
|
#endif
|
|
return 10 + bytes / 4; // approximation
|
|
}
|
|
|
|
static int Replace_memcpy16() {
|
|
u32 destPtr = PARAM(0);
|
|
u32 srcPtr = PARAM(1);
|
|
u32 bytes = PARAM(2) * 16;
|
|
bool skip = false;
|
|
|
|
// Some games use memcpy on executable code. We need to flush emuhack ops.
|
|
currentMIPS->InvalidateICache(srcPtr, bytes);
|
|
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
|
|
skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes);
|
|
}
|
|
if (!skip && bytes != 0) {
|
|
u8 *dst = Memory::GetPointer(destPtr);
|
|
const u8 *src = Memory::GetPointer(srcPtr);
|
|
if (dst && src) {
|
|
memmove(dst, src, bytes);
|
|
}
|
|
}
|
|
RETURN(destPtr);
|
|
#ifndef MOBILE_DEVICE
|
|
CBreakPoints::ExecMemCheck(srcPtr, false, bytes, currentMIPS->pc);
|
|
CBreakPoints::ExecMemCheck(destPtr, true, bytes, currentMIPS->pc);
|
|
#endif
|
|
return 10 + bytes / 4; // approximation
|
|
}
|
|
|
|
static int Replace_memcpy_swizzled() {
|
|
u32 destPtr = PARAM(0);
|
|
u32 srcPtr = PARAM(1);
|
|
u32 pitch = PARAM(2);
|
|
u32 h = PARAM(4);
|
|
if (Memory::IsVRAMAddress(srcPtr)) {
|
|
gpu->PerformMemoryDownload(srcPtr, pitch * h);
|
|
}
|
|
u8 *dstp = Memory::GetPointer(destPtr);
|
|
const u8 *srcp = Memory::GetPointer(srcPtr);
|
|
|
|
if (dstp && srcp) {
|
|
const u8 *ysrcp = srcp;
|
|
for (u32 y = 0; y < h; y += 8) {
|
|
const u8 *xsrcp = ysrcp;
|
|
for (u32 x = 0; x < pitch; x += 16) {
|
|
const u8 *src = xsrcp;
|
|
for (int n = 0; n < 8; ++n) {
|
|
memcpy(dstp, src, 16);
|
|
src += pitch;
|
|
dstp += 16;
|
|
}
|
|
xsrcp += 16;
|
|
}
|
|
ysrcp += 8 * pitch;
|
|
}
|
|
}
|
|
|
|
RETURN(0);
|
|
#ifndef MOBILE_DEVICE
|
|
CBreakPoints::ExecMemCheck(srcPtr, false, pitch * h, currentMIPS->pc);
|
|
CBreakPoints::ExecMemCheck(destPtr, true, pitch * h, currentMIPS->pc);
|
|
#endif
|
|
return 10 + (pitch * h) / 4; // approximation
|
|
}
|
|
|
|
static int Replace_memmove() {
|
|
u32 destPtr = PARAM(0);
|
|
u32 srcPtr = PARAM(1);
|
|
u32 bytes = PARAM(2);
|
|
bool skip = false;
|
|
|
|
// Some games use memcpy on executable code. We need to flush emuhack ops.
|
|
currentMIPS->InvalidateICache(srcPtr, bytes);
|
|
if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) {
|
|
skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes);
|
|
}
|
|
if (!skip && bytes != 0) {
|
|
u8 *dst = Memory::GetPointer(destPtr);
|
|
const u8 *src = Memory::GetPointer(srcPtr);
|
|
if (dst && src) {
|
|
memmove(dst, src, bytes);
|
|
}
|
|
}
|
|
RETURN(destPtr);
|
|
#ifndef MOBILE_DEVICE
|
|
CBreakPoints::ExecMemCheck(srcPtr, false, bytes, currentMIPS->pc);
|
|
CBreakPoints::ExecMemCheck(destPtr, true, bytes, currentMIPS->pc);
|
|
#endif
|
|
return 10 + bytes / 4; // approximation
|
|
}
|
|
|
|
static int Replace_memset() {
|
|
u32 destPtr = PARAM(0);
|
|
u8 value = PARAM(1);
|
|
u32 bytes = PARAM(2);
|
|
bool skip = false;
|
|
if (Memory::IsVRAMAddress(destPtr)) {
|
|
skip = gpu->PerformMemorySet(destPtr, value, bytes);
|
|
}
|
|
if (!skip && bytes != 0) {
|
|
u8 *dst = Memory::GetPointer(destPtr);
|
|
if (dst) {
|
|
memset(dst, value, bytes);
|
|
}
|
|
}
|
|
RETURN(destPtr);
|
|
#ifndef MOBILE_DEVICE
|
|
CBreakPoints::ExecMemCheck(destPtr, true, bytes, currentMIPS->pc);
|
|
#endif
|
|
return 10 + bytes / 4; // approximation
|
|
}
|
|
|
|
static int Replace_strlen() {
|
|
u32 srcPtr = PARAM(0);
|
|
const char *src = (const char *)Memory::GetPointer(srcPtr);
|
|
u32 len = src ? (u32)strlen(src) : 0UL;
|
|
RETURN(len);
|
|
return 7 + len * 4; // approximation
|
|
}
|
|
|
|
static int Replace_strcpy() {
|
|
u32 destPtr = PARAM(0);
|
|
char *dst = (char *)Memory::GetPointer(destPtr);
|
|
const char *src = (const char *)Memory::GetPointer(PARAM(1));
|
|
if (dst && src) {
|
|
strcpy(dst, src);
|
|
}
|
|
RETURN(destPtr);
|
|
return 10; // approximation
|
|
}
|
|
|
|
static int Replace_strncpy() {
|
|
u32 destPtr = PARAM(0);
|
|
char *dst = (char *)Memory::GetPointer(destPtr);
|
|
const char *src = (const char *)Memory::GetPointer(PARAM(1));
|
|
u32 bytes = PARAM(2);
|
|
if (dst && src && bytes != 0) {
|
|
strncpy(dst, src, bytes);
|
|
}
|
|
RETURN(destPtr);
|
|
return 10; // approximation
|
|
}
|
|
|
|
static int Replace_strcmp() {
|
|
const char *a = (const char *)Memory::GetPointer(PARAM(0));
|
|
const char *b = (const char *)Memory::GetPointer(PARAM(1));
|
|
if (a && b) {
|
|
RETURN(strcmp(a, b));
|
|
} else {
|
|
RETURN(0);
|
|
}
|
|
return 10; // approximation
|
|
}
|
|
|
|
static int Replace_strncmp() {
|
|
const char *a = (const char *)Memory::GetPointer(PARAM(0));
|
|
const char *b = (const char *)Memory::GetPointer(PARAM(1));
|
|
u32 bytes = PARAM(2);
|
|
if (a && b && bytes != 0) {
|
|
RETURN(strncmp(a, b, bytes));
|
|
} else {
|
|
RETURN(0);
|
|
}
|
|
return 10 + bytes / 4; // approximation
|
|
}
|
|
|
|
static int Replace_fabsf() {
|
|
RETURNF(fabsf(PARAMF(0)));
|
|
return 4;
|
|
}
|
|
|
|
static int Replace_vmmul_q_transp() {
|
|
float *out = (float *)Memory::GetPointer(PARAM(0));
|
|
const float *a = (const float *)Memory::GetPointer(PARAM(1));
|
|
const float *b = (const float *)Memory::GetPointer(PARAM(2));
|
|
|
|
// TODO: Actually use an optimized matrix multiply here...
|
|
if (out && b && a) {
|
|
Matrix4ByMatrix4(out, b, a);
|
|
}
|
|
return 16;
|
|
}
|
|
|
|
// a0 = pointer to destination address
|
|
// a1 = matrix
|
|
// a2 = source address
|
|
static int Replace_gta_dl_write_matrix() {
|
|
u32 *ptr = (u32 *)Memory::GetPointer(PARAM(0));
|
|
u32 *dest = (u32_le *)Memory::GetPointer(ptr[0]);
|
|
u32 *src = (u32_le *)Memory::GetPointer(PARAM(2));
|
|
u32 matrix = PARAM(1) << 24;
|
|
|
|
if (ptr && src && dest) {
|
|
#if defined(_M_IX86) || defined(_M_X64)
|
|
__m128i topBytes = _mm_set1_epi32(matrix);
|
|
__m128i m0 = _mm_loadu_si128((const __m128i *)src);
|
|
__m128i m1 = _mm_loadu_si128((const __m128i *)(src + 4));
|
|
__m128i m2 = _mm_loadu_si128((const __m128i *)(src + 8));
|
|
__m128i m3 = _mm_loadu_si128((const __m128i *)(src + 12));
|
|
m0 = _mm_or_si128(_mm_srli_epi32(m0, 8), topBytes);
|
|
m1 = _mm_or_si128(_mm_srli_epi32(m1, 8), topBytes);
|
|
m2 = _mm_or_si128(_mm_srli_epi32(m2, 8), topBytes);
|
|
m3 = _mm_or_si128(_mm_srli_epi32(m3, 8), topBytes);
|
|
// These three stores overlap by a word, due to the offsets.
|
|
_mm_storeu_si128((__m128i *)dest, m0);
|
|
_mm_storeu_si128((__m128i *)(dest + 3), m1);
|
|
_mm_storeu_si128((__m128i *)(dest + 6), m2);
|
|
// Store the last one in parts to not overwrite forwards (probably mostly risk free though)
|
|
_mm_storel_epi64((__m128i *)(dest + 9), m3);
|
|
m3 = _mm_srli_si128(m3, 8);
|
|
_mm_store_ss((float *)(dest + 11), _mm_castsi128_ps(m3));
|
|
#else
|
|
// Bit tricky to SIMD (note the offsets) but should be doable if not perfect
|
|
dest[0] = matrix | (src[0] >> 8);
|
|
dest[1] = matrix | (src[1] >> 8);
|
|
dest[2] = matrix | (src[2] >> 8);
|
|
dest[3] = matrix | (src[4] >> 8);
|
|
dest[4] = matrix | (src[5] >> 8);
|
|
dest[5] = matrix | (src[6] >> 8);
|
|
dest[6] = matrix | (src[8] >> 8);
|
|
dest[7] = matrix | (src[9] >> 8);
|
|
dest[8] = matrix | (src[10] >> 8);
|
|
dest[9] = matrix | (src[12] >> 8);
|
|
dest[10] = matrix | (src[13] >> 8);
|
|
dest[11] = matrix | (src[14] >> 8);
|
|
#endif
|
|
|
|
(*ptr) += 0x30;
|
|
}
|
|
|
|
RETURN(0);
|
|
return 38;
|
|
}
|
|
|
|
|
|
// TODO: Inline into a few NEON or SSE instructions - especially if a1 is a known immediate!
|
|
// Anyway, not sure if worth it. There's not that many matrices written per frame normally.
|
|
static int Replace_dl_write_matrix() {
|
|
u32 *dlStruct = (u32 *)Memory::GetPointer(PARAM(0));
|
|
u32 *dest = (u32 *)Memory::GetPointer(dlStruct[2]);
|
|
u32 *src = (u32 *)Memory::GetPointer(PARAM(2));
|
|
|
|
if (!dlStruct || !dest || !src) {
|
|
RETURN(0);
|
|
return 60;
|
|
}
|
|
|
|
u32 matrix;
|
|
int count = 12;
|
|
switch (PARAM(1)) {
|
|
case 3:
|
|
matrix = 0x40000000; // tex mtx
|
|
break;
|
|
case 2:
|
|
matrix = 0x3A000000;
|
|
break;
|
|
case 1:
|
|
matrix = 0x3C000000;
|
|
break;
|
|
case 0:
|
|
matrix = 0x3E000000;
|
|
count = 16;
|
|
break;
|
|
}
|
|
|
|
*dest++ = matrix;
|
|
matrix += 0x01000000;
|
|
|
|
if (count == 16) {
|
|
// Ultra SIMD friendly! These intrinsics generate pretty much perfect code,
|
|
// no point in hand rolling.
|
|
#if defined(_M_IX86) || defined(_M_X64)
|
|
__m128i topBytes = _mm_set1_epi32(matrix);
|
|
__m128i m0 = _mm_loadu_si128((const __m128i *)src);
|
|
__m128i m1 = _mm_loadu_si128((const __m128i *)(src + 4));
|
|
__m128i m2 = _mm_loadu_si128((const __m128i *)(src + 8));
|
|
__m128i m3 = _mm_loadu_si128((const __m128i *)(src + 12));
|
|
m0 = _mm_or_si128(_mm_srli_epi32(m0, 8), topBytes);
|
|
m1 = _mm_or_si128(_mm_srli_epi32(m1, 8), topBytes);
|
|
m2 = _mm_or_si128(_mm_srli_epi32(m2, 8), topBytes);
|
|
m3 = _mm_or_si128(_mm_srli_epi32(m3, 8), topBytes);
|
|
_mm_storeu_si128((__m128i *)dest, m0);
|
|
_mm_storeu_si128((__m128i *)(dest + 4), m1);
|
|
_mm_storeu_si128((__m128i *)(dest + 8), m2);
|
|
_mm_storeu_si128((__m128i *)(dest + 12), m3);
|
|
#else
|
|
#if 0
|
|
//TODO: Finish NEON, make conditional somehow
|
|
uint32x4_t topBytes = vdupq_n_u32(matrix);
|
|
uint32x4_t m0 = vld1q_u32(dataPtr);
|
|
uint32x4_t m1 = vld1q_u32(dataPtr + 4);
|
|
uint32x4_t m2 = vld1q_u32(dataPtr + 8);
|
|
uint32x4_t m3 = vld1q_u32(dataPtr + 12);
|
|
m0 = vorr_u32(vsri_n_u32(m0, 8), topBytes); // TODO: look into VSRI
|
|
m1 = vorr_u32(vshr_n_u32(m1, 8), topBytes);
|
|
m2 = vorr_u32(vshr_n_u32(m2, 8), topBytes);
|
|
m3 = vorr_u32(vshr_n_u32(m3, 8), topBytes);
|
|
vst1q_u32(dlPtr, m0);
|
|
vst1q_u32(dlPtr + 4, m1);
|
|
vst1q_u32(dlPtr + 8, m2);
|
|
vst1q_u32(dlPtr + 12, m3);
|
|
#endif
|
|
for (int i = 0; i < count; i++) {
|
|
dest[i] = matrix | (src[i] >> 8);
|
|
}
|
|
#endif
|
|
} else {
|
|
#if defined(_M_IX86) || defined(_M_X64)
|
|
__m128i topBytes = _mm_set1_epi32(matrix);
|
|
__m128i m0 = _mm_loadu_si128((const __m128i *)src);
|
|
__m128i m1 = _mm_loadu_si128((const __m128i *)(src + 4));
|
|
__m128i m2 = _mm_loadu_si128((const __m128i *)(src + 8));
|
|
__m128i m3 = _mm_loadu_si128((const __m128i *)(src + 12));
|
|
m0 = _mm_or_si128(_mm_srli_epi32(m0, 8), topBytes);
|
|
m1 = _mm_or_si128(_mm_srli_epi32(m1, 8), topBytes);
|
|
m2 = _mm_or_si128(_mm_srli_epi32(m2, 8), topBytes);
|
|
m3 = _mm_or_si128(_mm_srli_epi32(m3, 8), topBytes);
|
|
// These three stores overlap by a word, due to the offsets.
|
|
_mm_storeu_si128((__m128i *)dest, m0);
|
|
_mm_storeu_si128((__m128i *)(dest + 3), m1);
|
|
_mm_storeu_si128((__m128i *)(dest + 6), m2);
|
|
// Store the last one in parts to not overwrite forwards (probably mostly risk free though)
|
|
_mm_storel_epi64((__m128i *)(dest + 9), m3);
|
|
m3 = _mm_srli_si128(m3, 8);
|
|
_mm_store_ss((float *)(dest + 11), _mm_castsi128_ps(m3));
|
|
#else
|
|
// Bit tricky to SIMD (note the offsets) but should be doable if not perfect
|
|
dest[0] = matrix | (src[0] >> 8);
|
|
dest[1] = matrix | (src[1] >> 8);
|
|
dest[2] = matrix | (src[2] >> 8);
|
|
dest[3] = matrix | (src[4] >> 8);
|
|
dest[4] = matrix | (src[5] >> 8);
|
|
dest[5] = matrix | (src[6] >> 8);
|
|
dest[6] = matrix | (src[8] >> 8);
|
|
dest[7] = matrix | (src[9] >> 8);
|
|
dest[8] = matrix | (src[10] >> 8);
|
|
dest[9] = matrix | (src[12] >> 8);
|
|
dest[10] = matrix | (src[13] >> 8);
|
|
dest[11] = matrix | (src[14] >> 8);
|
|
#endif
|
|
}
|
|
|
|
#ifndef MOBILE_DEVICE
|
|
CBreakPoints::ExecMemCheck(PARAM(2), false, count * sizeof(float), currentMIPS->pc);
|
|
CBreakPoints::ExecMemCheck(PARAM(0) + 2 * sizeof(u32), true, sizeof(u32), currentMIPS->pc);
|
|
CBreakPoints::ExecMemCheck(dlStruct[2], true, (count + 1) * sizeof(u32), currentMIPS->pc);
|
|
#endif
|
|
|
|
dlStruct[2] += (1 + count) * 4;
|
|
RETURN(dlStruct[2]);
|
|
return 60;
|
|
}
|
|
|
|
static bool GetMIPSStaticAddress(u32 &addr, s32 lui_offset, s32 lw_offset) {
|
|
const MIPSOpcode upper = Memory::Read_Instruction(currentMIPS->pc + lui_offset, true);
|
|
if (upper != MIPS_MAKE_LUI(MIPS_GET_RT(upper), upper & 0xffff)) {
|
|
return false;
|
|
}
|
|
const MIPSOpcode lower = Memory::Read_Instruction(currentMIPS->pc + lw_offset, true);
|
|
if (lower != MIPS_MAKE_LW(MIPS_GET_RT(lower), MIPS_GET_RS(lower), lower & 0xffff)) {
|
|
return false;
|
|
}
|
|
addr = ((upper & 0xffff) << 16) + (s16)(lower & 0xffff);
|
|
return true;
|
|
}
|
|
|
|
static int Hook_godseaterburst_blit_texture() {
|
|
u32 texaddr;
|
|
// Only if there's no texture.
|
|
if (!GetMIPSStaticAddress(texaddr, 0x000c, 0x0030)) {
|
|
return 0;
|
|
}
|
|
u32 fb_infoaddr;
|
|
if (Memory::Read_U32(texaddr) != 0 || !GetMIPSStaticAddress(fb_infoaddr, 0x01d0, 0x01d4)) {
|
|
return 0;
|
|
}
|
|
|
|
const u32 fb_info = Memory::Read_U32(fb_infoaddr);
|
|
const u32 fb_address = Memory::Read_U32(fb_info);
|
|
if (Memory::IsVRAMAddress(fb_address)) {
|
|
gpu->PerformMemoryDownload(fb_address, 0x00044000);
|
|
CBreakPoints::ExecMemCheck(fb_address, true, 0x00044000, currentMIPS->pc);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int Hook_hexyzforce_monoclome_thread() {
|
|
u32 fb_info;
|
|
if (!GetMIPSStaticAddress(fb_info, -4, 0)) {
|
|
return 0;
|
|
}
|
|
|
|
const u32 fb_address = Memory::Read_U32(fb_info);
|
|
if (Memory::IsVRAMAddress(fb_address)) {
|
|
gpu->PerformMemoryDownload(fb_address, 0x00088000);
|
|
CBreakPoints::ExecMemCheck(fb_address, true, 0x00088000, currentMIPS->pc);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int Hook_starocean_write_stencil() {
|
|
u32 fb_address = currentMIPS->r[MIPS_REG_T7];
|
|
if (Memory::IsVRAMAddress(fb_address) && !g_Config.bDisableStencilTest) {
|
|
gpu->PerformStencilUpload(fb_address, 0x00088000);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int Hook_topx_create_saveicon() {
|
|
u32 fb_address = currentMIPS->r[MIPS_REG_V0];
|
|
if (Memory::IsVRAMAddress(fb_address)) {
|
|
gpu->PerformMemoryDownload(fb_address, 0x00044000);
|
|
CBreakPoints::ExecMemCheck(fb_address, true, 0x00044000, currentMIPS->pc);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int Hook_ff1_battle_effect() {
|
|
u32 fb_address = currentMIPS->r[MIPS_REG_A1];
|
|
if (Memory::IsVRAMAddress(fb_address)) {
|
|
gpu->PerformMemoryDownload(fb_address, 0x00088000);
|
|
CBreakPoints::ExecMemCheck(fb_address, true, 0x00088000, currentMIPS->pc);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int Hook_dissidia_recordframe_avi() {
|
|
// This is called once per frame, and records that frame's data to avi.
|
|
u32 fb_address = currentMIPS->r[MIPS_REG_A1];
|
|
if (Memory::IsVRAMAddress(fb_address)) {
|
|
gpu->PerformMemoryDownload(fb_address, 0x00044000);
|
|
CBreakPoints::ExecMemCheck(fb_address, true, 0x00044000, currentMIPS->pc);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
static int Hook_brandish_download_frame() {
|
|
u32 fb_infoaddr;
|
|
if (!GetMIPSStaticAddress(fb_infoaddr, 0x2c, 0x30)) {
|
|
return 0;
|
|
}
|
|
const u32 fb_info = Memory::Read_U32(fb_infoaddr);
|
|
const MIPSOpcode fb_index_load = Memory::Read_Instruction(currentMIPS->pc + 0x38, true);
|
|
if (fb_index_load != MIPS_MAKE_LW(MIPS_GET_RT(fb_index_load), MIPS_GET_RS(fb_index_load), fb_index_load & 0xffff)) {
|
|
return 0;
|
|
}
|
|
const u32 fb_index_offset = fb_index_load & 0xffff;
|
|
const u32 fb_index = (Memory::Read_U32(fb_info + fb_index_offset) + 1) & 1;
|
|
const u32 fb_address = 0x4000000 + (0x44000 * fb_index);
|
|
const u32 dest_address = currentMIPS->r[MIPS_REG_A1];
|
|
if (Memory::IsRAMAddress(dest_address)) {
|
|
gpu->PerformMemoryDownload(fb_address, 0x00044000);
|
|
CBreakPoints::ExecMemCheck(fb_address, true, 0x00044000, currentMIPS->pc);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
// Can either replace with C functions or functions emitted in Asm/ArmAsm.
|
|
static const ReplacementTableEntry entries[] = {
|
|
// TODO: I think some games can be helped quite a bit by implementing the
|
|
// double-precision soft-float routines: __adddf3, __subdf3 and so on. These
|
|
// should of course be implemented JIT style, inline.
|
|
|
|
/* These two collide (same hash) and thus can't be replaced :/
|
|
{ "asinf", &Replace_asinf, 0, REPFLAG_DISABLED },
|
|
{ "acosf", &Replace_acosf, 0, REPFLAG_DISABLED },
|
|
*/
|
|
|
|
{ "sinf", &Replace_sinf, 0, REPFLAG_DISABLED },
|
|
{ "cosf", &Replace_cosf, 0, REPFLAG_DISABLED },
|
|
{ "tanf", &Replace_tanf, 0, REPFLAG_DISABLED },
|
|
|
|
{ "atanf", &Replace_atanf, 0, REPFLAG_DISABLED },
|
|
{ "sqrtf", &Replace_sqrtf, 0, REPFLAG_DISABLED },
|
|
{ "atan2f", &Replace_atan2f, 0, REPFLAG_DISABLED },
|
|
{ "floorf", &Replace_floorf, 0, REPFLAG_DISABLED },
|
|
{ "ceilf", &Replace_ceilf, 0, REPFLAG_DISABLED },
|
|
{ "memcpy", &Replace_memcpy, 0, 0 },
|
|
{ "memcpy16", &Replace_memcpy16, 0, 0 },
|
|
{ "memcpy_swizzled", &Replace_memcpy_swizzled, 0, 0 },
|
|
{ "memmove", &Replace_memmove, 0, 0 },
|
|
{ "memset", &Replace_memset, 0, 0 },
|
|
{ "strlen", &Replace_strlen, 0, REPFLAG_DISABLED },
|
|
{ "strcpy", &Replace_strcpy, 0, REPFLAG_DISABLED },
|
|
{ "strncpy", &Replace_strncpy, 0, REPFLAG_DISABLED },
|
|
{ "strcmp", &Replace_strcmp, 0, REPFLAG_DISABLED },
|
|
{ "strncmp", &Replace_strncmp, 0, REPFLAG_DISABLED },
|
|
{ "fabsf", &Replace_fabsf, &MIPSComp::Jit::Replace_fabsf, REPFLAG_ALLOWINLINE | REPFLAG_DISABLED },
|
|
{ "dl_write_matrix", &Replace_dl_write_matrix, 0, REPFLAG_DISABLED }, // &MIPSComp::Jit::Replace_dl_write_matrix, REPFLAG_DISABLED },
|
|
{ "dl_write_matrix_2", &Replace_dl_write_matrix, 0, REPFLAG_DISABLED },
|
|
{ "gta_dl_write_matrix", &Replace_gta_dl_write_matrix, 0, REPFLAG_DISABLED },
|
|
// dl_write_matrix_3 doesn't take the dl as a parameter, it accesses a global instead. Need to extract the address of the global from the code when replacing...
|
|
// Haven't investigated write_matrix_4 and 5 but I think they are similar to 1 and 2.
|
|
|
|
// { "vmmul_q_transp", &Replace_vmmul_q_transp, 0, REPFLAG_DISABLED },
|
|
|
|
{ "godseaterburst_blit_texture", &Hook_godseaterburst_blit_texture, 0, REPFLAG_HOOKENTER},
|
|
{ "hexyzforce_monoclome_thread", &Hook_hexyzforce_monoclome_thread, 0, REPFLAG_HOOKENTER, 0x58},
|
|
{ "starocean_write_stencil", &Hook_starocean_write_stencil, 0, REPFLAG_HOOKENTER, 0x260},
|
|
{ "topx_create_saveicon", &Hook_topx_create_saveicon, 0, REPFLAG_HOOKENTER, 0x34},
|
|
{ "ff1_battle_effect", &Hook_ff1_battle_effect, 0, REPFLAG_HOOKENTER},
|
|
// This is actually used in other games, not just Dissidia.
|
|
{ "dissidia_recordframe_avi", &Hook_dissidia_recordframe_avi, 0, REPFLAG_HOOKENTER},
|
|
{ "brandish_download_frame", &Hook_brandish_download_frame, 0, REPFLAG_HOOKENTER},
|
|
{}
|
|
};
|
|
|
|
|
|
static std::map<u32, u32> replacedInstructions;
|
|
static std::map<std::string, int> replacementNameLookup;
|
|
|
|
void Replacement_Init() {
|
|
for (int i = 0; i < (int)ARRAY_SIZE(entries); i++) {
|
|
const auto entry = &entries[i];
|
|
if (!entry->name || (entry->flags & REPFLAG_DISABLED) != 0)
|
|
continue;
|
|
replacementNameLookup[entry->name] = i;
|
|
}
|
|
}
|
|
|
|
void Replacement_Shutdown() {
|
|
replacedInstructions.clear();
|
|
replacementNameLookup.clear();
|
|
}
|
|
|
|
// TODO: Do something on load state?
|
|
|
|
int GetNumReplacementFuncs() {
|
|
return ARRAY_SIZE(entries);
|
|
}
|
|
|
|
int GetReplacementFuncIndex(u64 hash, int funcSize) {
|
|
const char *name = MIPSAnalyst::LookupHash(hash, funcSize);
|
|
if (!name) {
|
|
return -1;
|
|
}
|
|
|
|
auto index = replacementNameLookup.find(name);
|
|
if (index != replacementNameLookup.end()) {
|
|
return index->second;
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
const ReplacementTableEntry *GetReplacementFunc(int i) {
|
|
return &entries[i];
|
|
}
|
|
|
|
static void WriteReplaceInstruction(u32 address, int index) {
|
|
const u32 prevInstr = Memory::Read_U32(address);
|
|
if (MIPS_IS_REPLACEMENT(prevInstr)) {
|
|
return;
|
|
}
|
|
if (MIPS_IS_RUNBLOCK(prevInstr)) {
|
|
// Likely already both replaced and jitted. Ignore.
|
|
return;
|
|
}
|
|
replacedInstructions[address] = prevInstr;
|
|
Memory::Write_U32(MIPS_EMUHACK_CALL_REPLACEMENT | index, address);
|
|
}
|
|
|
|
void WriteReplaceInstructions(u32 address, u64 hash, int size) {
|
|
int index = GetReplacementFuncIndex(hash, size);
|
|
if (index >= 0) {
|
|
auto entry = GetReplacementFunc(index);
|
|
if (entry->flags & REPFLAG_HOOKEXIT) {
|
|
// When hooking func exit, we search for jr ra, and replace those.
|
|
for (u32 offset = 0; offset < (u32)size; offset += 4) {
|
|
const u32 op = Memory::Read_U32(address + offset);
|
|
if (op == MIPS_MAKE_JR_RA()) {
|
|
WriteReplaceInstruction(address + offset, index);
|
|
}
|
|
}
|
|
} else if (entry->flags & REPFLAG_HOOKENTER) {
|
|
WriteReplaceInstruction(address + entry->hookOffset, index);
|
|
} else {
|
|
WriteReplaceInstruction(address, index);
|
|
}
|
|
INFO_LOG(HLE, "Replaced %s at %08x with hash %016llx", entries[index].name, address, hash);
|
|
}
|
|
}
|
|
|
|
void RestoreReplacedInstruction(u32 address) {
|
|
const u32 curInstr = Memory::Read_U32(address);
|
|
if (MIPS_IS_REPLACEMENT(curInstr)) {
|
|
Memory::Write_U32(replacedInstructions[address], address);
|
|
}
|
|
INFO_LOG(HLE, "Restored replaced func at %08x", address);
|
|
replacedInstructions.erase(address);
|
|
}
|
|
|
|
void RestoreReplacedInstructions(u32 startAddr, u32 endAddr) {
|
|
// Need to be in order, or we'll hang.
|
|
if (endAddr < startAddr)
|
|
std::swap(endAddr, startAddr);
|
|
const auto start = replacedInstructions.lower_bound(startAddr);
|
|
const auto end = replacedInstructions.upper_bound(endAddr);
|
|
int restored = 0;
|
|
for (auto it = start; it != end; ++it) {
|
|
const u32 addr = it->first;
|
|
const u32 curInstr = Memory::Read_U32(addr);
|
|
if (MIPS_IS_REPLACEMENT(curInstr)) {
|
|
Memory::Write_U32(it->second, addr);
|
|
++restored;
|
|
}
|
|
}
|
|
INFO_LOG(HLE, "Restored %d replaced funcs between %08x-%08x", restored, startAddr, endAddr);
|
|
replacedInstructions.erase(start, end);
|
|
}
|
|
|
|
std::map<u32, u32> SaveAndClearReplacements() {
|
|
std::map<u32, u32> saved;
|
|
for (auto it = replacedInstructions.begin(), end = replacedInstructions.end(); it != end; ++it) {
|
|
const u32 addr = it->first;
|
|
const u32 curInstr = Memory::Read_U32(addr);
|
|
if (MIPS_IS_REPLACEMENT(curInstr)) {
|
|
saved[addr] = curInstr;
|
|
Memory::Write_U32(it->second, addr);
|
|
}
|
|
}
|
|
return saved;
|
|
}
|
|
|
|
void RestoreSavedReplacements(const std::map<u32, u32> &saved) {
|
|
for (auto it = saved.begin(), end = saved.end(); it != end; ++it) {
|
|
const u32 addr = it->first;
|
|
// Just put the replacements back.
|
|
Memory::Write_U32(it->second, addr);
|
|
}
|
|
}
|
|
|
|
bool GetReplacedOpAt(u32 address, u32 *op) {
|
|
u32 instr = Memory::Read_Opcode_JIT(address).encoding;
|
|
if (MIPS_IS_REPLACEMENT(instr)) {
|
|
auto iter = replacedInstructions.find(address);
|
|
if (iter != replacedInstructions.end()) {
|
|
*op = iter->second;
|
|
return true;
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
return false;
|
|
}
|