Merge branch 'master' into Croden1999-patch-lang
commit fea88b62ec
156 changed files with 3695 additions and 1158 deletions
.gitmodules (vendored): 3 changes

@@ -50,3 +50,6 @@
 [submodule "ext/naett"]
 	path = ext/naett
 	url = https://github.com/erkkah/naett.git
+[submodule "ext/libchdr"]
+	path = ext/libchdr
+	url = https://github.com/rtissera/libchdr.git
@@ -1343,17 +1343,20 @@ else()
 		SDL/SDLVulkanGraphicsContext.cpp
 	)
 endif()
-if(SDL2_ttf_FOUND OR SDL2_ttf_PKGCONFIG_FOUND)
+if(SDL2_ttf_FOUND OR
+		(SDL2_ttf_PKGCONFIG_FOUND AND
+		SDL2_ttf_PKGCONFIG_VERSION VERSION_GREATER_EQUAL "2.0.18"))
 	add_definitions(-DUSE_SDL2_TTF)
 	if(FONTCONFIG_FOUND)
 		add_definitions(-DUSE_SDL2_TTF_FONTCONFIG)
 		set(nativeExtraLibs ${nativeExtraLibs} Fontconfig::Fontconfig)
 	endif()
+elseif(SDL2_ttf_PKGCONFIG_FOUND)
+	message(WARNING "Found SDL2_ttf <2.0.18 - this is too old, falling back to atlas")
 endif()
 if(SDL2_ttf_FOUND)
 	set(nativeExtraLibs ${nativeExtraLibs} SDL2_ttf::SDL2_ttf)
 elseif(SDL2_ttf_PKGCONFIG_FOUND)
 	add_definitions(-DUSE_SDL2_TTF_PKGCONFIG)
 	set(nativeExtraLibs ${nativeExtraLibs} PkgConfig::SDL2_ttf_PKGCONFIG)
 endif()
 if(APPLE)

@@ -2314,7 +2317,9 @@ else()
 	include_directories(ext/zstd/lib)
 endif()

-target_link_libraries(${CoreLibName} Common native kirk cityhash sfmt19937 xbrz xxhash rcheevos ${GlslangLibs}
+include_directories(ext/libchdr/include)
+
+target_link_libraries(${CoreLibName} Common native chdr kirk cityhash sfmt19937 xbrz xxhash rcheevos ${GlslangLibs}
 	${CoreExtraLibs} ${OPENGL_LIBRARIES} ${X11_LIBRARIES} ${CMAKE_DL_LIBS})

 if(NOT HTTPS_NOT_AVAILABLE)
@@ -4204,6 +4204,14 @@ void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch, bool negate) {
 		if (negate) {
 			FNEG(32, Rd, Rd);
 		}
+	} else if (TryAnyMOVI(32, Rd, ival)) {
+		if (negate) {
+			FNEG(32, Rd, Rd);
+		}
+	} else if (TryAnyMOVI(32, Rd, ival ^ 0x80000000)) {
+		if (!negate) {
+			FNEG(32, Rd, Rd);
+		}
 	} else {
 		_assert_msg_(scratch != INVALID_REG, "Failed to find a way to generate FP immediate %f without scratch", value);
 		if (negate) {

@@ -4214,6 +4222,96 @@ void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch, bool negate) {
 	}
 }

+bool ARM64FloatEmitter::TryMOVI(u8 size, ARM64Reg Rd, uint64_t elementValue) {
+	if (size == 8) {
+		// Can always do 8.
+		MOVI(size, Rd, elementValue & 0xFF);
+		return true;
+	} else if (size == 16) {
+		if ((elementValue & 0xFF00) == 0) {
+			MOVI(size, Rd, elementValue & 0xFF, 0);
+			return true;
+		} else if ((elementValue & 0x00FF) == 0) {
+			MOVI(size, Rd, (elementValue >> 8) & 0xFF, 8);
+			return true;
+		} else if ((elementValue & 0xFF00) == 0xFF00) {
+			MVNI(size, Rd, ~elementValue & 0xFF, 0);
+			return true;
+		} else if ((elementValue & 0x00FF) == 0x00FF) {
+			MVNI(size, Rd, (~elementValue >> 8) & 0xFF, 8);
+			return true;
+		}
+
+		return false;
+	} else if (size == 32) {
+		for (int shift = 0; shift < 32; shift += 8) {
+			uint32_t mask = 0xFFFFFFFF & ~(0xFF << shift);
+			if ((elementValue & mask) == 0) {
+				MOVI(size, Rd, (elementValue >> shift) & 0xFF, shift);
+				return true;
+			} else if ((elementValue & mask) == mask) {
+				MVNI(size, Rd, (~elementValue >> shift) & 0xFF, shift);
+				return true;
+			}
+		}
+
+		// Maybe an MSL shift will work?
+		for (int shift = 8; shift <= 16; shift += 8) {
+			uint32_t mask = 0xFFFFFFFF & ~(0xFF << shift);
+			uint32_t ones = (1 << shift) - 1;
+			uint32_t notOnes = 0xFFFFFF00 << shift;
+			if ((elementValue & mask) == ones) {
+				MOVI(size, Rd, (elementValue >> shift) & 0xFF, shift, true);
+				return true;
+			} else if ((elementValue & mask) == notOnes) {
+				MVNI(size, Rd, (elementValue >> shift) & 0xFF, shift, true);
+				return true;
+			}
+		}
+
+		return false;
+	} else if (size == 64) {
+		uint8_t imm8 = 0;
+		for (int i = 0; i < 8; ++i) {
+			uint8_t byte = (elementValue >> (i * 8)) & 0xFF;
+			if (byte != 0 && byte != 0xFF)
+				return false;
+
+			if (byte == 0xFF)
+				imm8 |= 1 << i;
+		}
+
+		// Didn't run into any partial bytes, so size 64 is doable.
+		MOVI(size, Rd, imm8);
+		return true;
+	}
+	return false;
+}
+
+bool ARM64FloatEmitter::TryAnyMOVI(u8 size, ARM64Reg Rd, uint64_t elementValue) {
+	// Try the original size first in case that's more optimal.
+	if (TryMOVI(size, Rd, elementValue))
+		return true;
+
+	uint64_t value = elementValue;
+	if (size != 64) {
+		uint64_t masked = elementValue & ((1ULL << size) - 1);
+		for (int i = size; i < 64; i += size) {
+			value |= masked << i;
+		}
+	}
+
+	for (int attempt = 8; attempt <= 64; attempt += attempt) {
+		// Original size was already attempted above.
+		if (attempt != size) {
+			if (TryMOVI(attempt, Rd, value))
+				return true;
+		}
+	}
+
+	return false;
+}
+
 void ARM64XEmitter::SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
 	u32 val;
 	bool shift;
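A side note on the size-64 path above: a 64-bit vector MOVI immediate is only encodable when every byte of the value is 0x00 or 0xFF, with the 8-bit immediate packing one bit per byte. A minimal standalone C++ sketch of that rule, for illustration only (not part of the diff):

#include <cstdint>
#include <cstdio>

// Mirrors the size == 64 check in TryMOVI: encodable iff each byte is 0x00 or 0xFF.
static bool EncodeMOVI64(uint64_t value, uint8_t *imm8) {
	uint8_t enc = 0;
	for (int i = 0; i < 8; ++i) {
		uint8_t byte = (value >> (i * 8)) & 0xFF;
		if (byte != 0 && byte != 0xFF)
			return false;  // partial byte, not encodable
		if (byte == 0xFF)
			enc |= 1 << i;
	}
	*imm8 = enc;
	return true;
}

int main() {
	uint8_t imm8;
	if (EncodeMOVI64(0x00FF00FF00FF00FFULL, &imm8))
		printf("imm8 = 0x%02x\n", imm8);  // prints imm8 = 0x55
	return 0;
}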
@@ -925,6 +925,10 @@ public:
 	void ORR(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0);
 	void BIC(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0);

+	bool TryMOVI(u8 size, ARM64Reg Rd, uint64_t value);
+	// Allow using a different size. Unclear if there's a penalty.
+	bool TryAnyMOVI(u8 size, ARM64Reg Rd, uint64_t value);
+
 	// One source
 	void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);
@@ -1073,6 +1073,9 @@
     <Filter Include="ext\naett">
       <UniqueIdentifier>{34f45db9-5c08-49cb-b349-b9e760ce3213}</UniqueIdentifier>
     </Filter>
+    <Filter Include="ext\libchdr">
+      <UniqueIdentifier>{b681797d-7747-487f-b448-5ef5b2d2805b}</UniqueIdentifier>
+    </Filter>
   </ItemGroup>
   <ItemGroup>
     <Text Include="..\ext\libpng17\CMakeLists.txt">
@@ -72,7 +72,7 @@ public:
 	}

 	bool ContainsKey(const Key &key) const {
-		// Slightly wasteful.
+		// Slightly wasteful, though compiler might optimize it.
 		Value value;
 		return Get(key, &value);
 	}

@@ -135,6 +135,7 @@ public:
 		return false;
 	}

+	// This will never crash if you call it without locking - but, the value might not be right.
 	size_t size() const {
 		return count_;
 	}
@@ -173,7 +173,7 @@ std::string* Section::GetLine(const char* key, std::string* valueOut, std::string* commentOut)
 		if (!strcasecmp(lineKey.c_str(), key))
 			return &line;
 	}
-	return 0;
+	return nullptr;
 }

 const std::string* Section::GetLine(const char* key, std::string* valueOut, std::string* commentOut) const

@@ -186,7 +186,7 @@ const std::string* Section::GetLine(const char* key, std::string* valueOut, std::string* commentOut) const
 		if (!strcasecmp(lineKey.c_str(), key))
 			return &line;
 	}
-	return 0;
+	return nullptr;
 }

 void Section::Set(const char* key, uint32_t newValue) {

@@ -430,7 +430,7 @@ Section* IniFile::GetSection(const char* sectionName) {
 	for (const auto &iter : sections)
 		if (!strcasecmp(iter->name().c_str(), sectionName))
 			return iter.get();
-	return 0;
+	return nullptr;
 }

 Section* IniFile::GetOrCreateSection(const char* sectionName) {
|
|
@ -116,8 +116,9 @@ public:
|
|||
std::string LanguageID();
|
||||
|
||||
std::shared_ptr<I18NCategory> GetCategory(I18NCat category);
|
||||
std::shared_ptr<I18NCategory> GetCategoryByName(const char *name);
|
||||
|
||||
// Translate the string, by looking up "key" in the file, and falling back to either def or key, in that order, if the lookup fails.
|
||||
// def can (and usually is) set to nullptr.
|
||||
const char *T(I18NCat category, const char *key, const char *def = nullptr) {
|
||||
if (category == I18NCat::NONE)
|
||||
return def ? def : key;
|
||||
|
|
|
@@ -184,7 +184,7 @@ bool GetFilesInDir(const Path &directory, std::vector<FileInfo> *files, const ch
 	std::string tmp;
 	while (*filter) {
 		if (*filter == ':') {
-			filters.insert(std::move(tmp));
+			filters.insert(tmp);
 			tmp.clear();
 		} else {
 			tmp.push_back(*filter);

@@ -192,7 +192,7 @@ bool GetFilesInDir(const Path &directory, std::vector<FileInfo> *files, const ch
 		filter++;
 	}
 	if (!tmp.empty())
-		filters.insert(std::move(tmp));
+		filters.insert(tmp);
 }

 #if PPSSPP_PLATFORM(WINDOWS)
@@ -32,25 +32,25 @@ void GLDeleter::Perform(GLRenderManager *renderManager, bool skipGLCalls) {
 	}
 	pushBuffers.clear();
 	for (auto shader : shaders) {
-		if (skipGLCalls)
+		if (skipGLCalls && shader)
 			shader->shader = 0;  // prevent the glDeleteShader
 		delete shader;
 	}
 	shaders.clear();
 	for (auto program : programs) {
-		if (skipGLCalls)
+		if (skipGLCalls && program)
 			program->program = 0;  // prevent the glDeleteProgram
 		delete program;
 	}
 	programs.clear();
 	for (auto buffer : buffers) {
-		if (skipGLCalls)
+		if (skipGLCalls && buffer)
 			buffer->buffer_ = 0;
 		delete buffer;
 	}
 	buffers.clear();
 	for (auto texture : textures) {
-		if (skipGLCalls)
+		if (skipGLCalls && texture)
 			texture->texture = 0;
 		delete texture;
 	}
@@ -349,24 +349,31 @@ public:
 	}

 	void DeleteShader(GLRShader *shader) {
+		_dbg_assert_(shader != nullptr);
 		deleter_.shaders.push_back(shader);
 	}
 	void DeleteProgram(GLRProgram *program) {
+		_dbg_assert_(program != nullptr);
 		deleter_.programs.push_back(program);
 	}
 	void DeleteBuffer(GLRBuffer *buffer) {
+		_dbg_assert_(buffer != nullptr);
 		deleter_.buffers.push_back(buffer);
 	}
 	void DeleteTexture(GLRTexture *texture) {
+		_dbg_assert_(texture != nullptr);
 		deleter_.textures.push_back(texture);
 	}
 	void DeleteInputLayout(GLRInputLayout *inputLayout) {
+		_dbg_assert_(inputLayout != nullptr);
 		deleter_.inputLayouts.push_back(inputLayout);
 	}
 	void DeleteFramebuffer(GLRFramebuffer *framebuffer) {
+		_dbg_assert_(framebuffer != nullptr);
 		deleter_.framebuffers.push_back(framebuffer);
 	}
 	void DeletePushBuffer(GLPushBuffer *pushbuffer) {
+		_dbg_assert_(pushbuffer != nullptr);
 		deleter_.pushBuffers.push_back(pushbuffer);
 	}
@@ -934,7 +934,7 @@ void OpenGLTexture::UpdateTextureLevels(GLRenderManager *render, const uint8_t *
 OpenGLTexture::~OpenGLTexture() {
 	if (tex_) {
 		render_->DeleteTexture(tex_);
-		tex_ = 0;
+		tex_ = nullptr;
 		generatedMips_ = false;
 	}
 }
@@ -90,6 +90,19 @@ VKAPI_ATTR VkBool32 VKAPI_CALL VulkanDebugUtilsCallback(
 		break;
 	}

+	/*
+	// Can be used to temporarily turn errors into info for easier debugging.
+	switch (messageCode) {
+	case 1544472022:
+		if (messageSeverity & VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) {
+			messageSeverity = (VkDebugUtilsMessageSeverityFlagBitsEXT)((messageSeverity & ~VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT) | VK_DEBUG_UTILS_MESSAGE_SEVERITY_INFO_BIT_EXT);
+		}
+		break;
+	default:
+		break;
+	}
+	*/
+
 	int count;
 	{
 		std::lock_guard<std::mutex> lock(g_errorCountMutex);
@@ -2,6 +2,35 @@
 #include "Common/GPU/Vulkan/VulkanFramebuffer.h"
 #include "Common/GPU/Vulkan/VulkanQueueRunner.h"

+static const char *rpTypeDebugNames[] = {
+	"RENDER",
+	"RENDER_DEPTH",
+	"RENDER_INPUT",
+	"RENDER_DEPTH_INPUT",
+	"MV_RENDER",
+	"MV_RENDER_DEPTH",
+	"MV_RENDER_INPUT",
+	"MV_RENDER_DEPTH_INPUT",
+	"MS_RENDER",
+	"MS_RENDER_DEPTH",
+	"MS_RENDER_INPUT",
+	"MS_RENDER_DEPTH_INPUT",
+	"MS_MV_RENDER",
+	"MS_MV_RENDER_DEPTH",
+	"MS_MV_RENDER_INPUT",
+	"MS_MV_RENDER_DEPTH_INPUT",
+	"BACKBUF",
+};
+
+const char *GetRPTypeName(RenderPassType rpType) {
+	uint32_t index = (uint32_t)rpType;
+	if (index < ARRAY_SIZE(rpTypeDebugNames)) {
+		return rpTypeDebugNames[index];
+	} else {
+		return "N/A";
+	}
+}
+
 VkSampleCountFlagBits MultiSampleLevelToFlagBits(int count) {
 	// TODO: Check hardware support here, or elsewhere?
 	// Some hardware only supports 4x.

@@ -387,12 +416,25 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType) {
 	}

 	if (isBackbuffer) {
+		// We don't specify any explicit transitions for these, so let's use subpass dependencies.
+		// This makes sure that writes to the depth image are done before we try to write to it again.
+		// From Sascha's examples.
 		deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL;
 		deps[numDeps].dstSubpass = 0;
-		deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
+		deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
 		deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
 		deps[numDeps].srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 		deps[numDeps].dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 		deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
 		numDeps++;
+		// Dependencies for the color image.
+		deps[numDeps].srcSubpass = VK_SUBPASS_EXTERNAL;
+		deps[numDeps].dstSubpass = 0;
+		deps[numDeps].srcStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
+		deps[numDeps].dstStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
-		deps[numDeps].srcAccessMask = 0;
+		deps[numDeps].srcAccessMask = VK_ACCESS_MEMORY_READ_BIT;
+		deps[numDeps].dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
+		deps[numDeps].dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT;
+		numDeps++;
 	}

@@ -494,6 +536,10 @@ VkRenderPass CreateRenderPass(VulkanContext *vulkan, const RPKey &key, RenderPassType rpType) {
 		res = vkCreateRenderPass(vulkan->GetDevice(), &rp, nullptr, &pass);
 	}

+	if (pass) {
+		vulkan->SetDebugName(pass, VK_OBJECT_TYPE_RENDER_PASS, GetRPTypeName(rpType));
+	}
+
 	_assert_(res == VK_SUCCESS);
 	_assert_(pass != VK_NULL_HANDLE);
 	return pass;
@@ -157,3 +157,5 @@ private:
 	VkSampleCountFlagBits sampleCounts[(size_t)RenderPassType::TYPE_COUNT];
 	RPKey key_;
 };
+
+const char *GetRPTypeName(RenderPassType rpType);
@@ -314,7 +314,7 @@ static void VulkanFreeLibrary(VulkanLibraryHandle &h) {
 }

 void VulkanSetAvailable(bool available) {
-	INFO_LOG(G3D, "Forcing Vulkan availability to true");
+	INFO_LOG(G3D, "Setting Vulkan availability to true");
 	g_vulkanAvailabilityChecked = true;
 	g_vulkanMayBeAvailable = available;
 }
@@ -291,7 +291,7 @@ VulkanPushPool::Block VulkanPushPool::CreateBlock(size_t size) {
 	_assert_(result == VK_SUCCESS);

 	result = vmaMapMemory(vulkan_->Allocator(), block.allocation, (void **)(&block.writePtr));
-	_assert_msg_(result == VK_SUCCESS, "VulkanPushPool: Failed to map memory (result = %08x)", result);
+	_assert_msg_(result == VK_SUCCESS, "VulkanPushPool: Failed to map memory (result = %s)", VulkanResultToString(result));

 	_assert_msg_(block.writePtr != nullptr, "VulkanPushPool: Failed to map memory on block of size %d", (int)block.size);
 	return block;
@@ -674,26 +674,6 @@ const char *AspectToString(VkImageAspectFlags aspect) {
 	}
 }

-static const char *rpTypeDebugNames[] = {
-	"RENDER",
-	"RENDER_DEPTH",
-	"RENDER_INPUT",
-	"RENDER_DEPTH_INPUT",
-	"MV_RENDER",
-	"MV_RENDER_DEPTH",
-	"MV_RENDER_INPUT",
-	"MV_RENDER_DEPTH_INPUT",
-	"MS_RENDER",
-	"MS_RENDER_DEPTH",
-	"MS_RENDER_INPUT",
-	"MS_RENDER_DEPTH_INPUT",
-	"MS_MV_RENDER",
-	"MS_MV_RENDER_DEPTH",
-	"MS_MV_RENDER_INPUT",
-	"MS_MV_RENDER_DEPTH_INPUT",
-	"BACKBUF",
-};
-
 std::string VulkanQueueRunner::StepToString(VulkanContext *vulkan, const VKRStep &step) {
 	char buffer[256];
 	switch (step.stepType) {

@@ -703,7 +683,7 @@ std::string VulkanQueueRunner::StepToString(VulkanContext *vulkan, const VKRStep &step) {
 	int h = step.render.framebuffer ? step.render.framebuffer->height : vulkan->GetBackbufferHeight();
 	int actual_w = step.render.renderArea.extent.width;
 	int actual_h = step.render.renderArea.extent.height;
-	const char *renderCmd = rpTypeDebugNames[(size_t)step.render.renderPassType];
+	const char *renderCmd = GetRPTypeName(step.render.renderPassType);
 	snprintf(buffer, sizeof(buffer), "%s %s %s (draws: %d, %dx%d/%dx%d)", renderCmd, step.tag, step.render.framebuffer ? step.render.framebuffer->Tag() : "", step.render.numDraws, actual_w, actual_h, w, h);
 	break;
 }
@@ -288,7 +288,6 @@ bool VulkanRenderManager::CreateBackbuffers() {
 		return false;
 	}

-
 	VkCommandBuffer cmdInit = GetInitCmd();

 	if (!queueRunner_.CreateSwapchain(cmdInit)) {

@@ -310,6 +309,11 @@ bool VulkanRenderManager::CreateBackbuffers() {

 	outOfDateFrames_ = 0;

+	for (int i = 0; i < vulkan_->GetInflightFrames(); i++) {
+		auto &frameData = frameData_[i];
+		frameData.readyForFence = true;  // Just in case.
+	}
+
 	// Start the thread(s).
 	if (HasBackbuffers()) {
 		run_ = true;  // For controlling the compiler thread's exit
@@ -874,8 +874,11 @@ VKContext::VKContext(VulkanContext *vulkan, bool useRenderThread)
 	caps_.tesselationShaderSupported = vulkan->GetDeviceFeatures().enabled.standard.tessellationShader != 0;
 	caps_.dualSourceBlend = vulkan->GetDeviceFeatures().enabled.standard.dualSrcBlend != 0;
 	caps_.depthClampSupported = vulkan->GetDeviceFeatures().enabled.standard.depthClamp != 0;

+	// Comment out these two to test geometry shader culling on any geometry shader-supporting hardware.
+	caps_.clipDistanceSupported = vulkan->GetDeviceFeatures().enabled.standard.shaderClipDistance != 0;
+	caps_.cullDistanceSupported = vulkan->GetDeviceFeatures().enabled.standard.shaderCullDistance != 0;

 	caps_.framebufferBlitSupported = true;
 	caps_.framebufferCopySupported = true;
 	caps_.framebufferDepthBlitSupported = vulkan->GetDeviceInfo().canBlitToPreferredDepthStencilFormat;
@@ -31,7 +31,7 @@ enum InputDeviceID {
 	DEVICE_ID_XINPUT_1 = 21,
 	DEVICE_ID_XINPUT_2 = 22,
 	DEVICE_ID_XINPUT_3 = 23,
-	DEVICE_ID_ACCELEROMETER = 30,
+	DEVICE_ID_ACCELEROMETER = 30,  // no longer used
 	DEVICE_ID_XR_HMD = 39,
 	DEVICE_ID_XR_CONTROLLER_LEFT = 40,
 	DEVICE_ID_XR_CONTROLLER_RIGHT = 41,

@@ -305,7 +305,7 @@ enum InputAxis {
 	JOYSTICK_AXIS_MOUSE_REL_X = 26,
 	JOYSTICK_AXIS_MOUSE_REL_Y = 27,

-	// Mobile device accelerometer/gyro
+	// Mobile device accelerometer/gyro. NOTE: These are no longer passed around internally, only used for the plugin API.
 	JOYSTICK_AXIS_ACCELEROMETER_X = 40,
 	JOYSTICK_AXIS_ACCELEROMETER_Y = 41,
 	JOYSTICK_AXIS_ACCELEROMETER_Z = 42,
@@ -25,6 +25,7 @@
 #include "StringUtils.h"
 #include "Common/Data/Encoding/Utf8.h"
 #include "Common/Thread/ThreadUtil.h"
+#include "Common/TimeUtil.h"

 #if PPSSPP_PLATFORM(ANDROID)
 #include <android/log.h>

@@ -38,10 +39,12 @@ static bool hitAnyAsserts = false;

 std::mutex g_extraAssertInfoMutex;
 std::string g_extraAssertInfo = "menu";
+double g_assertInfoTime = 0.0;

 void SetExtraAssertInfo(const char *info) {
 	std::lock_guard<std::mutex> guard(g_extraAssertInfoMutex);
 	g_extraAssertInfo = info ? info : "menu";
+	g_assertInfoTime = time_now_d();
 }

 bool HandleAssert(const char *function, const char *file, int line, const char *expression, const char* format, ...) {

@@ -57,7 +60,8 @@ bool HandleAssert(const char *function, const char *file, int line, const char *expression, const char* format, ...) {
 	char formatted[LOG_BUF_SIZE + 128];
 	{
 		std::lock_guard<std::mutex> guard(g_extraAssertInfoMutex);
-		snprintf(formatted, sizeof(formatted), "(%s:%s:%d): [%s] (%s) %s", file, function, line, expression, g_extraAssertInfo.c_str(), text);
+		double delta = time_now_d() - g_assertInfoTime;
+		snprintf(formatted, sizeof(formatted), "(%s:%s:%d): [%s] (%s, %0.1fs) %s", file, function, line, expression, g_extraAssertInfo.c_str(), delta, text);
 	}

 	// Normal logging (will also log to Android log)
@@ -30,6 +30,7 @@
 #include "Common/Net/URL.h"

 #include "Common/File/FileDescriptor.h"
+#include "Common/SysError.h"
 #include "Common/Thread/ThreadUtil.h"
 #include "Common/Data/Encoding/Compression.h"
 #include "Common/Net/NetBuffer.h"

@@ -97,7 +98,7 @@ static void FormatAddr(char *addrbuf, size_t bufsize, const addrinfo *info) {
 	switch (info->ai_family) {
 	case AF_INET:
 	case AF_INET6:
-		inet_ntop(info->ai_family, info->ai_addr, addrbuf, bufsize);
+		inet_ntop(info->ai_family, &((sockaddr_in *)info->ai_addr)->sin_addr, addrbuf, bufsize);
 		break;
 	default:
 		snprintf(addrbuf, bufsize, "(Unknown AF %d)", info->ai_family);

@@ -131,11 +132,22 @@ bool Connection::Connect(int maxTries, double timeout, bool *cancelConnect) {
 		// Start trying to connect (async with timeout.)
 		errno = 0;
 		if (connect(sock, possible->ai_addr, (int)possible->ai_addrlen) < 0) {
-			if (errno != 0 && errno != EINPROGRESS) {
-				char addrStr[128];
+#if PPSSPP_PLATFORM(WINDOWS)
+			int errorCode = WSAGetLastError();
+			std::string errorString = GetStringErrorMsg(errorCode);
+			bool unreachable = errorCode == WSAENETUNREACH;
+			bool inProgress = errorCode == WSAEINPROGRESS || errorCode == WSAEWOULDBLOCK;
+#else
+			int errorCode = errno;
+			std::string errorString = strerror(errno);
+			bool unreachable = errorCode == ENETUNREACH;
+			bool inProgress = errorCode == EINPROGRESS || errorCode == EWOULDBLOCK;
+#endif
+			if (!inProgress) {
+				char addrStr[128]{};
 				FormatAddr(addrStr, sizeof(addrStr), possible);
-				if (errno != ENETUNREACH) {
-					ERROR_LOG(HTTP, "connect(%d) call to %s failed (%d: %s)", sock, addrStr, errno, strerror(errno));
+				if (!unreachable) {
+					ERROR_LOG(HTTP, "connect(%d) call to %s failed (%d: %s)", sock, addrStr, errorCode, errorString.c_str());
 				} else {
 					INFO_LOG(HTTP, "connect(%d): Ignoring unreachable resolved address %s", sock, addrStr);
 				}
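One note on the platform split above: Winsock does not report connect() failures through errno, which is why the error code has to be fetched differently per platform. A minimal sketch of the same pattern, assuming only standard headers (illustrative, not from the diff):

#if defined(_WIN32)
#include <winsock2.h>
// Winsock keeps its own per-thread error state; errno stays untouched.
static int LastSocketError() { return WSAGetLastError(); }
#else
#include <cerrno>
// POSIX sockets report failures through errno.
static int LastSocketError() { return errno; }
#endif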
@@ -207,9 +219,9 @@ namespace http {

 	// TODO: do something sane here
 	constexpr const char *DEFAULT_USERAGENT = "PPSSPP";
+	constexpr const char *HTTP_VERSION = "1.1";

 	Client::Client() {
-		httpVersion_ = "1.1";
 		userAgent_ = DEFAULT_USERAGENT;
 	}

@@ -341,7 +353,7 @@ int Client::SendRequestWithData(const char *method, const RequestParams &req, co
 		"\r\n";

 	buffer.Printf(tpl,
-		method, req.resource.c_str(), httpVersion_,
+		method, req.resource.c_str(), HTTP_VERSION,
 		host_.c_str(),
 		userAgent_.c_str(),
 		req.acceptMime,
@@ -86,7 +86,6 @@ public:

 protected:
 	std::string userAgent_;
-	const char *httpVersion_;
 	double dataTimeout_ = 900.0;
 };
@@ -378,7 +378,7 @@ void TextDrawerSDL::DrawStringBitmap(std::vector<uint8_t> &bitmapData, TextStrin
 		font = fallbackFonts_[0];
 	}

-#ifndef USE_SDL2_TTF_PKGCONFIG
+#if SDL_TTF_VERSION_ATLEAST(2, 20, 0)
 	if (align & ALIGN_HCENTER)
 		TTF_SetFontWrappedAlign(font, TTF_WRAPPED_ALIGN_CENTER);
 	else if (align & ALIGN_RIGHT)
@@ -55,6 +55,7 @@ bool NativeIsRestarting();
 void NativeTouch(const TouchInput &touch);
 bool NativeKey(const KeyInput &key);
 void NativeAxis(const AxisInput *axis, size_t count);
+void NativeAccelerometer(float tiltX, float tiltY, float tiltZ);

 // Called when it's time to process a frame, including rendering. If the device can keep up, this
 // will be called sixty times per second. Main thread.
@@ -45,6 +45,7 @@ public:
 template<class T>
 class Promise {
 public:
+	// Never fails.
 	static Promise<T> *Spawn(ThreadManager *threadman, std::function<T()> fun, TaskType taskType, TaskPriority taskPriority = TaskPriority::NORMAL) {
 		Mailbox<T> *mailbox = new Mailbox<T>();
@@ -122,7 +122,11 @@ void PopupMultiChoice::UpdateText() {
 	if (index < 0 || index >= numChoices_) {
 		valueText_ = "(invalid choice)";  // Shouldn't happen. Should be no need to translate this.
 	} else {
+		if (choices_[index]) {
 			valueText_ = T(category_, choices_[index]);
+		} else {
+			valueText_ = "";
+		}
 	}
 }
@@ -227,9 +227,11 @@ void ScreenManager::getFocusPosition(float &x, float &y, float &z) {
 }

 void ScreenManager::sendMessage(const char *msg, const char *value) {
-	if (!strcmp(msg, "recreateviews"))
+	if (!msg) {
+		_dbg_assert_msg_(false, "Empty msg in ScreenManager::sendMessage");
+	} else if (!strcmp(msg, "recreateviews")) {
 		RecreateAllViews();
-	if (!strcmp(msg, "lost_focus")) {
+	} else if (!strcmp(msg, "lost_focus")) {
 		TouchInput input{};
 		input.x = -50000.0f;
 		input.y = -50000.0f;

@@ -238,6 +240,7 @@ void ScreenManager::sendMessage(const char *msg, const char *value) {
 		input.id = 0;
 		touch(input);
 	}
+
 	if (!stack_.empty())
 		stack_.back().screen->sendMessage(msg, value);
 }
@@ -1697,7 +1697,6 @@ void XEmitter::MOVMSKPD(X64Reg dest, OpArg arg) {WriteSSEOp(0x66, 0x50, dest, arg);}

 void XEmitter::LDDQU(X64Reg dest, OpArg arg) {WriteSSEOp(0xF2, sseLDDQU, dest, arg);} // For integer data only

-// THESE TWO ARE UNTESTED.
 void XEmitter::UNPCKLPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x14, dest, arg);}
 void XEmitter::UNPCKHPS(X64Reg dest, OpArg arg) {WriteSSEOp(0x00, 0x15, dest, arg);}

@@ -1892,6 +1891,9 @@ void XEmitter::PTEST(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3817, dest, arg);}
 void XEmitter::PACKUSDW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x382b, dest, arg);}
 void XEmitter::DPPS(X64Reg dest, OpArg arg, u8 mask) {WriteSSE41Op(0x66, 0x3A40, dest, arg, 1); Write8(mask);}

+void XEmitter::INSERTPS(X64Reg dest, OpArg arg, u8 dstsubreg, u8 srcsubreg, u8 zmask) { WriteSSE41Op(0x66, 0x3A21, dest, arg, 1); Write8((srcsubreg << 6) | (dstsubreg << 4) | zmask); }
+void XEmitter::EXTRACTPS(OpArg dest, X64Reg arg, u8 subreg) { WriteSSE41Op(0x66, 0x3A17, arg, dest, 1); Write8(subreg); }
+
 void XEmitter::PMINSB(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3838, dest, arg);}
 void XEmitter::PMINSD(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x3839, dest, arg);}
 void XEmitter::PMINUW(X64Reg dest, OpArg arg) {WriteSSE41Op(0x66, 0x383a, dest, arg);}

@@ -2084,7 +2086,7 @@ void XEmitter::VCVTTPD2DQ(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(bits,
 void XEmitter::VCVTTSS2SI(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(0, 0xF3, 0x2C, regOp1, arg, 0, bits == 64 ? 1 : 0); }
 void XEmitter::VCVTTSD2SI(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(0, 0xF2, 0x2C, regOp1, arg, 0, bits == 64 ? 1 : 0); }
 void XEmitter::VEXTRACTPS(OpArg arg, X64Reg regOp1, u8 subreg) { WriteAVXOp(0, 0x66, 0x3A17, regOp1, arg, 1); Write8(subreg); }
-void XEmitter::VINSERTPS(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 subreg) { WriteAVXOp(0, 0x66, 0x3A21, regOp1, regOp2, arg, 1); Write8(subreg); }
+void XEmitter::VINSERTPS(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 dstsubreg, u8 srcsubreg, u8 zmask) { WriteAVXOp(0, 0x66, 0x3A21, regOp1, regOp2, arg, 1); Write8((srcsubreg << 6) | (dstsubreg << 4) | zmask); }
 void XEmitter::VLDDQU(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(bits, 0xF2, sseLDDQU, regOp1, arg); }
 void XEmitter::VMOVAPS(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(bits, 0x00, sseMOVAPfromRM, regOp1, arg); }
 void XEmitter::VMOVAPD(int bits, X64Reg regOp1, OpArg arg) { WriteAVXOp(bits, 0x66, sseMOVAPfromRM, regOp1, arg); }
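The control byte written by INSERTPS above follows the layout documented in the Intel SDM: bits 7:6 select the source lane, bits 5:4 the destination lane, and bits 3:0 zero out lanes. A small sketch of that packing (illustrative helper, not part of the emitter):

#include <cstdint>
#include <cassert>

// Packs the INSERTPS imm8 exactly as Write8((srcsubreg << 6) | (dstsubreg << 4) | zmask) does.
static uint8_t InsertPSImm(uint8_t srcsubreg, uint8_t dstsubreg, uint8_t zmask) {
	assert(srcsubreg < 4 && dstsubreg < 4 && zmask < 16);
	return (uint8_t)((srcsubreg << 6) | (dstsubreg << 4) | zmask);
}

int main() {
	// Copy source lane 2 into destination lane 1, zeroing lane 3: imm8 == 0x98.
	uint8_t imm = InsertPSImm(2, 1, 0x8);
	assert(imm == 0x98);
	return 0;
}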
@@ -684,12 +684,14 @@ public:

 	// SSE4: Further horizontal operations - dot products. These are weirdly flexible, the arg contains both a read mask and a write "mask".
 	void DPPD(X64Reg dest, OpArg src, u8 arg);

-	// These are probably useful for VFPU emulation.
-	void INSERTPS(X64Reg dest, OpArg src, u8 arg);
-	void EXTRACTPS(OpArg dest, X64Reg src, u8 arg);
 #endif

+	// SSE4: Insert and extract for floats.
+	// Note: insert from memory or an XMM.
+	void INSERTPS(X64Reg dest, OpArg arg, u8 dstsubreg, u8 srcsubreg = 0, u8 zmask = 0);
+	// Extract to memory or GPR.
+	void EXTRACTPS(OpArg dest, X64Reg arg, u8 subreg);
+
 	// SSE3: Horizontal operations in SIMD registers. Very slow! shufps-based code beats it handily on Ivy.
 	void HADDPS(X64Reg dest, OpArg src);

@@ -1040,6 +1042,7 @@ public:
 	// Can only extract from the low 128 bits.
 	void VEXTRACTPS(OpArg arg, X64Reg regOp1, u8 subreg);
 	// Can only insert into the low 128 bits, zeros upper bits. Inserts from XMM.
-	void VINSERTPS(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 subreg);
+	void VINSERTPS(X64Reg regOp1, X64Reg regOp2, OpArg arg, u8 dstsubreg, u8 srcsubreg = 0, u8 zmask = 0);
 	void VLDDQU(int bits, X64Reg regOp1, OpArg arg);
 	void VMOVAPS(int bits, X64Reg regOp1, OpArg arg);
 	void VMOVAPD(int bits, X64Reg regOp1, OpArg arg);
@@ -480,8 +480,9 @@ void ControlMapper::Axis(const AxisInput &axis) {
 	double now = time_now_d();

 	std::lock_guard<std::mutex> guard(mutex_);
-	if (axis.deviceId < DEVICE_ID_COUNT) {
-		deviceTimestamps_[(int)axis.deviceId] = now;
+	size_t deviceIndex = (size_t)axis.deviceId;  // this'll wrap around ANY (-1) to max, which will eliminate it on the next line, if such an event appears by mistake.
+	if (deviceIndex < (size_t)DEVICE_ID_COUNT) {
+		deviceTimestamps_[deviceIndex] = now;
 	}
 	if (axis.value >= 0.0f) {
 		InputMapping mapping(axis.deviceId, axis.axisId, 1);

@@ -62,7 +62,7 @@ private:
 	float virtKeys_[VIRTKEY_COUNT]{};
 	bool virtKeyOn_[VIRTKEY_COUNT]{};  // Track boolean output separately since thresholds may differ.

-	double deviceTimestamps_[42]{};
+	double deviceTimestamps_[(size_t)DEVICE_ID_COUNT]{};

 	int lastNonDeadzoneDeviceID_[2]{};
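Why the (size_t) cast in the hunk above is enough to reject DEVICE_ID_ANY (-1): converting -1 to an unsigned type wraps to the maximum value, which then fails the bounds check. A tiny sketch, with an illustrative constant standing in for the real enum:

#include <cstdio>
#include <cstddef>

enum { DEVICE_ID_COUNT = 42 };  // illustrative; the real value comes from the InputDeviceID enum

static bool TimestampSlotValid(int deviceId) {
	size_t deviceIndex = (size_t)deviceId;  // -1 wraps to SIZE_MAX here
	return deviceIndex < (size_t)DEVICE_ID_COUNT;
}

int main() {
	printf("%d %d\n", TimestampSlotValid(3), TimestampSlotValid(-1));  // prints 1 0
	return 0;
}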
@@ -138,7 +138,7 @@
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
-      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\x86\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>_CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;USE_FFMPEG;WITH_UPNP;WIN32;_ARCH_32=1;_M_IX86=1;_DEBUG;_LIB;_UNICODE;UNICODE;MINIUPNP_STATICLIB;ARMIPS_USE_STD_FILESYSTEM;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
       <FloatingPointModel>Precise</FloatingPointModel>

@@ -165,7 +165,7 @@
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
-      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib;../ext/zstd/lib</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\x86_64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib;../ext/zstd/lib</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>_CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;USE_FFMPEG;WITH_UPNP;WIN32;_ARCH_64=1;_M_X64=1;_DEBUG;_LIB;_UNICODE;UNICODE;MINIUPNP_STATICLIB;ARMIPS_USE_STD_FILESYSTEM;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
       <FloatingPointModel>Precise</FloatingPointModel>

@@ -193,7 +193,7 @@
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM64'">
     <ClCompile>
      <WarningLevel>Level3</WarningLevel>
-      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\aarch64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\aarch64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>_CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;USE_FFMPEG;WITH_UPNP;WIN32;_ARCH_64=1;_DEBUG;_LIB;_UNICODE;UNICODE;ARMIPS_USE_STD_FILESYSTEM;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
       <FloatingPointModel>Precise</FloatingPointModel>

@@ -221,7 +221,7 @@
   <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|ARM'">
     <ClCompile>
       <WarningLevel>Level3</WarningLevel>
-      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\arm\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\arm\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib</AdditionalIncludeDirectories>
       <PreprocessorDefinitions>_CRTDBG_MAP_ALLOC;USING_WIN_UI;_CRT_NONSTDC_NO_DEPRECATE;_CRT_SECURE_NO_WARNINGS;USE_FFMPEG;WITH_UPNP;WIN32;_ARCH_32=1;_DEBUG;_LIB;_UNICODE;UNICODE;ARMIPS_USE_STD_FILESYSTEM;%(PreprocessorDefinitions)</PreprocessorDefinitions>
       <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
       <FloatingPointModel>Precise</FloatingPointModel>

@@ -253,7 +253,7 @@
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\x86\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib</AdditionalIncludeDirectories>
       <BufferSecurityCheck>false</BufferSecurityCheck>
       <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet>
       <FloatingPointModel>Precise</FloatingPointModel>

@@ -286,7 +286,7 @@
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\x86_64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib;../ext/zstd/lib</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\x86_64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib;../ext/zstd/lib</AdditionalIncludeDirectories>
       <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
       <FloatingPointModel>Precise</FloatingPointModel>
       <BufferSecurityCheck>false</BufferSecurityCheck>

@@ -321,7 +321,7 @@
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\aarch64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\aarch64\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib</AdditionalIncludeDirectories>
       <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
       <FloatingPointModel>Precise</FloatingPointModel>
       <BufferSecurityCheck>false</BufferSecurityCheck>

@@ -356,7 +356,7 @@
       <Optimization>MaxSpeed</Optimization>
       <FunctionLevelLinking>true</FunctionLevelLinking>
       <IntrinsicFunctions>true</IntrinsicFunctions>
-      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ffmpeg\Windows\arm\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib</AdditionalIncludeDirectories>
+      <AdditionalIncludeDirectories>..\ffmpeg\WindowsInclude;..\ext\libchdr\include;..\ffmpeg\Windows\arm\include;../common;..;../ext/glew;../ext/snappy;../ext/libpng17;../ext/zlib;../ext;../ext/zstd/lib</AdditionalIncludeDirectories>
       <EnableEnhancedInstructionSet>NotSet</EnableEnhancedInstructionSet>
       <FloatingPointModel>Precise</FloatingPointModel>
       <BufferSecurityCheck>false</BufferSecurityCheck>

@@ -1466,6 +1466,9 @@
     <ProjectReference Include="..\ext\libarmips.vcxproj">
       <Project>{129e5e2b-39c1-4d84-96fe-dfd22dbb4a25}</Project>
     </ProjectReference>
+    <ProjectReference Include="..\ext\libchdr.vcxproj">
+      <Project>{956f1f48-b612-46d8-89ee-96996dcd9383}</Project>
+    </ProjectReference>
     <ProjectReference Include="..\ext\miniupnpc.vcxproj">
       <Project>{d8a71225-178b-424e-96c1-cc3be2c1b047}</Project>
     </ProjectReference>
@@ -17,8 +17,10 @@

 #include <algorithm>
 #include <atomic>
+#include <condition_variable>
 #include <cstring>
 #include <mutex>
+#include <thread>

 #include "Common/Log.h"
 #include "Common/Serialize/Serializer.h"

@@ -78,12 +80,15 @@ struct PendingNotifyMem {
 	MemBlockFlags flags;
 	uint32_t start;
 	uint32_t size;
+	uint32_t copySrc;
 	uint64_t ticks;
 	uint32_t pc;
 	char tag[128];
 };

-static constexpr size_t MAX_PENDING_NOTIFIES = 512;
+// 160 KB.
+static constexpr size_t MAX_PENDING_NOTIFIES = 1024;
+static constexpr size_t MAX_PENDING_NOTIFIES_THREAD = 1000;
 static MemSlabMap allocMap;
 static MemSlabMap suballocMap;
 static MemSlabMap writeMap;

@@ -93,9 +98,17 @@ static std::atomic<uint32_t> pendingNotifyMinAddr1;
 static std::atomic<uint32_t> pendingNotifyMaxAddr1;
 static std::atomic<uint32_t> pendingNotifyMinAddr2;
 static std::atomic<uint32_t> pendingNotifyMaxAddr2;
-static std::mutex pendingMutex;
+// To prevent deadlocks, acquire Read before Write if you're going to acquire both.
+static std::mutex pendingWriteMutex;
+static std::mutex pendingReadMutex;
 static int detailedOverride;

+static std::thread flushThread;
+static std::atomic<bool> flushThreadRunning;
+static std::atomic<bool> flushThreadPending;
+static std::mutex flushLock;
+static std::condition_variable flushCond;
+
 MemSlabMap::MemSlabMap() {
 	Reset();
 }

@@ -369,9 +382,32 @@ void MemSlabMap::FillHeads(Slab *slab) {
 	}
 }

+size_t FormatMemWriteTagAtNoFlush(char *buf, size_t sz, const char *prefix, uint32_t start, uint32_t size);
+
 void FlushPendingMemInfo() {
-	std::lock_guard<std::mutex> guard(pendingMutex);
-	for (const auto &info : pendingNotifies) {
+	// This lock prevents us from another thread reading while we're busy flushing.
+	std::lock_guard<std::mutex> guard(pendingReadMutex);
+	std::vector<PendingNotifyMem> thisBatch;
+	{
+		std::lock_guard<std::mutex> guard(pendingWriteMutex);
+		thisBatch = std::move(pendingNotifies);
+		pendingNotifies.clear();
+		pendingNotifies.reserve(MAX_PENDING_NOTIFIES);
+
+		pendingNotifyMinAddr1 = 0xFFFFFFFF;
+		pendingNotifyMaxAddr1 = 0;
+		pendingNotifyMinAddr2 = 0xFFFFFFFF;
+		pendingNotifyMaxAddr2 = 0;
+	}
+
+	for (const auto &info : thisBatch) {
+		if (info.copySrc != 0) {
+			char tagData[128];
+			size_t tagSize = FormatMemWriteTagAtNoFlush(tagData, sizeof(tagData), info.tag, info.copySrc, info.size);
+			writeMap.Mark(info.start, info.size, info.ticks, info.pc, true, tagData);
+			continue;
+		}
+
 		if (info.flags & MemBlockFlags::ALLOC) {
 			allocMap.Mark(info.start, info.size, info.ticks, info.pc, true, info.tag);
 		} else if (info.flags & MemBlockFlags::FREE) {

@@ -392,11 +428,6 @@ void FlushPendingMemInfo() {
 			writeMap.Mark(info.start, info.size, info.ticks, info.pc, true, info.tag);
 		}
 	}
-	pendingNotifies.clear();
-	pendingNotifyMinAddr1 = 0xFFFFFFFF;
-	pendingNotifyMaxAddr1 = 0;
-	pendingNotifyMinAddr2 = 0xFFFFFFFF;
-	pendingNotifyMaxAddr2 = 0;
 }

 static inline uint32_t NormalizeAddress(uint32_t addr) {

@@ -411,6 +442,9 @@ static inline bool MergeRecentMemInfo(const PendingNotifyMem &info, size_t copyLength) {

 	for (size_t i = 1; i <= 4; ++i) {
 		auto &prev = pendingNotifies[pendingNotifies.size() - i];
+		if (prev.copySrc != 0)
+			return false;
+
 		if (prev.flags != info.flags)
 			continue;

@@ -440,7 +474,7 @@ void NotifyMemInfoPC(MemBlockFlags flags, uint32_t start, uint32_t size, uint32_t pc, const char *str, size_t strLength) {

 	bool needFlush = false;
 	// When the setting is off, we skip smaller info to keep things fast.
-	if (MemBlockInfoDetailed(size)) {
+	if (MemBlockInfoDetailed(size) && flags != MemBlockFlags::READ) {
 		PendingNotifyMem info{ flags, start, size };
 		info.ticks = CoreTiming::GetTicks();
 		info.pc = pc;

@@ -452,7 +486,7 @@ void NotifyMemInfoPC(MemBlockFlags flags, uint32_t start, uint32_t size, uint32_t pc, const char *str, size_t strLength) {
 		memcpy(info.tag, tagStr, copyLength);
 		info.tag[copyLength] = 0;

-		std::lock_guard<std::mutex> guard(pendingMutex);
+		std::lock_guard<std::mutex> guard(pendingWriteMutex);
 		// Sometimes we get duplicates, quickly check.
 		if (!MergeRecentMemInfo(info, copyLength)) {
 			if (start < 0x08000000) {

@@ -464,11 +498,15 @@ void NotifyMemInfoPC(MemBlockFlags flags, uint32_t start, uint32_t size, uint32_t pc, const char *str, size_t strLength) {
 			}
 			pendingNotifies.push_back(info);
 		}
-		needFlush = pendingNotifies.size() > MAX_PENDING_NOTIFIES;
+		needFlush = pendingNotifies.size() > MAX_PENDING_NOTIFIES_THREAD;
 	}

 	if (needFlush) {
-		FlushPendingMemInfo();
+		{
+			std::lock_guard<std::mutex> guard(flushLock);
+			flushThreadPending = true;
+		}
+		flushCond.notify_one();
 	}

 	if (!(flags & MemBlockFlags::SKIP_MEMCHECK)) {

@@ -484,6 +522,50 @@ void NotifyMemInfo(MemBlockFlags flags, uint32_t start, uint32_t size, const char *str, size_t strLength) {
 	NotifyMemInfoPC(flags, start, size, currentMIPS->pc, str, strLength);
 }

+void NotifyMemInfoCopy(uint32_t destPtr, uint32_t srcPtr, uint32_t size, const char *prefix) {
+	if (size == 0)
+		return;
+
+	bool needsFlush = false;
+	if (CBreakPoints::HasMemChecks()) {
+		// This will cause a flush, but it's needed to trigger memchecks with proper data.
+		char tagData[128];
+		size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), prefix, srcPtr, size);
+		NotifyMemInfo(MemBlockFlags::READ, srcPtr, size, tagData, tagSize);
+		NotifyMemInfo(MemBlockFlags::WRITE, destPtr, size, tagData, tagSize);
+	} else if (MemBlockInfoDetailed(size)) {
+		srcPtr = NormalizeAddress(srcPtr);
+		destPtr = NormalizeAddress(destPtr);
+
+		PendingNotifyMem info{ MemBlockFlags::WRITE, destPtr, size };
+		info.copySrc = srcPtr;
+		info.ticks = CoreTiming::GetTicks();
+		info.pc = currentMIPS->pc;
+
+		// Store the prefix for now. The correct tag will be calculated on flush.
+		truncate_cpy(info.tag, prefix);
+
+		std::lock_guard<std::mutex> guard(pendingWriteMutex);
+		if (destPtr < 0x08000000) {
+			pendingNotifyMinAddr1 = std::min(pendingNotifyMinAddr1.load(), destPtr);
+			pendingNotifyMaxAddr1 = std::max(pendingNotifyMaxAddr1.load(), destPtr + size);
+		} else {
+			pendingNotifyMinAddr2 = std::min(pendingNotifyMinAddr2.load(), destPtr);
+			pendingNotifyMaxAddr2 = std::max(pendingNotifyMaxAddr2.load(), destPtr + size);
+		}
+		pendingNotifies.push_back(info);
+		needsFlush = pendingNotifies.size() > MAX_PENDING_NOTIFIES_THREAD;
+	}
+
+	if (needsFlush) {
+		{
+			std::lock_guard<std::mutex> guard(flushLock);
+			flushThreadPending = true;
+		}
+		flushCond.notify_one();
+	}
+}
+
 std::vector<MemBlockInfo> FindMemInfo(uint32_t start, uint32_t size) {
 	start = NormalizeAddress(start);

@@ -520,13 +602,15 @@ std::vector<MemBlockInfo> FindMemInfoByFlag(MemBlockFlags flags, uint32_t start, uint32_t size) {
 	return results;
 }

-static const char *FindWriteTagByFlag(MemBlockFlags flags, uint32_t start, uint32_t size) {
+static const char *FindWriteTagByFlag(MemBlockFlags flags, uint32_t start, uint32_t size, bool flush = true) {
 	start = NormalizeAddress(start);

+	if (flush) {
 		if (pendingNotifyMinAddr1 < start + size && pendingNotifyMaxAddr1 >= start)
 			FlushPendingMemInfo();
 		if (pendingNotifyMinAddr2 < start + size && pendingNotifyMaxAddr2 >= start)
 			FlushPendingMemInfo();
+	}

 	if (flags & MemBlockFlags::ALLOC) {
 		const char *tag = allocMap.FastFindWriteTag(MemBlockFlags::ALLOC, start, size);

@@ -564,17 +648,49 @@ size_t FormatMemWriteTagAt(char *buf, size_t sz, const char *prefix, uint32_t start, uint32_t size) {
 	return snprintf(buf, sz, "%s%08x_size_%08x", prefix, start, size);
 }

+size_t FormatMemWriteTagAtNoFlush(char *buf, size_t sz, const char *prefix, uint32_t start, uint32_t size) {
+	const char *tag = FindWriteTagByFlag(MemBlockFlags::WRITE, start, size, false);
+	if (tag && strcmp(tag, "MemInit") != 0) {
+		return snprintf(buf, sz, "%s%s", prefix, tag);
+	}
+	// Fall back to alloc and texture, especially for VRAM. We prefer write above.
+	tag = FindWriteTagByFlag(MemBlockFlags::ALLOC | MemBlockFlags::TEXTURE, start, size, false);
+	if (tag) {
+		return snprintf(buf, sz, "%s%s", prefix, tag);
+	}
+	return snprintf(buf, sz, "%s%08x_size_%08x", prefix, start, size);
+}
+
+static void FlushMemInfoThread() {
+	while (flushThreadRunning.load()) {
+		flushThreadPending = false;
+		FlushPendingMemInfo();
+
+		std::unique_lock<std::mutex> guard(flushLock);
+		flushCond.wait(guard, [] {
+			return flushThreadPending.load();
+		});
+	}
+}
+
 void MemBlockInfoInit() {
-	std::lock_guard<std::mutex> guard(pendingMutex);
+	std::lock_guard<std::mutex> guard(pendingReadMutex);
+	std::lock_guard<std::mutex> guardW(pendingWriteMutex);
 	pendingNotifies.reserve(MAX_PENDING_NOTIFIES);
 	pendingNotifyMinAddr1 = 0xFFFFFFFF;
 	pendingNotifyMaxAddr1 = 0;
 	pendingNotifyMinAddr2 = 0xFFFFFFFF;
 	pendingNotifyMaxAddr2 = 0;
+
+	flushThreadRunning = true;
+	flushThreadPending = false;
+	flushThread = std::thread(&FlushMemInfoThread);
 }

 void MemBlockInfoShutdown() {
-	std::lock_guard<std::mutex> guard(pendingMutex);
+	{
+		std::lock_guard<std::mutex> guard(pendingReadMutex);
+		std::lock_guard<std::mutex> guardW(pendingWriteMutex);
 		allocMap.Reset();
 		suballocMap.Reset();
 		writeMap.Reset();

@@ -582,6 +698,15 @@ void MemBlockInfoShutdown() {
 		pendingNotifies.clear();
 	}

+	if (flushThreadRunning.load()) {
+		std::lock_guard<std::mutex> guard(flushLock);
+		flushThreadRunning = false;
+		flushThreadPending = true;
+	}
+	flushCond.notify_one();
+	flushThread.join();
 }

 void MemBlockInfoDoState(PointerWrap &p) {
 	auto s = p.Section("MemBlockInfo", 0, 1);
 	if (!s)
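The flush thread added above follows a classic producer/consumer shape: writers append under pendingWriteMutex and poke a condition variable, and the worker drains batches off-thread. A self-contained sketch of just that signaling pattern, with hypothetical names (the real code drains via FlushPendingMemInfo()):

#include <atomic>
#include <condition_variable>
#include <mutex>
#include <thread>

static std::mutex g_lock;
static std::condition_variable g_cond;
static std::atomic<bool> g_running{true};
static bool g_pending = false;

static void Worker() {
	while (g_running.load()) {
		{
			std::unique_lock<std::mutex> guard(g_lock);
			g_cond.wait(guard, [] { return g_pending || !g_running.load(); });
			g_pending = false;
		}
		// Drain the queued work here.
	}
}

static void RequestFlush() {
	{
		std::lock_guard<std::mutex> guard(g_lock);
		g_pending = true;
	}
	g_cond.notify_one();
}

int main() {
	std::thread worker(Worker);
	RequestFlush();
	g_running = false;
	RequestFlush();  // wake the worker so it can observe the shutdown flag
	worker.join();
	return 0;
}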
@@ -53,6 +53,7 @@ struct MemBlockInfo {

 void NotifyMemInfo(MemBlockFlags flags, uint32_t start, uint32_t size, const char *tag, size_t tagLength);
 void NotifyMemInfoPC(MemBlockFlags flags, uint32_t start, uint32_t size, uint32_t pc, const char *tag, size_t tagLength);
+void NotifyMemInfoCopy(uint32_t destPtr, uint32_t srcPtr, uint32_t size, const char *prefix);

 // This lets us avoid calling strlen on string constants, instead the string length (including null,
 // so we have to subtract 1) is computed at compile time.
@@ -24,8 +24,11 @@
 #include "Common/System/OSD.h"
 #include "Common/Log.h"
 #include "Common/Swap.h"
+#include "Common/File/FileUtil.h"
+#include "Common/File/DirListing.h"
 #include "Core/Loaders.h"
 #include "Core/FileSystems/BlockDevices.h"
+#include "libchdr/chd.h"

 extern "C"
 {

@@ -37,19 +40,28 @@ extern "C"
 std::mutex NPDRMDemoBlockDevice::mutex_;

 BlockDevice *constructBlockDevice(FileLoader *fileLoader) {
-	// Check for CISO
 	if (!fileLoader->Exists())
 		return nullptr;
-	char buffer[4]{};
-	size_t size = fileLoader->ReadAt(0, 1, 4, buffer);
-	if (size == 4 && !memcmp(buffer, "CISO", 4))
+	char buffer[8]{};
+	size_t size = fileLoader->ReadAt(0, 1, 8, buffer);
+	if (size != 8) {
+		// Bad or empty file
+		return nullptr;
+	}
+
+	// Check for CISO
+	if (!memcmp(buffer, "CISO", 4)) {
 		return new CISOFileBlockDevice(fileLoader);
-	if (size == 4 && !memcmp(buffer, "\x00PBP", 4)) {
+	} else if (!memcmp(buffer, "\x00PBP", 4)) {
 		uint32_t psarOffset = 0;
 		size = fileLoader->ReadAt(0x24, 1, 4, &psarOffset);
 		if (size == 4 && psarOffset < fileLoader->FileSize())
 			return new NPDRMDemoBlockDevice(fileLoader);
+	} else if (!memcmp(buffer, "MComprHD", 8)) {
+		return new CHDFileBlockDevice(fileLoader);
 	}

 	// Should be just a regular ISO. Let's open it as a plain block device and let the other systems take over.
 	return new FileBlockDevice(fileLoader);
 }

@@ -445,7 +457,6 @@ NPDRMDemoBlockDevice::NPDRMDemoBlockDevice(FileLoader *fileLoader)
 	}

 	currentBlock = -1;
-
 }

 NPDRMDemoBlockDevice::~NPDRMDemoBlockDevice()
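constructBlockDevice() above now sniffs one 8-byte header and dispatches on magic bytes: "CISO", "\x00PBP", and CHD's "MComprHD". A standalone sketch of the same detection using plain stdio in place of FileLoader (illustrative only, not the project's API):

#include <cstdio>
#include <cstring>

// Returns a label for the detected container format, or nullptr on a bad/empty file.
static const char *DetectContainer(const char *path) {
	unsigned char buffer[8] = {};
	FILE *f = fopen(path, "rb");
	if (!f)
		return nullptr;
	size_t n = fread(buffer, 1, 8, f);
	fclose(f);
	if (n != 8)
		return nullptr;  // bad or empty file
	if (!memcmp(buffer, "CISO", 4))
		return "CISO";
	if (!memcmp(buffer, "\x00PBP", 4))
		return "PBP";
	if (!memcmp(buffer, "MComprHD", 8))
		return "CHD";
	return "ISO";  // fall through to a plain block device
}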
@ -520,3 +531,150 @@ bool NPDRMDemoBlockDevice::ReadBlock(int blockNumber, u8 *outPtr, bool uncached)

	return true;
}

/*
 * CHD file
 */
static const UINT8 nullsha1[CHD_SHA1_BYTES] = { 0 };

struct CHDImpl {
	chd_file *chd = nullptr;
	const chd_header *header = nullptr;
};

CHDFileBlockDevice::CHDFileBlockDevice(FileLoader *fileLoader)
	: BlockDevice(fileLoader), impl_(new CHDImpl())
{
	Path paths[8];
	paths[0] = fileLoader->GetPath();
	int depth = 0;

	/*
	// TODO: Support parent/child CHD files.

	// Default, in case of failure
	numBlocks = 0;

	chd_header childHeader;

	chd_error err = chd_read_header(paths[0].c_str(), &childHeader);
	if (err != CHDERR_NONE) {
		ERROR_LOG(LOADER, "Error loading CHD header for '%s': %s", paths[0].c_str(), chd_error_string(err));
		NotifyReadError();
		return;
	}

	if (memcmp(nullsha1, childHeader.parentsha1, sizeof(childHeader.sha1)) != 0) {
		chd_header parentHeader;

		// Look for parent CHD in current directory
		Path chdDir = paths[0].NavigateUp();

		std::vector<File::FileInfo> files;
		if (File::GetFilesInDir(chdDir, &files)) {
			parentHeader.length = 0;

			for (const auto &file : files) {
				std::string extension = file.fullName.GetFileExtension();
				if (extension != ".chd") {
					continue;
				}

				if (chd_read_header(filepath.c_str(), &parentHeader) == CHDERR_NONE &&
					memcmp(parentHeader.sha1, childHeader.parentsha1, sizeof(parentHeader.sha1)) == 0) {
					// ERROR_LOG(LOADER, "Checking '%s'", filepath.c_str());
					paths[++depth] = filepath;
					break;
				}
			}

			// Check if parentHeader was opened
			if (parentHeader.length == 0) {
				ERROR_LOG(LOADER, "Error loading CHD '%s': parents not found", fileLoader->GetPath().c_str());
				NotifyReadError();
				return;
			}
			memcpy(childHeader.parentsha1, parentHeader.parentsha1, sizeof(childHeader.parentsha1));
		} while (memcmp(nullsha1, childHeader.parentsha1, sizeof(childHeader.sha1)) != 0);
	}
	*/

	chd_file *parent = NULL;
	chd_file *child = NULL;

	FILE *file = File::OpenCFile(paths[depth], "rb");
	if (!file) {
		ERROR_LOG(LOADER, "Error opening CHD file '%s'", paths[depth].c_str());
		NotifyReadError();
		return;
	}
	chd_error err = chd_open_file(file, CHD_OPEN_READ, NULL, &child);
	if (err != CHDERR_NONE) {
		ERROR_LOG(LOADER, "Error loading CHD '%s': %s", paths[depth].c_str(), chd_error_string(err));
		NotifyReadError();
		return;
	}

	// We won't enter this loop until we enable the parent/child stuff above.
	for (int d = depth - 1; d >= 0; d--) {
		parent = child;
		child = NULL;
		// TODO: Use chd_open_file
		err = chd_open(paths[d].c_str(), CHD_OPEN_READ, parent, &child);
		if (err != CHDERR_NONE) {
			ERROR_LOG(LOADER, "Error loading CHD '%s': %s", paths[d].c_str(), chd_error_string(err));
			NotifyReadError();
			return;
		}
	}
	impl_->chd = child;

	impl_->header = chd_get_header(impl_->chd);
	readBuffer = new u8[impl_->header->hunkbytes];
	currentHunk = -1;
	blocksPerHunk = impl_->header->hunkbytes / impl_->header->unitbytes;
	numBlocks = impl_->header->unitcount;
}

CHDFileBlockDevice::~CHDFileBlockDevice()
{
	if (numBlocks > 0) {
		chd_close(impl_->chd);
		delete[] readBuffer;
	}
}

bool CHDFileBlockDevice::ReadBlock(int blockNumber, u8 *outPtr, bool uncached)
{
	if ((u32)blockNumber >= numBlocks) {
		memset(outPtr, 0, GetBlockSize());
		return false;
	}
	u32 hunk = blockNumber / blocksPerHunk;
	u32 blockInHunk = blockNumber % blocksPerHunk;

	if (currentHunk != hunk) {
		chd_error err = chd_read(impl_->chd, hunk, readBuffer);
		if (err != CHDERR_NONE) {
			ERROR_LOG(LOADER, "CHD read failed: %d %d %s", blockNumber, hunk, chd_error_string(err));
			NotifyReadError();
		}
	}
	memcpy(outPtr, readBuffer + blockInHunk * impl_->header->unitbytes, GetBlockSize());

	return true;
}

bool CHDFileBlockDevice::ReadBlocks(u32 minBlock, int count, u8 *outPtr) {
	if (minBlock >= numBlocks) {
		memset(outPtr, 0, GetBlockSize() * count);
		return false;
	}

	for (int i = 0; i < count; i++) {
		if (!ReadBlock(minBlock + i, outPtr + i * GetBlockSize())) {
			return false;
		}
	}
	return true;
}

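Note on the device above: CHD stores data in fixed-size "hunks", so a 2048-byte ISO block read is a division, a modulo, and a memcpy against the most recently decompressed hunk. A minimal standalone model of that mapping (the sizes and the fillHunk stand-in are assumptions for illustration, not the libchdr API):

#include <cstdint>
#include <cstring>
#include <cstdio>
#include <vector>

// Models CHDFileBlockDevice::ReadBlock: cache one hunk, copy units out of it.
struct HunkCacheSketch {
	uint32_t hunkBytes = 8 * 2048;    // assumed hunk size (8 sectors)
	uint32_t unitBytes = 2048;        // one ISO sector per CHD unit
	int64_t currentHunk = -1;         // -1 = nothing cached yet
	std::vector<uint8_t> buf = std::vector<uint8_t>(hunkBytes);

	// Stand-in for chd_read(); just fills a recognizable pattern.
	void fillHunk(uint32_t hunk) {
		memset(buf.data(), (int)(hunk & 0xFF), buf.size());
	}

	void readBlock(uint32_t block, uint8_t *out) {
		uint32_t blocksPerHunk = hunkBytes / unitBytes;
		uint32_t hunk = block / blocksPerHunk;
		uint32_t blockInHunk = block % blocksPerHunk;
		if (currentHunk != hunk) {    // only decompress on a cache miss
			fillHunk(hunk);
			currentHunk = hunk;
		}
		memcpy(out, buf.data() + blockInHunk * unitBytes, unitBytes);
	}
};

int main() {
	HunkCacheSketch cache;
	uint8_t sector[2048];
	cache.readBlock(17, sector);      // hunk 2, unit 1 within that hunk
	printf("first byte = %d\n", sector[0]);
}
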
@ -130,5 +130,23 @@ private:
	u8 *tempBuf;
};

struct CHDImpl;

class CHDFileBlockDevice : public BlockDevice {
public:
	CHDFileBlockDevice(FileLoader *fileLoader);
	~CHDFileBlockDevice();
	bool ReadBlock(int blockNumber, u8 *outPtr, bool uncached = false) override;
	bool ReadBlocks(u32 minBlock, int count, u8 *outPtr) override;
	u32 GetNumBlocks() override { return numBlocks; }
	bool IsDisc() override { return true; }

private:
	std::unique_ptr<CHDImpl> impl_;
	u8 *readBuffer;
	u32 currentHunk;
	u32 blocksPerHunk;
	u32 numBlocks;
};

BlockDevice *constructBlockDevice(FileLoader *fileLoader);

@ -159,16 +159,19 @@ static int Replace_memcpy() {
	RETURN(destPtr);

	if (MemBlockInfoDetailed(bytes)) {
		// It's pretty common that games will copy video data.
		// Detect that by manually reading the tag when the size looks right.
		if (bytes == 512 * 272 * 4) {
		char tagData[128];
		size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpy/", srcPtr, bytes);
		NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize);
		NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize);

		// It's pretty common that games will copy video data.
		if (!strcmp(tagData, "ReplaceMemcpy/VideoDecode") || !strcmp(tagData, "ReplaceMemcpy/VideoDecodeRange")) {
			if (bytes == 512 * 272 * 4) {
				gpu->PerformWriteFormattedFromMemory(destPtr, bytes, 512, GE_FORMAT_8888);
			}
		} else {
			NotifyMemInfoCopy(destPtr, srcPtr, bytes, "ReplaceMemcpy/");
		}
	}

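What this hunk changes: instead of keying only on the copy size, the replacement now reads the memory tag back and only treats the copy as a framebuffer upload when the source was written by the video decoder. A schematic version of the decision (the tag strings come from the diff; the helper name is hypothetical):

#include <cstring>
#include <cstdint>

// Sketch of the Replace_memcpy decision: upload to the GPU only when the
// source block was tagged by the video decoder and has framebuffer size.
bool ShouldUploadAsFramebuffer(const char *tagData, uint32_t bytes) {
	bool fromVideoDecode = !strcmp(tagData, "ReplaceMemcpy/VideoDecode") ||
	                       !strcmp(tagData, "ReplaceMemcpy/VideoDecodeRange");
	return fromVideoDecode && bytes == 512 * 272 * 4;  // 512-wide 8888 buffer
}
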
@ -212,16 +215,19 @@ static int Replace_memcpy_jak() {
	RETURN(destPtr);

	if (MemBlockInfoDetailed(bytes)) {
		// It's pretty common that games will copy video data.
		// Detect that by manually reading the tag when the size looks right.
		if (bytes == 512 * 272 * 4) {
		char tagData[128];
		size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpy/", srcPtr, bytes);
		NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize);
		NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize);

		// It's pretty common that games will copy video data.
		if (!strcmp(tagData, "ReplaceMemcpy/VideoDecode") || !strcmp(tagData, "ReplaceMemcpy/VideoDecodeRange")) {
			if (bytes == 512 * 272 * 4) {
				gpu->PerformWriteFormattedFromMemory(destPtr, bytes, 512, GE_FORMAT_8888);
			}
		} else {
			NotifyMemInfoCopy(destPtr, srcPtr, bytes, "ReplaceMemcpy/");
		}
	}

@ -252,10 +258,7 @@ static int Replace_memcpy16() {
	RETURN(destPtr);

	if (MemBlockInfoDetailed(bytes)) {
		char tagData[128];
		size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpy16/", srcPtr, bytes);
		NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize);
		NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize);
		NotifyMemInfoCopy(destPtr, srcPtr, bytes, "ReplaceMemcpy16/");
	}

	return 10 + bytes / 4;  // approximation

@ -294,10 +297,7 @@ static int Replace_memcpy_swizzled() {
	RETURN(0);

	if (MemBlockInfoDetailed(pitch * h)) {
		char tagData[128];
		size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemcpySwizzle/", srcPtr, pitch * h);
		NotifyMemInfo(MemBlockFlags::READ, srcPtr, pitch * h, tagData, tagSize);
		NotifyMemInfo(MemBlockFlags::WRITE, destPtr, pitch * h, tagData, tagSize);
		NotifyMemInfoCopy(destPtr, srcPtr, pitch * h, "ReplaceMemcpySwizzle/");
	}

	return 10 + (pitch * h) / 4;  // approximation

@ -326,10 +326,7 @@ static int Replace_memmove() {
	RETURN(destPtr);

	if (MemBlockInfoDetailed(bytes)) {
		char tagData[128];
		size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "ReplaceMemmove/", srcPtr, bytes);
		NotifyMemInfo(MemBlockFlags::READ, srcPtr, bytes, tagData, tagSize);
		NotifyMemInfo(MemBlockFlags::WRITE, destPtr, bytes, tagData, tagSize);
		NotifyMemInfoCopy(destPtr, srcPtr, bytes, "ReplaceMemmove/");
	}

	return 10 + bytes / 4;  // approximation

@ -1590,7 +1587,10 @@ std::vector<int> GetReplacementFuncIndexes(u64 hash, int funcSize) {
	return emptyResult;
}

const ReplacementTableEntry *GetReplacementFunc(int i) {
const ReplacementTableEntry *GetReplacementFunc(size_t i) {
	if (i >= ARRAY_SIZE(entries)) {
		return nullptr;
	}
	return &entries[i];
}

@ -64,7 +64,7 @@ void Replacement_Shutdown();

int GetNumReplacementFuncs();
std::vector<int> GetReplacementFuncIndexes(u64 hash, int funcSize);
const ReplacementTableEntry *GetReplacementFunc(int index);
const ReplacementTableEntry *GetReplacementFunc(size_t index);

void WriteReplaceInstructions(u32 address, u64 hash, int size);
void RestoreReplacedInstruction(u32 address);

@ -51,12 +51,11 @@ static int __DmacMemcpy(u32 dst, u32 src, u32 size) {
	}
	if (!skip && size != 0) {
		currentMIPS->InvalidateICache(src, size);
		if (Memory::IsValidRange(dst, size) && Memory::IsValidRange(src, size)) {
			memcpy(Memory::GetPointerWriteUnchecked(dst), Memory::GetPointerUnchecked(src), size);
		}
		if (MemBlockInfoDetailed(size)) {
			char tagData[128];
			size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "DmacMemcpy/", src, size);
			Memory::Memcpy(dst, src, size, tagData, tagSize);
		} else {
			Memory::Memcpy(dst, src, size, "DmacMemcpy");
			NotifyMemInfoCopy(dst, src, size, "DmacMemcpy/");
		}
		currentMIPS->InvalidateICache(dst, size);
	}

@ -1486,6 +1486,12 @@ static u32 sceIoLseek32Async(int id, int offset, int whence) {
}

static FileNode *__IoOpen(int &error, const char *filename, int flags, int mode) {
	if (!filename) {
		// To prevent crashes. Not sure about the correct value.
		error = SCE_KERNEL_ERROR_ERRNO_FILE_NOT_FOUND;
		return nullptr;
	}

	int access = FILEACCESS_NONE;
	if (flags & PSP_O_RDONLY)
		access |= FILEACCESS_READ;

@ -657,10 +657,7 @@ static u32 sceKernelMemcpy(u32 dst, u32 src, u32 size)
	}

	if (MemBlockInfoDetailed(size)) {
		char tagData[128];
		size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "KernelMemcpy/", src, size);
		NotifyMemInfo(MemBlockFlags::READ, src, size, tagData, tagSize);
		NotifyMemInfo(MemBlockFlags::WRITE, dst, size, tagData, tagSize);
		NotifyMemInfoCopy(dst, src, size, "KernelMemcpy/");
	}

	return dst;

@ -693,10 +690,7 @@ static u32 sysclib_memcpy(u32 dst, u32 src, u32 size) {
		memcpy(Memory::GetPointerWriteUnchecked(dst), Memory::GetPointerUnchecked(src), size);
	}
	if (MemBlockInfoDetailed(size)) {
		char tagData[128];
		size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "KernelMemcpy/", src, size);
		NotifyMemInfo(MemBlockFlags::READ, src, size, tagData, tagSize);
		NotifyMemInfo(MemBlockFlags::WRITE, dst, size, tagData, tagSize);
		NotifyMemInfoCopy(dst, src, size, "KernelMemcpy/");
	}
	return dst;
}

@ -797,10 +791,7 @@ static u32 sysclib_memmove(u32 dst, u32 src, u32 size) {
		memmove(Memory::GetPointerWriteUnchecked(dst), Memory::GetPointerUnchecked(src), size);
	}
	if (MemBlockInfoDetailed(size)) {
		char tagData[128];
		size_t tagSize = FormatMemWriteTagAt(tagData, sizeof(tagData), "KernelMemmove/", src, size);
		NotifyMemInfo(MemBlockFlags::READ, src, size, tagData, tagSize);
		NotifyMemInfo(MemBlockFlags::WRITE, dst, size, tagData, tagSize);
		NotifyMemInfoCopy(dst, src, size, "KernelMemmove/");
	}
	return 0;
}

@ -516,11 +516,11 @@ bool InputMappingsFromPspButton(int btn, std::vector<MultiInputMapping> *mapping
		return false;
	}
	bool mapped = false;
	for (auto iter2 = iter->second.begin(); iter2 != iter->second.end(); ++iter2) {
		bool ignore = ignoreMouse && iter2->HasMouse();
	for (auto &iter2 : iter->second) {
		bool ignore = ignoreMouse && iter2.HasMouse();
		if (mappings && !ignore) {
			mapped = true;
			mappings->push_back(*iter2);
			mappings->push_back(iter2);
		}
	}
	return mapped;

@ -536,8 +536,6 @@ bool PspButtonHasMappings(int btn) {
}

MappedAnalogAxes MappedAxesForDevice(InputDeviceID deviceId) {
	MappedAnalogAxes result{};

	// Find the axisId mapped for a specific virtual button.
	auto findAxisId = [&](int btn) -> MappedAnalogAxis {
		MappedAnalogAxis info{ -1 };

@ -563,6 +561,7 @@ MappedAnalogAxes MappedAxesForDevice(InputDeviceID deviceId) {
		return MappedAnalogAxis{ -1 };
	};

	MappedAnalogAxes result;
	std::lock_guard<std::recursive_mutex> guard(g_controllerMapLock);
	result.leftX = findAxisIdPair(VIRTKEY_AXIS_X_MIN, VIRTKEY_AXIS_X_MAX);
	result.leftY = findAxisIdPair(VIRTKEY_AXIS_Y_MIN, VIRTKEY_AXIS_Y_MAX);

@ -621,6 +620,7 @@ bool ReplaceSingleKeyMapping(int btn, int index, MultiInputMapping key) {
}

void DeleteNthMapping(int key, int number) {
	std::lock_guard<std::recursive_mutex> guard(g_controllerMapLock);
	auto iter = g_controllerMap.find(key);
	if (iter != g_controllerMap.end()) {
		if (number < iter->second.size()) {

@ -699,6 +699,8 @@ void LoadFromIni(IniFile &file) {
		return;
	}

	std::lock_guard<std::recursive_mutex> guard(g_controllerMapLock);

	Section *controls = file.GetOrCreateSection("ControlMapping");
	for (size_t i = 0; i < ARRAY_SIZE(psp_button_names); i++) {
		std::string value;

@ -730,6 +732,8 @@ void LoadFromIni(IniFile &file) {
void SaveToIni(IniFile &file) {
	Section *controls = file.GetOrCreateSection("ControlMapping");

	std::lock_guard<std::recursive_mutex> guard(g_controllerMapLock);

	for (size_t i = 0; i < ARRAY_SIZE(psp_button_names); i++) {
		std::vector<MultiInputMapping> keys;
		InputMappingsFromPspButton(psp_button_names[i].key, &keys, false);

@ -94,6 +94,8 @@ IdentifiedFileType Identify_File(FileLoader *fileLoader, std::string *errorStrin
		return IdentifiedFileType::PSP_ISO;
	} else if (extension == ".cso") {
		return IdentifiedFileType::PSP_ISO;
	} else if (extension == ".chd") {
		return IdentifiedFileType::PSP_ISO;
	} else if (extension == ".ppst") {
		return IdentifiedFileType::PPSSPP_SAVESTATE;
	} else if (extension == ".ppdmp") {

@ -561,7 +561,7 @@ void ArmJit::Comp_ReplacementFunc(MIPSOpcode op)

	const ReplacementTableEntry *entry = GetReplacementFunc(index);
	if (!entry) {
		ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding);
		ERROR_LOG_REPORT_ONCE(replFunc, HLE, "Invalid replacement op %08x at %08x", op.encoding, js.compilerPC);
		return;
	}

@ -745,7 +745,9 @@ void ArmJit::UpdateRoundingMode(u32 fcr31) {
// I don't think this gives us that much benefit.
void ArmJit::WriteExit(u32 destination, int exit_num)
{
	// TODO: Check destination is valid and trigger exception.
	// NOTE: Can't blindly check for bad destination addresses here, sometimes exits with bad destinations are written intentionally (like breaks).
	_assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num. dest=%08x", destination);

	WriteDownCount();
	//If nobody has taken care of this yet (this can be removed when all branches are done)
	JitBlock *b = js.curBlock;

@ -1504,7 +1504,7 @@ namespace MIPSComp {
	void Arm64Jit::Comp_VCrossQuat(MIPSOpcode op) {
		// This op does not support prefixes anyway.
		CONDITIONAL_DISABLE(VFPU_VEC);
		if (js.HasUnknownPrefix())
		if (!js.HasNoPrefix())
			DISABLE;

		VectorSize sz = GetVecSize(op);

@ -1521,20 +1521,26 @@ namespace MIPSComp {

		if (sz == V_Triple) {
			MIPSReg temp3 = fpr.GetTempV();
			MIPSReg temp4 = fpr.GetTempV();
			fpr.MapRegV(temp3, MAP_DIRTY | MAP_NOINIT);
			fpr.MapRegV(temp4, MAP_DIRTY | MAP_NOINIT);
			// Cross product vcrsp.t

			// Compute X
			fp.FMUL(S0, fpr.V(sregs[1]), fpr.V(tregs[2]));
			fp.FMSUB(S0, fpr.V(sregs[2]), fpr.V(tregs[1]), S0);
			// Note: using FMSUB here causes accuracy issues, see #18203.
			// Compute X: s[1] * t[2] - s[2] * t[1]
			fp.FMUL(fpr.V(temp3), fpr.V(sregs[1]), fpr.V(tregs[2]));
			fp.FMUL(fpr.V(temp4), fpr.V(sregs[2]), fpr.V(tregs[1]));
			fp.FSUB(S0, fpr.V(temp3), fpr.V(temp4));

			// Compute Y
			fp.FMUL(S1, fpr.V(sregs[2]), fpr.V(tregs[0]));
			fp.FMSUB(S1, fpr.V(sregs[0]), fpr.V(tregs[2]), S1);
			// Compute Y: s[2] * t[0] - s[0] * t[2]
			fp.FMUL(fpr.V(temp3), fpr.V(sregs[2]), fpr.V(tregs[0]));
			fp.FMUL(fpr.V(temp4), fpr.V(sregs[0]), fpr.V(tregs[2]));
			fp.FSUB(S1, fpr.V(temp3), fpr.V(temp4));

			// Compute Z
			// Compute Z: s[0] * t[1] - s[1] * t[0]
			fp.FMUL(fpr.V(temp3), fpr.V(sregs[0]), fpr.V(tregs[1]));
			fp.FMSUB(fpr.V(temp3), fpr.V(sregs[1]), fpr.V(tregs[0]), fpr.V(temp3));
			fp.FMUL(fpr.V(temp4), fpr.V(sregs[1]), fpr.V(tregs[0]));
			fp.FSUB(fpr.V(temp3), fpr.V(temp3), fpr.V(temp4));

			fpr.MapRegsAndSpillLockV(dregs, sz, MAP_NOINIT);
			fp.FMOV(fpr.V(dregs[0]), S0);

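The accuracy note above comes down to fused versus separate rounding: a fused multiply-subtract computes a*b - c with a single rounding, while a multiply followed by a subtract rounds twice, and the twice-rounded result is evidently what the affected games expect. A small demonstration of the difference:

#include <cmath>
#include <cstdio>

// Fused vs. separate rounding, the difference behind the FMSUB note above.
// With these inputs the exact product 1 - 2^-44 rounds to 1.0f, so the
// separate multiply-then-subtract yields 0 while the fused op keeps -2^-44.
int main() {
	float a = 1.0f + ldexpf(1.0f, -22);
	float b = 1.0f - ldexpf(1.0f, -22);
	volatile float product = a * b;      // volatile: keep the compiler from fusing
	float separate = product - 1.0f;     // two roundings -> 0
	float fused = fmaf(a, b, -1.0f);     // one rounding -> about -5.7e-14
	printf("separate = %g, fused = %g\n", separate, fused);
	return 0;
}
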
@ -50,8 +50,18 @@ static void ShowPC(void *membase, void *jitbase) {
}

void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
	BeginWrite(GetMemoryProtectPageSize());
	// This will be used as a writable scratch area, always 32-bit accessible.
	const u8 *start = AlignCodePage();
	if (DebugProfilerEnabled()) {
		ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
		hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
		Write32(0);
		hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr();
		Write32(0);
	}

	const u8 *disasmStart = AlignCodePage();
	BeginWrite(GetMemoryProtectPageSize());

	if (jo.useStaticAlloc) {
		saveStaticRegisters_ = AlignCode16();

@ -63,8 +73,6 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
		regs_.EmitLoadStaticRegisters();
		LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
		RET();

		start = saveStaticRegisters_;
	} else {
		saveStaticRegisters_ = nullptr;
		loadStaticRegisters_ = nullptr;

@ -152,13 +160,17 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
	MOVI2R(JITBASEREG, (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE);

	LoadStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	MovFromPC(SCRATCH1);
	WriteDebugPC(SCRATCH1);
	outerLoopPCInSCRATCH1_ = GetCodePtr();
	MovToPC(SCRATCH1);
	outerLoop_ = GetCodePtr();
	SaveStaticRegisters();  // Advance can change the downcount, so must save/restore
	RestoreRoundingMode(true);
	WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
	QuickCallFunction(SCRATCH1_64, &CoreTiming::Advance);
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	ApplyRoundingMode(true);
	LoadStaticRegisters();

@ -191,6 +203,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
	}

	MovFromPC(SCRATCH1);
	WriteDebugPC(SCRATCH1);
#ifdef MASKED_PSP_MEMORY
	ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK);
#endif

@ -206,7 +219,9 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {

	// No block found, let's jit. We don't need to save static regs, they're all callee saved.
	RestoreRoundingMode(true);
	WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
	QuickCallFunction(SCRATCH1_64, &MIPSComp::JitAt);
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	ApplyRoundingMode(true);

	// Let's just dispatch again, we'll enter the block since we know it's there.

@ -221,6 +236,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
	const uint8_t *quitLoop = GetCodePtr();
	SetJumpTarget(badCoreState);

	WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
	SaveStaticRegisters();
	RestoreRoundingMode(true);

@ -251,7 +267,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {

	// Leave this at the end, add more stuff above.
	if (enableDisasm) {
		std::vector<std::string> lines = DisassembleArm64(start, (int)(GetCodePtr() - start));
		std::vector<std::string> lines = DisassembleArm64(disasmStart, (int)(GetCodePtr() - disasmStart));
		for (auto s : lines) {
			INFO_LOG(JIT, "%s", s.c_str());
		}

@ -170,9 +170,18 @@ void Arm64JitBackend::CompIR_Compare(IRInst inst) {
		break;

	case IROp::SltU:
		if (regs_.IsGPRImm(inst.src1) && regs_.GetGPRImm(inst.src1) == 0) {
			// This is kinda common, same as != 0. Avoid flushing src1.
			regs_.SpillLockGPR(inst.src2, inst.dest);
			regs_.MapGPR(inst.src2);
			regs_.MapGPR(inst.dest, MIPSMap::NOINIT);
			CMP(regs_.R(inst.src2), 0);
			CSET(regs_.R(inst.dest), CC_NEQ);
		} else {
			regs_.Map(inst);
			CMP(regs_.R(inst.src1), regs_.R(inst.src2));
			CSET(regs_.R(inst.dest), CC_LO);
		}
		break;

	case IROp::SltUConst:

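The identity the SltU special case relies on: for unsigned x, (0 < x) is exactly (x != 0), so the JIT can emit CMP x, #0 / CSET ne without materializing the zero operand. A scalar check of that equivalence (helper name is just for the sketch):

#include <cstdint>
#include <cstdio>

// MIPS sltu semantics: 1 if a < b as unsigned, else 0.
uint32_t sltu(uint32_t a, uint32_t b) { return a < b ? 1 : 0; }

int main() {
	for (uint32_t x : {0u, 1u, 0x80000000u}) {
		// sltu(0, x) and (x != 0) agree for every x.
		printf("sltu(0, %u) = %u, (x != 0) = %u\n", x, sltu(0, x), (unsigned)(x != 0));
	}
}
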
@ -298,6 +298,11 @@ void Arm64JitBackend::CompIR_FCompare(IRInst inst) {

	case IROp::FCmpVfpuAggregate:
		regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY);
		if (inst.dest == 1) {
			// Just replicate the lowest bit to the others.
			BFI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), 4, 1);
			BFI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), 5, 1);
		} else {
			MOVI2R(SCRATCH1, inst.dest);
			// Grab the any bit.
			TST(regs_.R(IRREG_VFPU_CC), SCRATCH1);

@ -309,6 +314,7 @@ void Arm64JitBackend::CompIR_FCompare(IRInst inst) {
			// Insert the bits into place.
			BFI(regs_.R(IRREG_VFPU_CC), SCRATCH2, 4, 1);
			BFI(regs_.R(IRREG_VFPU_CC), SCRATCH1, 5, 1);
		}
		break;

	default:

@ -502,6 +508,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) {

	auto callFuncF_F = [&](float (*func)(float)) {
		regs_.FlushBeforeCall();
		WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);

		// It might be in a non-volatile register.
		// TODO: May have to handle a transfer if SIMD here.
		if (regs_.IsFPRMapped(inst.src1)) {

@ -521,6 +529,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) {
		if (regs_.F(inst.dest) != S0) {
			fp_.FMOV(regs_.F(inst.dest), S0);
		}

		WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	};

	switch (inst.op) {

@ -80,7 +80,12 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) {
	// If it's about to be clobbered, don't waste time pointerifying. Use displacement.
	bool clobbersSrc1 = !readsFromSrc1 && regs_.IsGPRClobbered(inst.src1);

	int32_t imm = (int32_t)inst.constant;
	int64_t imm = (int32_t)inst.constant;
	// It can't be this negative, must be a constant address with the top bit set.
	if ((imm & 0xC0000000) == 0x80000000) {
		imm = (uint64_t)(uint32_t)inst.constant;
	}

	LoadStoreArg addrArg;
	if (inst.src1 == MIPS_REG_ZERO) {
		// The constant gets applied later.

@ -100,7 +105,7 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) {

	// Since we can't modify src1, let's just use a temp reg while copying.
	if (!addrArg.useRegisterOffset) {
		ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), (s64)imm, SCRATCH2);
		ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), imm, SCRATCH2);
#ifdef MASKED_PSP_MEMORY
		ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK, SCRATCH2);
#endif

@ -114,7 +119,7 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) {
		// The offset gets set later.
		addrArg.base = regs_.MapGPRAsPointer(inst.src1);
	} else {
		ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), (s64)imm, SCRATCH2);
		ADDI2R(SCRATCH1, regs_.MapGPR(inst.src1), imm, SCRATCH2);
#ifdef MASKED_PSP_MEMORY
		ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK, SCRATCH2);
#endif

@ -137,15 +142,15 @@ Arm64JitBackend::LoadStoreArg Arm64JitBackend::PrepareSrc1Address(IRInst inst) {
	int scale = IROpToByteWidth(inst.op);
	if (imm > 0 && (imm & (scale - 1)) == 0 && imm <= 0xFFF * scale) {
		// Okay great, use the LDR/STR form.
		addrArg.immOffset = imm;
		addrArg.immOffset = (int)imm;
		addrArg.useUnscaled = false;
	} else if (imm >= -256 && imm < 256) {
		// An unscaled offset (LDUR/STUR) should work fine for this range.
		addrArg.immOffset = imm;
		addrArg.immOffset = (int)imm;
		addrArg.useUnscaled = true;
	} else {
		// No luck, we'll need to load into a register.
		MOVI2R(SCRATCH1, (s64)imm);
		MOVI2R(SCRATCH1, imm);
		addrArg.regOffset = SCRATCH1;
		addrArg.useRegisterOffset = true;
		addrArg.signExtendRegOffset = true;

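The widening change above matters because inst.constant is a u32: casting through int32_t sign-extends, so a constant address with the top bit set would become a huge negative displacement. The fix keeps the sign extension for genuine small offsets but re-widens top-bit addresses as unsigned. A standalone check of exactly that arithmetic:

#include <cstdint>
#include <cstdio>

// Same widening logic as PrepareSrc1Address, on a constant with the top bit set.
int main() {
	uint32_t constant = 0x88000000;
	int64_t imm = (int32_t)constant;           // sign-extended: -2013265920
	if ((imm & 0xC0000000) == 0x80000000) {
		imm = (uint64_t)(uint32_t)constant;    // back to +0x88000000
	}
	printf("imm = 0x%llx\n", (long long)imm);  // prints 0x88000000
}
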
@ -21,9 +21,11 @@

#include "Common/Profiler/Profiler.h"
#include "Core/Core.h"
#include "Core/Debugger/Breakpoints.h"
#include "Core/HLE/HLE.h"
#include "Core/HLE/ReplaceTables.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/IR/IRInterpreter.h"
#include "Core/MIPS/ARM64/Arm64IRJit.h"
#include "Core/MIPS/ARM64/Arm64IRRegCache.h"

@ -70,6 +72,7 @@ void Arm64JitBackend::CompIR_Basic(IRInst inst) {
		break;

	case IROp::SetPCConst:
		lastConstPC_ = inst.constant;
		MOVI2R(SCRATCH1, inst.constant);
		MovToPC(SCRATCH1);
		break;

@ -85,29 +88,12 @@ void Arm64JitBackend::CompIR_Breakpoint(IRInst inst) {

	switch (inst.op) {
	case IROp::Breakpoint:
	{
		FlushAll();
		// Note: the constant could be a delay slot.
		MOVI2R(W0, inst.constant);
		QuickCallFunction(SCRATCH2_64, &IRRunBreakpoint);
		break;

	case IROp::MemoryCheck:
	{
		ARM64Reg addrBase = regs_.MapGPR(inst.src1);
		FlushAll();
		ADDI2R(W1, addrBase, inst.constant, SCRATCH1);
		MovFromPC(W0);
		ADDI2R(W0, W0, inst.dest, SCRATCH1);
		QuickCallFunction(SCRATCH2_64, &IRRunMemCheck);
		break;
	}

	default:
		INVALIDOP;
		break;
	}

		// Both return a flag on whether to bail out.
		ptrdiff_t distance = dispatcherCheckCoreState_ - GetCodePointer();
		if (distance >= -0x100000 && distance < 0x100000) {
			CBNZ(W0, dispatcherCheckCoreState_);

@ -116,6 +102,104 @@ void Arm64JitBackend::CompIR_Breakpoint(IRInst inst) {
			B(dispatcherCheckCoreState_);
			SetJumpTarget(keepOnKeepingOn);
		}
		break;
	}

	case IROp::MemoryCheck:
		if (regs_.IsGPRImm(inst.src1)) {
			uint32_t iaddr = regs_.GetGPRImm(inst.src1) + inst.constant;
			uint32_t checkedPC = lastConstPC_ + inst.dest;
			int size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
			if (size == 0) {
				checkedPC += 4;
				size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
			}
			bool isWrite = MIPSAnalyst::IsOpMemoryWrite(checkedPC);

			MemCheck check;
			if (CBreakPoints::GetMemCheckInRange(iaddr, size, &check)) {
				if (!(check.cond & MEMCHECK_READ) && !isWrite)
					break;
				if (!(check.cond & (MEMCHECK_WRITE | MEMCHECK_WRITE_ONCHANGE)) && isWrite)
					break;

				// We need to flush, or conditions and log expressions will see old register values.
				FlushAll();

				MOVI2R(W0, checkedPC);
				MOVI2R(W1, iaddr);
				QuickCallFunction(SCRATCH2_64, &IRRunMemCheck);

				ptrdiff_t distance = dispatcherCheckCoreState_ - GetCodePointer();
				if (distance >= -0x100000 && distance < 0x100000) {
					CBNZ(W0, dispatcherCheckCoreState_);
				} else {
					FixupBranch keepOnKeepingOn = CBZ(W0);
					B(dispatcherCheckCoreState_);
					SetJumpTarget(keepOnKeepingOn);
				}
			}
		} else {
			uint32_t checkedPC = lastConstPC_ + inst.dest;
			int size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
			if (size == 0) {
				checkedPC += 4;
				size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
			}
			bool isWrite = MIPSAnalyst::IsOpMemoryWrite(checkedPC);

			const auto memchecks = CBreakPoints::GetMemCheckRanges(isWrite);
			// We can trivially skip if there are no checks for this type (i.e. read vs write.)
			if (memchecks.empty())
				break;

			ARM64Reg addrBase = regs_.MapGPR(inst.src1);
			ADDI2R(SCRATCH1, addrBase, inst.constant, SCRATCH2);

			// We need to flush, or conditions and log expressions will see old register values.
			FlushAll();

			std::vector<FixupBranch> hitChecks;
			for (auto it : memchecks) {
				if (it.end != 0) {
					CMPI2R(SCRATCH1, it.start - size, SCRATCH2);
					MOVI2R(SCRATCH2, it.end);
					CCMP(SCRATCH1, SCRATCH2, 0xF, CC_HI);
					hitChecks.push_back(B(CC_LO));
				} else {
					CMPI2R(SCRATCH1, it.start, SCRATCH2);
					hitChecks.push_back(B(CC_EQ));
				}
			}

			FixupBranch noHits = B();

			// Okay, now land any hit here.
			for (auto &fixup : hitChecks)
				SetJumpTarget(fixup);
			hitChecks.clear();

			MOVI2R(W0, checkedPC);
			MOV(W1, SCRATCH1);
			QuickCallFunction(SCRATCH2_64, &IRRunMemCheck);

			ptrdiff_t distance = dispatcherCheckCoreState_ - GetCodePointer();
			if (distance >= -0x100000 && distance < 0x100000) {
				CBNZ(W0, dispatcherCheckCoreState_);
			} else {
				FixupBranch keepOnKeepingOn = CBZ(W0);
				B(dispatcherCheckCoreState_);
				SetJumpTarget(keepOnKeepingOn);
			}

			SetJumpTarget(noHits);
		}
		break;

	default:
		INVALIDOP;
		break;
	}
}

void Arm64JitBackend::CompIR_System(IRInst inst) {

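The CMP/CCMP pair in the range loop above is an unsigned interval test: an access of `size` bytes at `addr` overlaps a memcheck range [start, end) iff addr > start - size and addr < end, both as unsigned comparisons, which is what the B(CC_HI)/B(CC_LO) chain encodes. The scalar equivalent, under that reading:

#include <cstdint>
#include <cstdio>

// Unsigned overlap test matching the emitted CMPI2R/CCMP/B(CC_LO) sequence.
bool Overlaps(uint32_t addr, uint32_t size, uint32_t start, uint32_t end) {
	return addr > start - size && addr < end;
}

int main() {
	printf("%d\n", Overlaps(0x08800100, 4, 0x08800000, 0x08800200)); // 1: inside
	printf("%d\n", Overlaps(0x087FFFFE, 4, 0x08800000, 0x08800200)); // 1: straddles start
	printf("%d\n", Overlaps(0x08800200, 4, 0x08800000, 0x08800200)); // 0: past the end
}
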
@ -126,6 +210,7 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
		FlushAll();
		SaveStaticRegisters();

		WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL);
#ifdef USE_PROFILER
		// When profiling, we can't skip CallSyscall, since it times syscalls.
		MOVI2R(W0, inst.constant);

@ -145,6 +230,7 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
		}
#endif

		WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
		LoadStaticRegisters();
		// This is always followed by an ExitToPC, where we check coreState.
		break;

@ -152,7 +238,9 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
	case IROp::CallReplacement:
		FlushAll();
		SaveStaticRegisters();
		WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT);
		QuickCallFunction(SCRATCH2_64, GetReplacementFunc(inst.constant)->replaceFunc);
		WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
		LoadStaticRegisters();
		SUB(DOWNCOUNTREG, DOWNCOUNTREG, W0);
		break;

@ -274,6 +362,66 @@ void Arm64JitBackend::CompIR_ValidateAddress(IRInst inst) {
		INVALIDOP;
		break;
	}

	if (regs_.IsGPRMappedAsPointer(inst.src1)) {
		if (!jo.enablePointerify) {
			SUB(SCRATCH1_64, regs_.RPtr(inst.src1), MEMBASEREG);
			ADDI2R(SCRATCH1, SCRATCH1, inst.constant, SCRATCH2);
		} else {
			ADDI2R(SCRATCH1, regs_.R(inst.src1), inst.constant, SCRATCH2);
		}
	} else {
		regs_.Map(inst);
		ADDI2R(SCRATCH1, regs_.R(inst.src1), inst.constant, SCRATCH2);
	}
	ANDI2R(SCRATCH1, SCRATCH1, 0x3FFFFFFF, SCRATCH2);

	std::vector<FixupBranch> validJumps;

	FixupBranch unaligned;
	if (alignment == 2) {
		unaligned = TBNZ(SCRATCH1, 0);
	} else if (alignment != 1) {
		TSTI2R(SCRATCH1, alignment - 1, SCRATCH2);
		unaligned = B(CC_NEQ);
	}

	CMPI2R(SCRATCH1, PSP_GetUserMemoryEnd() - alignment, SCRATCH2);
	FixupBranch tooHighRAM = B(CC_HI);
	CMPI2R(SCRATCH1, PSP_GetKernelMemoryBase(), SCRATCH2);
	validJumps.push_back(B(CC_HS));

	CMPI2R(SCRATCH1, PSP_GetVidMemEnd() - alignment, SCRATCH2);
	FixupBranch tooHighVid = B(CC_HI);
	CMPI2R(SCRATCH1, PSP_GetVidMemBase(), SCRATCH2);
	validJumps.push_back(B(CC_HS));

	CMPI2R(SCRATCH1, PSP_GetScratchpadMemoryEnd() - alignment, SCRATCH2);
	FixupBranch tooHighScratch = B(CC_HI);
	CMPI2R(SCRATCH1, PSP_GetScratchpadMemoryBase(), SCRATCH2);
	validJumps.push_back(B(CC_HS));

	if (alignment != 1)
		SetJumpTarget(unaligned);
	SetJumpTarget(tooHighRAM);
	SetJumpTarget(tooHighVid);
	SetJumpTarget(tooHighScratch);

	// If we got here, something unusual and bad happened, so we'll always go back to the dispatcher.
	// Because of that, we can avoid flushing outside this case.
	auto regsCopy = regs_;
	regsCopy.FlushAll();

	// Ignores the return value, always returns to the dispatcher.
	// Otherwise would need a thunk to restore regs.
	MOV(W0, SCRATCH1);
	MOVI2R(W1, alignment);
	MOVI2R(W2, isWrite ? 1 : 0);
	QuickCallFunction(SCRATCH2, &ReportBadAddress);
	B(dispatcherCheckCoreState_);

	for (FixupBranch &b : validJumps)
		SetJumpTarget(b);
}

} // namespace MIPSComp

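A scalar model of the emitted validation: after masking to 30 bits, the address must land in one of three regions (RAM from the kernel base to the user end, VRAM, or scratchpad) with room for the access, and must be aligned. The region bounds below are the usual PSP values, used here only as illustrative assumptions:

#include <cstdint>
#include <cstdio>

// What the ValidateAddress branch chain computes, as plain C++.
bool ValidateAddress(uint32_t addr, uint32_t alignment) {
	addr &= 0x3FFFFFFF;
	if (alignment > 1 && (addr & (alignment - 1)) != 0)
		return false;                       // the "unaligned" branch
	auto in = [&](uint32_t base, uint32_t end) {
		return addr >= base && addr <= end - alignment;
	};
	return in(0x08000000, 0x0A000000)       // kernel base .. user memory end
	    || in(0x04000000, 0x04200000)       // VRAM
	    || in(0x00010000, 0x00014000);      // scratchpad
}

int main() {
	printf("%d %d\n", ValidateAddress(0x08800000, 4), ValidateAddress(0x08800002, 4));
}
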
@ -76,6 +76,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
		SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
		wroteCheckedOffset = true;

		WriteDebugPC(startPC);

		// Check the sign bit to check if negative.
		FixupBranch normalEntry = TBZ(DOWNCOUNTREG, 31);
		MOVI2R(SCRATCH1, startPC);

@ -87,6 +89,7 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
	const u8 *blockStart = GetCodePointer();
	block->SetTargetOffset((int)GetOffset(blockStart));
	compilingBlockNum_ = block_num;
	lastConstPC_ = 0;

	regs_.Start(block);

@ -128,6 +131,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
	}

	if (jo.enableBlocklink && jo.useBackJump) {
		WriteDebugPC(startPC);

		// Small blocks are common, check if it's < 32KB long.
		ptrdiff_t distance = blockStart - GetCodePointer();
		if (distance >= -0x8000 && distance < 0x8000) {

@ -228,8 +233,10 @@ void Arm64JitBackend::CompIR_Generic(IRInst inst) {

	FlushAll();
	SaveStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
	MOVI2R(X0, value);
	QuickCallFunction(SCRATCH2_64, &DoIRInst);
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	LoadStaticRegisters();

	// We only need to check the return value if it's a potential exit.

@ -255,12 +262,14 @@ void Arm64JitBackend::CompIR_Interpret(IRInst inst) {
	// IR protects us against this being a branching instruction (well, hopefully.)
	FlushAll();
	SaveStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
	if (DebugStatsEnabled()) {
		MOVP2R(X0, MIPSGetName(op));
		QuickCallFunction(SCRATCH2_64, &NotifyMIPSInterpret);
	}
	MOVI2R(X0, inst.constant);
	QuickCallFunction(SCRATCH2_64, MIPSGetInterpretFunc(op));
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	LoadStaticRegisters();
}

@ -353,6 +362,32 @@ void Arm64JitBackend::MovToPC(ARM64Reg r) {
	STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}

void Arm64JitBackend::WriteDebugPC(uint32_t pc) {
	if (hooks_.profilerPC) {
		int offset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr());
		MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
		MOVI2R(SCRATCH1, pc);
		STR(SCRATCH1, JITBASEREG, SCRATCH2);
	}
}

void Arm64JitBackend::WriteDebugPC(ARM64Reg r) {
	if (hooks_.profilerPC) {
		int offset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr());
		MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
		STR(r, JITBASEREG, SCRATCH2);
	}
}

void Arm64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
	if (hooks_.profilerPC) {
		int offset = (int)((const u8 *)hooks_.profilerStatus - GetBasePtr());
		MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
		MOVI2R(SCRATCH1, (int)status);
		STR(SCRATCH1, JITBASEREG, SCRATCH2);
	}
}

void Arm64JitBackend::SaveStaticRegisters() {
	if (jo.useStaticAlloc) {
		QuickCallFunction(SCRATCH2_64, saveStaticRegisters_);

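The overall shape of the profiler these helpers feed: the JIT stores the emulated PC and a status word into two fixed writable slots, and a spinning thread samples them. A standalone model under those assumptions, with plain atomics standing in for the JIT-memory slots:

#include <atomic>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <map>
#include <thread>

static std::atomic<uint32_t> profilerPC{0};
static std::atomic<int> profilerStatus{0};   // nonzero = "in jit" in this sketch
static std::atomic<bool> running{true};

int main() {
	std::map<std::pair<uint32_t, int>, int> usage;
	std::thread sampler([&] {
		while (running) {
			int stat = profilerStatus;
			uint32_t pc = profilerPC;
			if (stat != 0)
				usage[{pc, stat}]++;         // same counting idea as debugSeenPCUsage
		}
	});
	for (uint32_t pc = 0x08804000; pc < 0x08804100; pc += 4) {
		profilerPC = pc;                     // what the emitted STR performs
		profilerStatus = 1;
		std::this_thread::sleep_for(std::chrono::microseconds(100));
	}
	running = false;
	sampler.join();
	printf("sampled %zu distinct (pc, status) pairs\n", usage.size());
}
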
@ -57,6 +57,11 @@ private:
	void UpdateRoundingMode(bool force = false);
	void MovFromPC(Arm64Gen::ARM64Reg r);
	void MovToPC(Arm64Gen::ARM64Reg r);
	// Destroys SCRATCH2.
	void WriteDebugPC(uint32_t pc);
	void WriteDebugPC(Arm64Gen::ARM64Reg r);
	// Destroys SCRATCH2.
	void WriteDebugProfilerStatus(IRProfilerStatus status);

	void SaveStaticRegisters();
	void LoadStaticRegisters();

@ -145,6 +150,8 @@ private:
	int jitStartOffset_ = 0;
	int compilingBlockNum_ = -1;
	int logBlocks_ = 0;
	// Only useful in breakpoints, where it's set immediately prior.
	uint32_t lastConstPC_ = 0;
};

class Arm64IRJit : public IRNativeJit {

@ -347,7 +347,7 @@ void Arm64IRRegCache::AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) {
	}
}

bool Arm64IRRegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) {
bool Arm64IRRegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) {
	// No special flags, skip the check for a little speed.
	return true;
}

@ -437,19 +437,21 @@ void Arm64IRRegCache::FlushAll(bool gprs, bool fprs) {
	// Note: make sure not to change the registers when flushing:
	// Branching code may expect the armreg to retain its value.

	auto needsFlush = [&](IRReg i) {
		if (mr[i].loc != MIPSLoc::MEM || mr[i].isStatic)
			return false;
		if (mr[i].nReg == -1 || !nr[mr[i].nReg].isDirty)
			return false;
		return true;
	};

	// Try to flush in pairs when possible.
	for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) {
		if (mr[i].loc == MIPSLoc::MEM || mr[i].loc == MIPSLoc::MEM || mr[i].isStatic || mr[i + 1].isStatic)
		if (!needsFlush(i) || !needsFlush(i + 1))
			continue;
		// Ignore multilane regs. Could handle with more smartness...
		if (mr[i].lane != -1 || mr[i + 1].lane != -1)
			continue;
		if (mr[i].nReg != -1 && !nr[mr[i].nReg].isDirty)
			continue;
		if (mr[i + 1].nReg != -1 && !nr[mr[i + 1].nReg].isDirty)
			continue;
		if (mr[i].loc == MIPSLoc::MEM || mr[i + 1].loc == MIPSLoc::MEM)
			continue;

		int offset = GetMipsRegOffset(i);

@ -86,7 +86,7 @@ protected:
	const int *GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &count, int &base) const override;
	void AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) override;

	bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) override;
	bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) override;
	void LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) override;
	void StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) override;
	void SetNativeRegValue(IRNativeReg nreg, uint32_t imm) override;

@ -562,7 +562,8 @@ void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op)

	const ReplacementTableEntry *entry = GetReplacementFunc(index);
	if (!entry) {
		ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding);
		ERROR_LOG_REPORT_ONCE(replFunc, HLE, "Invalid replacement op %08x at %08x", op.encoding, js.compilerPC);
		// TODO: What should we do here? We're way off in the weeds probably.
		return;
	}

@ -724,7 +725,10 @@ void Arm64Jit::UpdateRoundingMode(u32 fcr31) {
// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,
// I don't think this gives us that much benefit.
void Arm64Jit::WriteExit(u32 destination, int exit_num) {
	// TODO: Check destination is valid and trigger exception.
	// NOTE: Can't blindly check for bad destination addresses here, sometimes exits with bad destinations are written intentionally (like breaks).
	_assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num. dest=%08x", destination);

	// NOTE: Can't blindly check for bad destination addresses here, sometimes exits with bad destinations are written intentionally (like breaks).
	WriteDownCount();
	//If nobody has taken care of this yet (this can be removed when all branches are done)
	JitBlock *b = js.curBlock;

@ -1675,7 +1675,7 @@ namespace MIPSComp {
		if (homogenous) {
			// This is probably even what the hardware basically does, wiring t[3] to 1.0f.
			ir.Write(IROp::Vec4Init, IRVTEMP_PFX_T, (int)Vec4Init::AllONE);
			ir.Write(IROp::Vec4Blend, IRVTEMP_PFX_T, t, IRVTEMP_PFX_T, 0x7);
			ir.Write(IROp::Vec4Blend, IRVTEMP_PFX_T, IRVTEMP_PFX_T, t, 0x7);
			t = IRVTEMP_PFX_T;
		}
		for (int i = 0; i < 4; i++)

@ -1771,7 +1771,20 @@ namespace MIPSComp {
		// d[0] = s[0]*t[1] - s[1]*t[0]
		// Note: this operates on two vectors, not a 2x2 matrix.

		VectorSize sz = GetVecSize(op);
		if (sz != V_Pair)
			DISABLE;

		u8 sregs[4], dregs[4], tregs[4];
		GetVectorRegsPrefixS(sregs, sz, _VS);
		GetVectorRegsPrefixT(tregs, sz, _VT);
		GetVectorRegsPrefixD(dregs, V_Single, _VD);

		ir.Write(IROp::FMul, IRVTEMP_0, sregs[1], tregs[0]);
		ir.Write(IROp::FMul, dregs[0], sregs[0], tregs[1]);
		ir.Write(IROp::FSub, dregs[0], dregs[0], IRVTEMP_0);

		ApplyPrefixD(dregs, V_Single, _VD);
	}

	void IRFrontend::Comp_Vi2x(MIPSOpcode op) {

@ -15,10 +15,15 @@
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include <atomic>
#include <climits>
#include <thread>
#include "Common/Profiler/Profiler.h"
#include "Common/StringUtils.h"
#include "Common/TimeUtil.h"
#include "Core/Core.h"
#include "Core/Debugger/SymbolMap.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/IR/IRNativeCommon.h"

@ -28,18 +33,57 @@ namespace MIPSComp {

// Compile time flag to enable debug stats for not compiled ops.
static constexpr bool enableDebugStats = false;
// Compile time flag for enabling the simple IR jit profiler.
static constexpr bool enableDebugProfiler = false;

// Used only for debugging when enableDebug is true above.
static std::map<uint8_t, int> debugSeenNotCompiledIR;
static std::map<const char *, int> debugSeenNotCompiled;
static std::map<std::pair<uint32_t, IRProfilerStatus>, int> debugSeenPCUsage;
static double lastDebugStatsLog = 0.0;
static constexpr double debugStatsFrequency = 5.0;

static std::thread debugProfilerThread;
std::atomic<bool> debugProfilerThreadStatus = false;

template <int N>
class IRProfilerTopValues {
public:
	void Add(const std::pair<uint32_t, IRProfilerStatus> &v, int c) {
		for (int i = 0; i < N; ++i) {
			if (c > counts[i]) {
				counts[i] = c;
				values[i] = v;
				return;
			}
		}
	}

	int counts[N]{};
	std::pair<uint32_t, IRProfilerStatus> values[N]{};
};

const char *IRProfilerStatusToString(IRProfilerStatus s) {
	switch (s) {
	case IRProfilerStatus::NOT_RUNNING: return "NOT_RUNNING";
	case IRProfilerStatus::IN_JIT: return "IN_JIT";
	case IRProfilerStatus::TIMER_ADVANCE: return "TIMER_ADVANCE";
	case IRProfilerStatus::COMPILING: return "COMPILING";
	case IRProfilerStatus::MATH_HELPER: return "MATH_HELPER";
	case IRProfilerStatus::REPLACEMENT: return "REPLACEMENT";
	case IRProfilerStatus::SYSCALL: return "SYSCALL";
	case IRProfilerStatus::INTERPRET: return "INTERPRET";
	case IRProfilerStatus::IR_INTERPRET: return "IR_INTERPRET";
	}
	return "INVALID";
}

static void LogDebugStats() {
	if (!enableDebugStats)
	if (!enableDebugStats && !enableDebugProfiler)
		return;

	double now = time_now_d();
	if (now < lastDebugStatsLog + 1.0)
	if (now < lastDebugStatsLog + debugStatsFrequency)
		return;
	lastDebugStatsLog = now;

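A usage sketch of the IRProfilerTopValues pattern above. Worth noting from the hunk as written: Add() overwrites the first slot whose count it beats and returns, rather than shifting entries down, so slots after the first are approximate rather than a strictly sorted top-N. Self-contained demo (the template body is copied from the hunk, with a plain int standing in for the status enum):

#include <cstdint>
#include <cstdio>
#include <utility>

template <int N>
class TopValues {   // same shape as IRProfilerTopValues above
public:
	void Add(const std::pair<uint32_t, int> &v, int c) {
		for (int i = 0; i < N; ++i) {
			if (c > counts[i]) {
				counts[i] = c;
				values[i] = v;
				return;
			}
		}
	}
	int counts[N]{};
	std::pair<uint32_t, int> values[N]{};
};

int main() {
	TopValues<4> top;
	top.Add({0x08800000, 1}, 10);
	top.Add({0x08800004, 1}, 50);   // beats slot 0; the count of 10 is dropped
	top.Add({0x08800008, 1}, 20);   // lands in slot 1
	for (int i = 0; i < 4; ++i)
		printf("#%d: pc=%08x count=%d\n", i, top.values[i].first, top.counts[i]);
}
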
@ -63,16 +107,36 @@ static void LogDebugStats() {
	}
	debugSeenNotCompiled.clear();

	IRProfilerTopValues<4> slowestPCs;
	int64_t totalCount = 0;
	for (auto it : debugSeenPCUsage) {
		slowestPCs.Add(it.first, it.second);
		totalCount += it.second;
	}
	debugSeenPCUsage.clear();

	if (worstIROp != -1)
		WARN_LOG(JIT, "Most not compiled IR op: %s (%d)", GetIRMeta((IROp)worstIROp)->name, worstIRVal);
	if (worstName != nullptr)
		WARN_LOG(JIT, "Most not compiled op: %s (%d)", worstName, worstVal);
	if (slowestPCs.counts[0] != 0) {
		for (int i = 0; i < 4; ++i) {
			uint32_t pc = slowestPCs.values[i].first;
			const char *status = IRProfilerStatusToString(slowestPCs.values[i].second);
			const std::string label = g_symbolMap ? g_symbolMap->GetDescription(pc) : "";
			WARN_LOG(JIT, "Slowest sampled PC #%d: %08x (%s)/%s (%f%%)", i, pc, label.c_str(), status, 100.0 * (double)slowestPCs.counts[i] / (double)totalCount);
		}
	}
}

bool IRNativeBackend::DebugStatsEnabled() const {
	return enableDebugStats;
}

bool IRNativeBackend::DebugProfilerEnabled() const {
	return enableDebugProfiler;
}

void IRNativeBackend::NotifyMIPSInterpret(const char *name) {
	_assert_(enableDebugStats);
	debugSeenNotCompiled[name]++;

@ -98,8 +162,32 @@ uint32_t IRNativeBackend::DoIRInst(uint64_t value) {
	return IRInterpret(currentMIPS, &inst, 1);
}

int IRNativeBackend::ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_t isWrite) {
	const auto toss = [&](MemoryExceptionType t) {
		Core_MemoryException(addr, alignment, currentMIPS->pc, t);
		return coreState != CORE_RUNNING ? 1 : 0;
	};

	if (!Memory::IsValidRange(addr, alignment)) {
		MemoryExceptionType t = isWrite == 1 ? MemoryExceptionType::WRITE_WORD : MemoryExceptionType::READ_WORD;
		if (alignment > 4)
			t = isWrite ? MemoryExceptionType::WRITE_BLOCK : MemoryExceptionType::READ_BLOCK;
		return toss(t);
	} else if (alignment > 1 && (addr & (alignment - 1)) != 0) {
		return toss(MemoryExceptionType::ALIGNMENT);
	}
	return 0;
}

IRNativeBackend::IRNativeBackend(IRBlockCache &blocks) : blocks_(blocks) {}

IRNativeBackend::~IRNativeBackend() {
	if (debugProfilerThreadStatus) {
		debugProfilerThreadStatus = false;
		debugProfilerThread.join();
	}
}

void IRNativeBackend::CompileIRInst(IRInst inst) {
	switch (inst.op) {
	case IROp::Nop:

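The decision table ReportBadAddress implements, pulled out as a pure function for clarity: invalid ranges raise read/write word exceptions (block variants above 4 bytes), while valid but misaligned accesses raise an alignment exception. The enum here is local to the sketch:

#include <cstdint>
#include <cstdio>

enum class ExcType { NONE, READ_WORD, WRITE_WORD, READ_BLOCK, WRITE_BLOCK, ALIGNMENT };

// Same branching as ReportBadAddress, minus the side effects.
ExcType Classify(bool validRange, uint32_t addr, uint32_t alignment, bool isWrite) {
	if (!validRange) {
		if (alignment > 4)
			return isWrite ? ExcType::WRITE_BLOCK : ExcType::READ_BLOCK;
		return isWrite ? ExcType::WRITE_WORD : ExcType::READ_WORD;
	}
	if (alignment > 1 && (addr & (alignment - 1)) != 0)
		return ExcType::ALIGNMENT;
	return ExcType::NONE;
}

int main() {
	printf("%d\n", (int)Classify(false, 0x0, 4, true));       // WRITE_WORD
	printf("%d\n", (int)Classify(true, 0x1002, 4, false));    // ALIGNMENT
}
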
@ -401,6 +489,20 @@ void IRNativeJit::Init(IRNativeBackend &backend) {

	// Wanted this to be a reference, but vtbls get in the way. Shouldn't change.
	hooks_ = backend.GetNativeHooks();

	if (enableDebugProfiler && hooks_.profilerPC) {
		debugProfilerThreadStatus = true;
		debugProfilerThread = std::thread([&] {
			// Spin, spin spin... maybe could at least hook into sleeps.
			while (debugProfilerThreadStatus) {
				IRProfilerStatus stat = *hooks_.profilerStatus;
				uint32_t pc = *hooks_.profilerPC;
				if (stat != IRProfilerStatus::NOT_RUNNING && stat != IRProfilerStatus::SYSCALL) {
					debugSeenPCUsage[std::make_pair(pc, stat)]++;
				}
			}
		});
	}
}

bool IRNativeJit::CompileTargetBlock(IRBlock *block, int block_num, bool preload) {

@ -412,7 +514,7 @@ void IRNativeJit::FinalizeTargetBlock(IRBlock *block, int block_num) {
}

void IRNativeJit::RunLoopUntil(u64 globalticks) {
	if constexpr (enableDebugStats) {
	if constexpr (enableDebugStats || enableDebugProfiler) {
		LogDebugStats();
	}

@ -443,15 +545,29 @@ bool IRNativeJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
		return false;

	int block_num = -1;
	int block_offset = INT_MAX;
	for (int i = 0; i < blocks_.GetNumBlocks(); ++i) {
		const auto &b = blocks_.GetBlock(i);
		// We allocate linearly.
		if (b->GetTargetOffset() <= offset)
		int b_start = b->GetTargetOffset();
		if (b_start > offset)
			continue;

		int b_end = backend_->GetNativeBlock(i)->checkedOffset;
		int b_offset = offset - b_start;
		if (b_end > b_start && b_end >= offset) {
			// For sure within the block.
			block_num = i;
		if (b->GetTargetOffset() > offset)
			block_offset = b_offset;
			break;
		}

		if (b_offset < block_offset) {
			// Possibly within the block, unless in some other block...
			block_num = i;
			block_offset = b_offset;
		}
	}

	// Used by profiling tools that don't like spaces.
	if (block_num == -1) {
		name = "unknownOrDeletedBlock";

@ -466,9 +582,9 @@ bool IRNativeJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
	// It helps to know which func this block is inside.
	const std::string label = g_symbolMap ? g_symbolMap->GetDescription(start) : "";
	if (!label.empty())
		name = StringFromFormat("block%d_%08x_%s", block_num, start, label.c_str());
		name = StringFromFormat("block%d_%08x_%s_0x%x", block_num, start, label.c_str(), block_offset);
	else
		name = StringFromFormat("block%d_%08x", block_num, start);
		name = StringFromFormat("block%d_%08x_0x%x", block_num, start, block_offset);
	return true;
}
return false;

@ -25,12 +25,27 @@ namespace MIPSComp {

typedef void (*IRNativeFuncNoArg)();

enum class IRProfilerStatus : int32_t {
	NOT_RUNNING,
	IN_JIT,
	TIMER_ADVANCE,
	COMPILING,
	MATH_HELPER,
	REPLACEMENT,
	SYSCALL,
	INTERPRET,
	IR_INTERPRET,
};

struct IRNativeHooks {
	IRNativeFuncNoArg enterDispatcher = nullptr;

	const uint8_t *dispatcher = nullptr;
	const uint8_t *dispatchFetch = nullptr;
	const uint8_t *crashHandler = nullptr;

	uint32_t *profilerPC = nullptr;
	IRProfilerStatus *profilerStatus = nullptr;
};

struct IRNativeBlockExit {

@ -47,7 +62,7 @@ struct IRNativeBlock {
class IRNativeBackend {
public:
	IRNativeBackend(IRBlockCache &blocks);
	virtual ~IRNativeBackend() {}
	virtual ~IRNativeBackend();

	void CompileIRInst(IRInst inst);

@ -120,6 +135,7 @@ protected:

	// Returns true when debugging statistics should be compiled in.
	bool DebugStatsEnabled() const;
	bool DebugProfilerEnabled() const;

	// Callback (compile when DebugStatsEnabled()) to log a base interpreter hit.
	// Call the func returned by MIPSGetInterpretFunc(op) directly for interpret.

@ -131,6 +147,8 @@ protected:
	// Callback to log AND perform an IR interpreter inst. Returns 0 or a PC to jump to.
	static uint32_t DoIRInst(uint64_t inst);

	static int ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_t isWrite);

	void AddLinkableExit(int block_num, uint32_t pc, int exitStartOffset, int exitLen);
	void EraseAllLinks(int block_num);

@ -1794,7 +1794,8 @@ bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &o
	bool spModified = false;
	for (IRInst inst : in.GetInstructions()) {
		IRMemoryOpInfo info = IROpMemoryAccessSize(inst.op);
		if (info.size != 0 && inst.src1 == MIPS_REG_SP) {
		// Note: we only combine word aligned accesses.
		if (info.size != 0 && inst.src1 == MIPS_REG_SP && info.size == 4) {
			if (spModified) {
				// No good, it was modified and then we did more accesses. Can't combine.
				spUpper = -1;

@ -1805,11 +1806,6 @@ bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &o
				spUpper = -1;
				break;
			}
			if (info.size == 16 && (inst.constant & 0xF) != 0) {
				// Shouldn't happen, sp should always be aligned.
				spUpper = -1;
				break;
			}

			spLower = std::min(spLower, (int)inst.constant);
			spUpper = std::max(spUpper, (int)inst.constant + info.size);

@ -1828,7 +1824,7 @@ bool ApplyMemoryValidation(const IRWriter &in, IRWriter &out, const IROptions &o

	std::map<uint64_t, uint8_t> checks;
	const auto addValidate = [&](IROp validate, uint8_t sz, const IRInst &inst, bool isStore) {
		if (inst.src1 == MIPS_REG_SP && skipSP) {
		if (inst.src1 == MIPS_REG_SP && skipSP && validate == IROp::ValidateAddress32) {
			if (!flushedSP) {
				out.Write(IROp::ValidateAddress32, 0, MIPS_REG_SP, spWrite ? 1U : 0U, spLower);
				if (spUpper > spLower + 4)

@ -160,7 +160,7 @@ bool IRNativeRegCacheBase::IsFPRMapped(IRReg fpr) {
}

int IRNativeRegCacheBase::GetFPRLaneCount(IRReg fpr) {
	if (!IsFPRMapped(fpr) || mr[fpr + 32].lane > 0)
	if (!IsFPRMapped(fpr))
		return 0;
	if (mr[fpr + 32].lane == -1)
		return 1;

@ -406,12 +406,12 @@ IRNativeReg IRNativeRegCacheBase::FindFreeReg(MIPSLoc type, MIPSMap flags) const

bool IRNativeRegCacheBase::IsGPRClobbered(IRReg gpr) const {
	_dbg_assert_(IsValidGPR(gpr));
	return IsRegClobbered(MIPSLoc::REG, MIPSMap::INIT, gpr);
	return IsRegClobbered(MIPSLoc::REG, gpr);
}

bool IRNativeRegCacheBase::IsFPRClobbered(IRReg fpr) const {
	_dbg_assert_(IsValidFPR(fpr));
	return IsRegClobbered(MIPSLoc::FREG, MIPSMap::INIT, fpr + 32);
	return IsRegClobbered(MIPSLoc::FREG, fpr + 32);
}

IRUsage IRNativeRegCacheBase::GetNextRegUsage(const IRSituation &info, MIPSLoc type, IRReg r) const {

@ -423,7 +423,7 @@ IRUsage IRNativeRegCacheBase::GetNextRegUsage(const IRSituation &info, MIPSLoc t
	return IRUsage::UNKNOWN;
}

bool IRNativeRegCacheBase::IsRegClobbered(MIPSLoc type, MIPSMap flags, IRReg r) const {
bool IRNativeRegCacheBase::IsRegClobbered(MIPSLoc type, IRReg r) const {
	static const int UNUSED_LOOKAHEAD_OPS = 30;

	IRSituation info;

@ -450,6 +450,21 @@ bool IRNativeRegCacheBase::IsRegClobbered(MIPSLoc type, MIPSMap flags, IRReg r)
	return false;
}

bool IRNativeRegCacheBase::IsRegRead(MIPSLoc type, IRReg first) const {
	static const int UNUSED_LOOKAHEAD_OPS = 30;

	IRSituation info;
	info.lookaheadCount = UNUSED_LOOKAHEAD_OPS;
	// We look starting one ahead, unlike spilling.
	info.currentIndex = irIndex_ + 1;
	info.instructions = irBlock_->GetInstructions();
	info.numInstructions = irBlock_->GetNumInstructions();

	// Note: this intentionally doesn't look at the full reg, only the lane.
	IRUsage usage = GetNextRegUsage(info, type, first);
	return usage == IRUsage::READ;
}

IRNativeReg IRNativeRegCacheBase::FindBestToSpill(MIPSLoc type, MIPSMap flags, bool unusedOnly, bool *clobbered) const {
	int allocCount = 0, base = 0;
	const int *allocOrder = GetAllocationOrder(type, flags, allocCount, base);

@ -501,7 +516,7 @@ IRNativeReg IRNativeRegCacheBase::FindBestToSpill(MIPSLoc type, MIPSMap flags, b
	return -1;
}

bool IRNativeRegCacheBase::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) {
bool IRNativeRegCacheBase::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) {
	int allocCount = 0, base = 0;
	const int *allocOrder = GetAllocationOrder(type, flags, allocCount, base);

@ -514,6 +529,11 @@ bool IRNativeRegCacheBase::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type,
	return false;
}

bool IRNativeRegCacheBase::TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags) {
	// To be overridden if the backend supports transfers.
	return false;
}

void IRNativeRegCacheBase::DiscardNativeReg(IRNativeReg nreg) {
	_assert_msg_(nreg >= 0 && nreg < config_.totalNativeRegs, "DiscardNativeReg on invalid register %d", nreg);
	if (nr[nreg].mipsReg != IRREG_INVALID) {

@ -930,22 +950,29 @@ IRNativeReg IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRReg first, int la
	case MIPSLoc::REG:
		if (type != MIPSLoc::REG) {
			nreg = AllocateReg(type, flags);
		} else if (!IsNativeRegCompatible(nreg, type, flags)) {
		} else if (!IsNativeRegCompatible(nreg, type, flags, lanes)) {
			// If it's not compatible, we'll need to reallocate.
			// TODO: Could do a transfer and avoid memory flush.
			if (TransferNativeReg(nreg, -1, type, first, lanes, flags)) {
				nreg = mr[first].nReg;
			} else {
				FlushNativeReg(nreg);
				nreg = AllocateReg(type, flags);
			}
		}
		break;

	case MIPSLoc::FREG:
	case MIPSLoc::VREG:
		if (type != mr[first].loc) {
			nreg = AllocateReg(type, flags);
		} else if (!IsNativeRegCompatible(nreg, type, flags)) {
		} else if (!IsNativeRegCompatible(nreg, type, flags, lanes)) {
			if (TransferNativeReg(nreg, -1, type, first, lanes, flags)) {
				nreg = mr[first].nReg;
			} else {
				FlushNativeReg(nreg);
				nreg = AllocateReg(type, flags);
			}
		}
		break;

	case MIPSLoc::IMM:

@ -981,10 +1008,13 @@ void IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRNativeReg nreg, IRReg fi
			_assert_msg_(!mreg.isStatic, "Cannot MapNativeReg a static reg mismatch");
			if ((flags & MIPSMap::NOINIT) != MIPSMap::NOINIT) {
				// If we need init, we have to flush mismatches.
				// TODO: Do a shuffle if interior only?
				if (!TransferNativeReg(mreg.nReg, nreg, type, first, lanes, flags)) {
					// TODO: We may also be motivated to have multiple read-only "views" or an IRReg.
					// For example Vec4Scale v0..v3, v0..v3, v3
					FlushNativeReg(mreg.nReg);
				}
				// The mismatch has been "resolved" now.
				mismatch = false;
			} else if (oldlanes != 1) {
				// Even if we don't care about the current contents, we can't discard outside.
				bool extendsBefore = oldlane > i;

@ -1017,6 +1047,9 @@ void IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRNativeReg nreg, IRReg fi
|
|||
DiscardNativeReg(mreg.nReg);
|
||||
else
|
||||
FlushNativeReg(mreg.nReg);
|
||||
|
||||
// That took care of the mismatch, either by clobber or flush.
|
||||
mismatch = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1027,7 +1060,7 @@ void IRNativeRegCacheBase::MapNativeReg(MIPSLoc type, IRNativeReg nreg, IRReg fi
|
|||
if ((flags & MIPSMap::NOINIT) != MIPSMap::NOINIT) {
|
||||
// We better not be trying to map to a different nreg if it's in one now.
|
||||
// This might happen on some sort of transfer...
|
||||
// TODO: Make a direct transfer, i.e. FREG -> VREG?
|
||||
if (!TransferNativeReg(mreg.nReg, nreg, type, first, lanes, flags))
|
||||
FlushNativeReg(mreg.nReg);
|
||||
} else {
|
||||
DiscardNativeReg(mreg.nReg);
|
||||

@@ -209,13 +209,14 @@ protected:
	IRNativeReg AllocateReg(MIPSLoc type, MIPSMap flags);
	IRNativeReg FindFreeReg(MIPSLoc type, MIPSMap flags) const;
	IRNativeReg FindBestToSpill(MIPSLoc type, MIPSMap flags, bool unusedOnly, bool *clobbered) const;
	virtual bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags);
	virtual bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes);
	virtual void DiscardNativeReg(IRNativeReg nreg);
	virtual void FlushNativeReg(IRNativeReg nreg);
	virtual void DiscardReg(IRReg mreg);
	virtual void FlushReg(IRReg mreg);
	virtual void AdjustNativeRegAsPtr(IRNativeReg nreg, bool state);
	virtual void MapNativeReg(MIPSLoc type, IRNativeReg nreg, IRReg first, int lanes, MIPSMap flags);
	virtual bool TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags);
	virtual IRNativeReg MapNativeReg(MIPSLoc type, IRReg first, int lanes, MIPSMap flags);
	IRNativeReg MapNativeRegAsPointer(IRReg gpr);

@@ -238,7 +239,8 @@ protected:
	void SetSpillLockIRIndex(IRReg reg, int index);
	int GetMipsRegOffset(IRReg r);

	bool IsRegClobbered(MIPSLoc type, MIPSMap flags, IRReg r) const;
	bool IsRegClobbered(MIPSLoc type, IRReg r) const;
	bool IsRegRead(MIPSLoc type, IRReg r) const;
	IRUsage GetNextRegUsage(const IRSituation &info, MIPSLoc type, IRReg r) const;

	bool IsValidGPR(IRReg r) const;

@@ -31,6 +31,7 @@
#include "Core/MemMap.h"
#include "Core/CoreTiming.h"
#include "Core/Reporting.h"
#include "Core/Config.h"

#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSTables.h"

@@ -246,8 +247,7 @@ static void ExpandRange(std::pair<u32, u32> &range, u32 newStart, u32 newEnd) {

void JitBlockCache::FinalizeBlock(int block_num, bool block_link) {
	JitBlock &b = blocks_[block_num];

	_assert_msg_(Memory::IsValidAddress(b.originalAddress), "FinalizeBlock: Bad originalAddress %08x in block %d", b.originalAddress, block_num);
	_assert_msg_(Memory::IsValidAddress(b.originalAddress), "FinalizeBlock: Bad originalAddress %08x in block %d (b.num: %d) proxy: %s sz: %d", b.originalAddress, block_num, b.blockNum, b.proxyFor ? "y" : "n", b.codeSize);

	b.originalFirstOpcode = Memory::Read_Opcode_JIT(b.originalAddress);
	MIPSOpcode opcode = GetEmuHackOpForBlock(block_num);

@@ -462,6 +462,11 @@ void JitBlockCache::UnlinkBlock(int i) {
	if (ppp.first == ppp.second)
		return;
	for (auto iter = ppp.first; iter != ppp.second; ++iter) {
		if ((size_t)iter->second >= num_blocks_) {
			// Something probably went very wrong. Try to stumble along nevertheless.
			ERROR_LOG(JIT, "UnlinkBlock: Invalid block number %d", iter->second);
			continue;
		}
		JitBlock &sourceBlock = blocks_[iter->second];
		for (int e = 0; e < MAX_JIT_BLOCK_EXITS; e++) {
			if (sourceBlock.exitAddress[e] == b.originalAddress)

@@ -29,7 +29,7 @@
#include "Core/MIPS/MIPS.h"

#if PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64)
const int MAX_JIT_BLOCK_EXITS = 2;
const int MAX_JIT_BLOCK_EXITS = 4;
#else
const int MAX_JIT_BLOCK_EXITS = 8;
#endif

@@ -1446,7 +1446,7 @@ namespace MIPSInt
			d[0] += s[2] * t[2] + s[3] * t[3];
		}

		ApplyPrefixD(d, sz);
		ApplyPrefixD(d, V_Single);
		WriteVector(d, V_Single, vd);
		PC += 4;
		EatPrefixes();

@@ -45,8 +45,19 @@ static void ShowPC(u32 downcount, void *membase, void *jitbase) {
}

void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
	BeginWrite(GetMemoryProtectPageSize());
	// This will be used as a writable scratch area, always 32-bit accessible.
	const u8 *start = AlignCodePage();
	if (DebugProfilerEnabled()) {
		ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
		hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
		*hooks_.profilerPC = 0;
		hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr() + 1;
		*hooks_.profilerStatus = IRProfilerStatus::NOT_RUNNING;
		SetCodePointer(GetCodePtr() + sizeof(uint32_t) * 2, GetWritableCodePtr() + sizeof(uint32_t) * 2);
	}

	const u8 *disasmStart = AlignCodePage();
	BeginWrite(GetMemoryProtectPageSize());

	if (jo.useStaticAlloc) {
		saveStaticRegisters_ = AlignCode16();

@@ -58,8 +69,6 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
		regs_.EmitLoadStaticRegisters();
		LW(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
		RET();

		start = saveStaticRegisters_;
	} else {
		saveStaticRegisters_ = nullptr;
		loadStaticRegisters_ = nullptr;

@@ -124,14 +133,18 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
	LI(JITBASEREG, GetBasePtr() - MIPS_EMUHACK_OPCODE, SCRATCH1);

	LoadStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	MovFromPC(SCRATCH1);
	WriteDebugPC(SCRATCH1);
	outerLoopPCInSCRATCH1_ = GetCodePtr();
	MovToPC(SCRATCH1);
	outerLoop_ = GetCodePtr();
	// Advance can change the downcount (or thread), so must save/restore around it.
	SaveStaticRegisters();
	RestoreRoundingMode(true);
	WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
	QuickCallFunction(&CoreTiming::Advance, X7);
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	ApplyRoundingMode(true);
	LoadStaticRegisters();

@@ -162,6 +175,7 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
	}

	LWU(SCRATCH1, CTXREG, offsetof(MIPSState, pc));
	WriteDebugPC(SCRATCH1);
#ifdef MASKED_PSP_MEMORY
	LI(SCRATCH2, 0x3FFFFFFF);
	AND(SCRATCH1, SCRATCH1, SCRATCH2);

@@ -180,7 +194,9 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {

	// No block found, let's jit. We don't need to save static regs, they're all callee saved.
	RestoreRoundingMode(true);
	WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
	QuickCallFunction(&MIPSComp::JitAt, X7);
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	ApplyRoundingMode(true);

	// Try again, the block index should be set now.

@@ -195,6 +211,7 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
	const uint8_t *quitLoop = GetCodePtr();
	SetJumpTarget(badCoreState);
	WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
	SaveStaticRegisters();
	RestoreRoundingMode(true);

@@ -520,20 +520,32 @@ void RiscVJitBackend::CompIR_FCompare(IRInst inst) {

	case IROp::FCmpVfpuAggregate:
		regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY);
		if (inst.dest == 1) {
			ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.dest);
			// This is the "any" bit, easy.
			SNEZ(SCRATCH2, SCRATCH1);
			// Negate so 1 becomes all bits set and zero stays zero, then mask to 0x30.
			NEG(SCRATCH1, SCRATCH1);
			ANDI(SCRATCH1, SCRATCH1, 0x30);

			// Reject the old any/all bits and replace them with our own.
			ANDI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), ~0x30);
			OR(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), SCRATCH1);
		} else {
			ANDI(SCRATCH1, regs_.R(IRREG_VFPU_CC), inst.dest);
			FixupBranch skipZero = BEQ(SCRATCH1, R_ZERO);

			// To compare to inst.dest for "all", let's simply subtract it and compare to zero.
			ADDI(SCRATCH1, SCRATCH1, -inst.dest);
			SEQZ(SCRATCH1, SCRATCH1);
			// Now we combine those together.
			// Now we combine with the "any" bit.
			SLLI(SCRATCH1, SCRATCH1, 5);
			SLLI(SCRATCH2, SCRATCH2, 4);
			OR(SCRATCH1, SCRATCH1, SCRATCH2);
			ORI(SCRATCH1, SCRATCH1, 0x10);

			// Reject those any/all bits and replace them with our own.
			SetJumpTarget(skipZero);

			// Reject the old any/all bits and replace them with our own.
			ANDI(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), ~0x30);
			OR(regs_.R(IRREG_VFPU_CC), regs_.R(IRREG_VFPU_CC), SCRATCH1);
		}
		break;
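What this aggregate boils down to, in plain C++ (a sketch of the semantics, not code from the tree): bits 0-3 of VFPU_CC hold per-lane compare results, bit 4 (0x10) is "any" and bit 5 (0x20) is "all" over the lanes selected by inst.dest.

#include <cstdint>

uint32_t AggregateVfpuCC(uint32_t cc, uint32_t mask) {
	uint32_t selected = cc & mask;
	uint32_t any = selected != 0 ? 1u : 0u;
	uint32_t all = selected == mask ? 1u : 0u;
	// Reject the old any/all bits and replace them with our own.
	cc &= ~0x30u;
	cc |= (any << 4) | (all << 5);
	return cc;
}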

	default:

@@ -573,6 +585,8 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) {

	auto callFuncF_F = [&](float (*func)(float)) {
		regs_.FlushBeforeCall();
		WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);

		// It might be in a non-volatile register.
		// TODO: May have to handle a transfer if SIMD here.
		if (regs_.IsFPRMapped(inst.src1)) {

@@ -588,6 +602,8 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) {
		if (regs_.F(inst.dest) != F10) {
			FMV(32, regs_.F(inst.dest), F10);
		}

		WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	};

	RiscVReg tempReg = INVALID_REG;

@@ -59,8 +59,19 @@ int32_t RiscVJitBackend::AdjustForAddressOffset(RiscVGen::RiscVReg *reg, int32_t
	if (constant > 0)
		constant &= Memory::MEMVIEW32_MASK;
#endif
	// It can't be this negative, must be a constant with top bit set.
	if ((constant & 0xC0000000) == 0x80000000) {
		if (cpu_info.RiscV_Zba) {
			LI(SCRATCH2, constant);
			ADD_UW(SCRATCH1, SCRATCH2, *reg);
		} else {
			LI(SCRATCH2, (uint32_t)constant);
			ADD(SCRATCH1, *reg, SCRATCH2);
		}
	} else {
		LI(SCRATCH2, constant);
		ADD(SCRATCH1, *reg, SCRATCH2);
	}
	*reg = SCRATCH1;
	return 0;
}
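The (constant & 0xC0000000) == 0x80000000 test deserves a note: a displacement that negative never occurs in practice, so a 32-bit value with only the top bit set is taken to be a constant address (such as a kernel/uncached mirror) that must be zero-extended rather than sign-extended when added to the 64-bit base register. A hedged sketch of the two interpretations (illustrative only):

#include <cstdint>

uint64_t EffectiveAddress(uint64_t base, int32_t constant) {
	if ((constant & 0xC0000000) == 0x80000000) {
		// Constant address with the top bit set: zero-extend, as Zba's ADD.UW does.
		return base + (uint32_t)constant;
	}
	// Ordinary signed displacement: sign-extend, as plain ADD does.
	return base + (int64_t)constant;
}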

@@ -188,6 +188,7 @@ void RiscVJitBackend::CompIR_System(IRInst inst) {
		FlushAll();
		SaveStaticRegisters();

		WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL);
#ifdef USE_PROFILER
		// When profiling, we can't skip CallSyscall, since it times syscalls.
		LI(X10, (int32_t)inst.constant);

@@ -207,6 +208,7 @@ void RiscVJitBackend::CompIR_System(IRInst inst) {
		}
#endif

		WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
		LoadStaticRegisters();
		// This is always followed by an ExitToPC, where we check coreState.
		break;

@@ -214,7 +216,9 @@ void RiscVJitBackend::CompIR_System(IRInst inst) {
	case IROp::CallReplacement:
		FlushAll();
		SaveStaticRegisters();
		WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT);
		QuickCallFunction(GetReplacementFunc(inst.constant)->replaceFunc, SCRATCH2);
		WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
		LoadStaticRegisters();
		SUB(DOWNCOUNTREG, DOWNCOUNTREG, X10);
		break;

@@ -67,6 +67,8 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
		SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
		wroteCheckedOffset = true;

		WriteDebugPC(startPC);

		FixupBranch normalEntry = BGE(DOWNCOUNTREG, R_ZERO);
		LI(SCRATCH1, startPC);
		QuickJ(R_RA, outerLoopPCInSCRATCH1_);

@@ -118,6 +120,8 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
	}

	if (jo.enableBlocklink && jo.useBackJump) {
		WriteDebugPC(startPC);

		// Most blocks shouldn't be >= 4KB, so usually we can just BGE.
		if (BInRange(blockStart)) {
			BGE(DOWNCOUNTREG, R_ZERO, blockStart);

@@ -218,7 +222,9 @@ void RiscVJitBackend::CompIR_Generic(IRInst inst) {
		FlushAll();
		LI(X10, value, SCRATCH2);
		SaveStaticRegisters();
		WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
		QuickCallFunction(&DoIRInst, SCRATCH2);
		WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
		LoadStaticRegisters();

		// We only need to check the return value if it's a potential exit.

@@ -241,12 +247,14 @@ void RiscVJitBackend::CompIR_Interpret(IRInst inst) {
	// IR protects us against this being a branching instruction (well, hopefully.)
	FlushAll();
	SaveStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
	if (DebugStatsEnabled()) {
		LI(X10, MIPSGetName(op));
		QuickCallFunction(&NotifyMIPSInterpret, SCRATCH2);
	}
	LI(X10, (int32_t)inst.constant);
	QuickCallFunction((const u8 *)MIPSGetInterpretFunc(op), SCRATCH2);
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	LoadStaticRegisters();
}

@@ -329,6 +337,32 @@ void RiscVJitBackend::MovToPC(RiscVReg r) {
	SW(r, CTXREG, offsetof(MIPSState, pc));
}

void RiscVJitBackend::WriteDebugPC(uint32_t pc) {
	if (hooks_.profilerPC) {
		int offset = (const u8 *)hooks_.profilerPC - GetBasePtr();
		LI(SCRATCH2, hooks_.profilerPC);
		LI(R_RA, (int32_t)pc);
		SW(R_RA, SCRATCH2, 0);
	}
}

void RiscVJitBackend::WriteDebugPC(RiscVReg r) {
	if (hooks_.profilerPC) {
		int offset = (const u8 *)hooks_.profilerPC - GetBasePtr();
		LI(SCRATCH2, hooks_.profilerPC);
		SW(r, SCRATCH2, 0);
	}
}

void RiscVJitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
	if (hooks_.profilerPC) {
		int offset = (const u8 *)hooks_.profilerStatus - GetBasePtr();
		LI(SCRATCH2, hooks_.profilerStatus);
		LI(R_RA, (int)status);
		SW(R_RA, SCRATCH2, 0);
	}
}

void RiscVJitBackend::SaveStaticRegisters() {
	if (jo.useStaticAlloc) {
		QuickCallFunction(saveStaticRegisters_);

@@ -50,6 +50,9 @@ private:
	void ApplyRoundingMode(bool force = false);
	void MovFromPC(RiscVGen::RiscVReg r);
	void MovToPC(RiscVGen::RiscVReg r);
	void WriteDebugPC(uint32_t pc);
	void WriteDebugPC(RiscVGen::RiscVReg r);
	void WriteDebugProfilerStatus(IRProfilerStatus status);

	void SaveStaticRegisters();
	void LoadStaticRegisters();

@@ -303,11 +303,11 @@ void RiscVRegCache::AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) {
	}
}

bool RiscVRegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) {
bool RiscVRegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) {
	// No special flags except VREG, skip the check for a little speed.
	if (type != MIPSLoc::VREG)
		return true;
	return IRNativeRegCacheBase::IsNativeRegCompatible(nreg, type, flags);
	return IRNativeRegCacheBase::IsNativeRegCompatible(nreg, type, flags, lanes);
}

void RiscVRegCache::LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) {

@@ -76,7 +76,7 @@ protected:
	const int *GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &count, int &base) const override;
	void AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) override;

	bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags) override;
	bool IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) override;
	void LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) override;
	void StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) override;
	void SetNativeRegValue(IRNativeReg nreg, uint32_t imm) override;

@@ -605,7 +605,7 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op) {

	const ReplacementTableEntry *entry = GetReplacementFunc(index);
	if (!entry) {
		ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding);
		ERROR_LOG_REPORT_ONCE(replFunc, HLE, "Invalid replacement op %08x at %08x", op.encoding, js.compilerPC);
		return;
	}

@@ -708,7 +708,7 @@ static void HitInvalidBranch(uint32_t dest) {
}

void Jit::WriteExit(u32 destination, int exit_num) {
	_dbg_assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num");
	_assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num. dest=%08x", destination);

	if (!Memory::IsValidAddress(destination) || (destination & 3) != 0) {
		ERROR_LOG_REPORT(JIT, "Trying to write block exit to illegal destination %08x: pc = %08x", destination, currentMIPS->pc);

@@ -49,8 +49,21 @@ static void ShowPC(void *membase, void *jitbase) {
}

void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
	BeginWrite(GetMemoryProtectPageSize());
	// This will be used as a writable scratch area, always 32-bit accessible.
	const u8 *start = AlignCodePage();
	if (DebugProfilerEnabled()) {
		ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
		hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
		Write32(0);
		hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr();
		Write32(0);
	}

	EmitFPUConstants();
	EmitVecConstants();

	const u8 *disasmStart = AlignCodePage();
	BeginWrite(GetMemoryProtectPageSize());

	jo.downcountInRegister = false;
#if PPSSPP_ARCH(AMD64)

@@ -58,7 +71,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
	int jitbaseCtxDisp = 0;
	// We pre-bake the MIPS_EMUHACK_OPCODE subtraction into our jitbase value.
	intptr_t jitbase = (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE;
	if ((jitbase < -0x80000000LL || jitbase > 0x7FFFFFFFLL) && !Accessible((const u8 *)&mipsState->f[0], GetBasePtr())) {
	if ((jitbase < -0x80000000LL || jitbase > 0x7FFFFFFFLL) && !Accessible((const u8 *)&mipsState->f[0], (const u8 *)jitbase)) {
		jo.reserveR15ForAsm = true;
		jitbaseInR15 = true;
	} else {

@@ -83,8 +96,6 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
		if (jo.downcountInRegister)
			MOV(32, R(DOWNCOUNTREG), MDisp(CTXREG, downcountOffset));
		RET();

		start = saveStaticRegisters_;
	} else {
		saveStaticRegisters_ = nullptr;
		loadStaticRegisters_ = nullptr;

@@ -146,14 +157,18 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
	MOV(PTRBITS, R(CTXREG), ImmPtr(&mipsState->f[0]));

	LoadStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	MovFromPC(SCRATCH1);
	WriteDebugPC(SCRATCH1);
	outerLoopPCInSCRATCH1_ = GetCodePtr();
	MovToPC(SCRATCH1);
	outerLoop_ = GetCodePtr();
	// Advance can change the downcount (or thread), so must save/restore around it.
	SaveStaticRegisters();
	RestoreRoundingMode(true);
	WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
	ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	ApplyRoundingMode(true);
	LoadStaticRegisters();

@@ -209,6 +224,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
	}

	MovFromPC(SCRATCH1);
	WriteDebugPC(SCRATCH1);
#ifdef MASKED_PSP_MEMORY
	AND(32, R(SCRATCH1), Imm32(Memory::MEMVIEW32_MASK));
#endif

@@ -247,7 +263,9 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {

	// No block found, let's jit. We don't need to save static regs, they're all callee saved.
	RestoreRoundingMode(true);
	WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
	ABI_CallFunction(&MIPSComp::JitAt);
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	ApplyRoundingMode(true);
	// Let's just dispatch again, we'll enter the block since we know it's there.
	JMP(dispatcherNoCheck_, true);

@@ -265,6 +283,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
	const uint8_t *quitLoop = GetCodePtr();
	SetJumpTarget(badCoreState);
	WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
	SaveStaticRegisters();
	RestoreRoundingMode(true);
	ABI_PopAllCalleeSavedRegsAndAdjustStack();

@@ -283,16 +302,13 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
	// Leave this at the end, add more stuff above.
	if (enableDisasm) {
#if PPSSPP_ARCH(AMD64)
		std::vector<std::string> lines = DisassembleX86(start, (int)(GetCodePtr() - start));
		std::vector<std::string> lines = DisassembleX86(disasmStart, (int)(GetCodePtr() - disasmStart));
		for (auto s : lines) {
			INFO_LOG(JIT, "%s", s.c_str());
		}
#endif
	}

	EmitFPUConstants();
	EmitVecConstants();

	// Let's spare the pre-generated code from unprotect-reprotect.
	AlignCodePage();
	jitStartOffset_ = (int)(GetCodePtr() - start);

@@ -151,8 +151,52 @@ void X64JitBackend::CompIR_Bits(IRInst inst) {
		break;

	case IROp::ReverseBits:
		regs_.Map(inst);
		if (inst.src1 != inst.dest) {
			MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
		}

		// Swap even/odd bits (in bits: 0123 -> 1032.)
		LEA(32, SCRATCH1, MScaled(regs_.RX(inst.dest), 2, 0));
		SHR(32, regs_.R(inst.dest), Imm8(1));
		XOR(32, regs_.R(inst.dest), R(SCRATCH1));
		AND(32, regs_.R(inst.dest), Imm32(0x55555555));
		XOR(32, regs_.R(inst.dest), R(SCRATCH1));

		// Swap pairs of bits (in bits: 10325476 -> 32107654.)
		LEA(32, SCRATCH1, MScaled(regs_.RX(inst.dest), 4, 0));
		SHR(32, regs_.R(inst.dest), Imm8(2));
		XOR(32, regs_.R(inst.dest), R(SCRATCH1));
		AND(32, regs_.R(inst.dest), Imm32(0x33333333));
		XOR(32, regs_.R(inst.dest), R(SCRATCH1));

		// Swap nibbles (in nibbles: ABCD -> BADC.)
		MOV(32, R(SCRATCH1), regs_.R(inst.dest));
		SHL(32, R(SCRATCH1), Imm8(4));
		SHR(32, regs_.R(inst.dest), Imm8(4));
		XOR(32, regs_.R(inst.dest), R(SCRATCH1));
		AND(32, regs_.R(inst.dest), Imm32(0x0F0F0F0F));
		XOR(32, regs_.R(inst.dest), R(SCRATCH1));

		// Finally, swap the bytes to drop everything into place (nibbles: BADCFEHG -> HGFEDCBA.)
		BSWAP(32, regs_.RX(inst.dest));
		break;
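This is the classic masked-swap bit reversal; the emitted form does each swap with LEA/SHR/XOR/AND/XOR so only one 32-bit mask constant is needed per step. The same computation in portable C++ (a reference sketch, not code from the tree):

#include <cstdint>

uint32_t ReverseBits32(uint32_t v) {
	// Swap even/odd bits: in bits, 0123 -> 1032.
	v = ((v >> 1) & 0x55555555u) | ((v & 0x55555555u) << 1);
	// Swap pairs of bits.
	v = ((v >> 2) & 0x33333333u) | ((v & 0x33333333u) << 2);
	// Swap nibbles.
	v = ((v >> 4) & 0x0F0F0F0Fu) | ((v & 0x0F0F0F0Fu) << 4);
	// Swap the four bytes -- what BSWAP does in one instruction.
	return (v >> 24) | ((v >> 8) & 0x0000FF00u) | ((v << 8) & 0x00FF0000u) | (v << 24);
}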

	case IROp::BSwap16:
		CompIR_Generic(inst);
		regs_.Map(inst);
		if (cpu_info.bBMI2) {
			// Rotate to put it into the correct register, then swap.
			if (inst.dest != inst.src1)
				RORX(32, regs_.RX(inst.dest), regs_.R(inst.src1), 16);
			else
				ROR(32, regs_.R(inst.dest), Imm8(16));
			BSWAP(32, regs_.RX(inst.dest));
		} else {
			if (inst.dest != inst.src1)
				MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));
			BSWAP(32, regs_.RX(inst.dest));
			ROR(32, regs_.R(inst.dest), Imm8(16));
		}
		break;

	case IROp::Clz:

@@ -220,8 +264,24 @@ void X64JitBackend::CompIR_Compare(IRInst inst) {
		break;

	case IROp::SltU:
		if (regs_.IsGPRImm(inst.src1) && regs_.GetGPRImm(inst.src1) == 0) {
			// This is kinda common, same as != 0. Avoid flushing src1.
			regs_.SpillLockGPR(inst.src2, inst.dest);
			regs_.MapGPR(inst.src2);
			regs_.MapGPR(inst.dest, MIPSMap::NOINIT);
			if (inst.dest != inst.src2 && regs_.HasLowSubregister(regs_.RX(inst.dest))) {
				XOR(32, regs_.R(inst.dest), regs_.R(inst.dest));
				TEST(32, regs_.R(inst.src2), regs_.R(inst.src2));
				SETcc(CC_NE, regs_.R(inst.dest));
			} else {
				CMP(32, regs_.R(inst.src2), Imm8(0));
				SETcc(CC_NE, R(SCRATCH1));
				MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1));
			}
		} else {
			regs_.Map(inst);
			setCC(regs_.R(inst.src2), CC_B);
		}
		break;

	case IROp::SltUConst:

@@ -43,10 +43,12 @@ using namespace X64IRJitConstants;

void X64JitBackend::EmitFPUConstants() {
	EmitConst4x32(&constants.noSignMask, 0x7FFFFFFF);
	EmitConst4x32(&constants.signBitAll, 0x80000000);
	EmitConst4x32(&constants.positiveZeroes, 0x00000000);
	EmitConst4x32(&constants.positiveInfinity, 0x7F800000);
	EmitConst4x32(&constants.qNAN, 0x7FC00000);
	EmitConst4x32(&constants.positiveOnes, 0x3F800000);
	EmitConst4x32(&constants.negativeOnes, 0xBF800000);
	EmitConst4x32(&constants.maxIntBelowAsFloat, 0x4EFFFFFF);

	constants.mulTableVi2f = (const float *)GetCodePointer();
	for (uint8_t i = 0; i < 32; ++i) {

@@ -57,20 +59,14 @@ void X64JitBackend::EmitFPUConstants() {
		Write32(val);
	}

	constants.mulTableVf2i = (const double *)GetCodePointer();
	constants.mulTableVf2i = (const float *)GetCodePointer();
	for (uint8_t i = 0; i < 32; ++i) {
		double fval = (1UL << i);
		uint64_t val;
		float fval = (float)(1ULL << i);
		uint32_t val;
		memcpy(&val, &fval, sizeof(val));

		Write64(val);
		Write32(val);
	}
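The table switch from double to float is safe because every entry is a power of two: 2^i for i in [0, 31] is exactly representable in a 32-bit float, so narrowing the mulTableVf2i entries loses no precision. A sketch of the table build (equivalent to the Write32 loop above; the function name is illustrative):

#include <cstdint>
#include <cstring>

void BuildVf2iTable(uint32_t out[32]) {
	for (int i = 0; i < 32; ++i) {
		float fval = (float)(1ULL << i);
		memcpy(&out[i], &fval, sizeof(fval));  // bit-exact Write32 equivalent
	}
}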

	// Note: this first one is (double)(int)0x80000000, sign extended.
	constants.minIntAsDouble = (const double *)GetCodePointer();
	Write64(0xC1E0000000000000ULL);
	constants.maxIntAsDouble = (const double *)GetCodePointer();
	Write64(0x41DFFFFFFFC00000ULL);
}

void X64JitBackend::CopyVec4ToFPRLane0(Gen::X64Reg dest, Gen::X64Reg src, int lane) {

@@ -210,9 +206,9 @@ void X64JitBackend::CompIR_FAssign(IRInst inst) {
		// Just to make sure we don't generate bad code.
		if (inst.dest == inst.src1)
			break;
		if (regs_.IsFPRMapped(inst.src1 & 3) && regs_.GetFPRLaneCount(inst.src1 & ~3) == 4 && (inst.dest & ~3) != (inst.src1 & ~3)) {
		if (regs_.IsFPRMapped(inst.src1 & 3) && regs_.GetFPRLaneCount(inst.src1) == 4 && (inst.dest & ~3) != (inst.src1 & ~3)) {
			// Okay, this is an extract. Avoid unvec4ing src1.
			regs_.SpillLockFPR(inst.src1);
			regs_.SpillLockFPR(inst.src1 & ~3);
			regs_.MapFPR(inst.dest, MIPSMap::NOINIT);
			CopyVec4ToFPRLane0(regs_.FX(inst.dest), regs_.FX(inst.src1 & ~3), inst.src1 & 3);
		} else {

@@ -233,8 +229,30 @@ void X64JitBackend::CompIR_FAssign(IRInst inst) {
		break;

	case IROp::FSign:
		CompIR_Generic(inst);
	{
		X64Reg tempReg = regs_.MapWithFPRTemp(inst);

		// Set tempReg to +1.0 or -1.0 per sign bit.
		if (cpu_info.bAVX) {
			VANDPS(128, tempReg, regs_.FX(inst.src1), M(constants.signBitAll)); // rip accessible
		} else {
			MOVAPS(tempReg, regs_.F(inst.src1));
			ANDPS(tempReg, M(constants.signBitAll)); // rip accessible
		}
		ORPS(tempReg, M(constants.positiveOnes)); // rip accessible

		// Set dest = 0xFFFFFFFF if +0.0 or -0.0.
		if (inst.dest != inst.src1) {
			XORPS(regs_.FX(inst.dest), regs_.F(inst.dest));
			CMPPS(regs_.FX(inst.dest), regs_.F(inst.src1), CMP_EQ);
		} else {
			CMPPS(regs_.FX(inst.dest), M(constants.positiveZeroes), CMP_EQ); // rip accessible
		}

		// Now AND-NOT with that mask, so a zero input stays zero.
		ANDNPS(regs_.FX(inst.dest), R(tempReg));
		break;
	}
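The inlined FSign has these semantics, sketched in plain C++ (this mirrors the emitted masks -- sign bit OR'd with 1.0f, zeroed for zero inputs -- and is illustrative, not code from the tree):

#include <cstdint>
#include <cstring>

float FSign(float v) {
	uint32_t bits;
	memcpy(&bits, &v, sizeof(bits));
	if ((bits & 0x7FFFFFFFu) == 0)
		return 0.0f;  // +0.0 and -0.0 both give zero
	// signBitAll | positiveOnes: +/-1.0f matching the input's sign bit.
	uint32_t result = (bits & 0x80000000u) | 0x3F800000u;
	memcpy(&v, &result, sizeof(v));
	return v;
}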

	default:
		INVALIDOP;

@@ -273,25 +291,22 @@ void X64JitBackend::CompIR_FCompare(IRInst inst) {
		break;

	case IRFpCompareMode::EqualOrdered:
	{
		// Since UCOMISS doesn't give us ordered == directly, CMPSS is better.
		regs_.SpillLockFPR(inst.src1, inst.src2);
		X64Reg tempReg = regs_.GetAndLockTempFPR();
		regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
		// Clear the upper bits of SCRATCH1 so we can AND later.
		// We don't have a single flag we can check, unfortunately.
		XOR(32, R(SCRATCH1), R(SCRATCH1));
		UCOMISS(regs_.FX(inst.src1), regs_.F(inst.src2));
		// E/ZF = EQUAL or UNORDERED (not exactly what we want.)
		SETcc(CC_E, R(SCRATCH1));
		if (regs_.HasLowSubregister(regs_.RX(IRREG_FPCOND))) {
			// NP/!PF = ORDERED.
			SETcc(CC_NP, regs_.R(IRREG_FPCOND));
			AND(32, regs_.R(IRREG_FPCOND), R(SCRATCH1));

		if (cpu_info.bAVX) {
			VCMPSS(tempReg, regs_.FX(inst.src1), regs_.F(inst.src2), CMP_EQ);
		} else {
			MOVZX(32, 8, regs_.RX(IRREG_FPCOND), R(SCRATCH1));
			// Neither of those affected flags, luckily.
			// NP/!PF = ORDERED.
			SETcc(CC_NP, R(SCRATCH1));
			AND(32, regs_.R(IRREG_FPCOND), R(SCRATCH1));
			MOVAPS(tempReg, regs_.F(inst.src1));
			CMPSS(tempReg, regs_.F(inst.src2), CMP_EQ);
		}
		MOVD_xmm(regs_.R(IRREG_FPCOND), tempReg);
		AND(32, regs_.R(IRREG_FPCOND), Imm32(1));
		break;
	}
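Why CMPSS wins here: UCOMISS folds "equal" and "unordered" into ZF, so ordered equality previously needed a second SETcc on PF plus an AND; CMPSS with CMP_EQ is already an ordered compare and yields a single all-ones/all-zeroes mask. The condition it computes, as a sketch:

#include <cmath>

int FpCondEqualOrdered(float a, float b) {
	// CMP_EQ (ordered): false whenever either operand is NaN.
	return (!std::isnan(a) && !std::isnan(b) && a == b) ? 1 : 0;
}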

	case IRFpCompareMode::EqualUnordered:
		regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });

@@ -458,15 +473,58 @@ void X64JitBackend::CompIR_FCompare(IRInst inst) {

	case IROp::FCmpVfpuAggregate:
		regs_.MapGPR(IRREG_VFPU_CC, MIPSMap::DIRTY);
		// First, clear out the bits we're aggregating.
		if (inst.dest == 1) {
			// Special case 1, which is not uncommon.
			AND(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF));
			BT(32, regs_.R(IRREG_VFPU_CC), Imm8(0));
			FixupBranch skip = J_CC(CC_NC);
			OR(32, regs_.R(IRREG_VFPU_CC), Imm8(0x30));
			SetJumpTarget(skip);
		} else if (inst.dest == 3) {
			AND(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF));
			MOV(32, R(SCRATCH1), regs_.R(IRREG_VFPU_CC));
			AND(32, R(SCRATCH1), Imm8(3));
			// 0, 1, and 3 are already correct for the any and all bits.
			CMP(32, R(SCRATCH1), Imm8(2));

			FixupBranch skip = J_CC(CC_NE);
			SUB(32, R(SCRATCH1), Imm8(1));
			SetJumpTarget(skip);

			SHL(32, R(SCRATCH1), Imm8(4));
			OR(32, regs_.R(IRREG_VFPU_CC), R(SCRATCH1));
		} else if (inst.dest == 0xF) {
			XOR(32, R(SCRATCH1), R(SCRATCH1));

			// Clear out the bits we're aggregating.
			// The register refuses writes to bits outside 0x3F, and we're setting 0x30.
			AND(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF));

			// Set the any bit, just using the AND above.
			FixupBranch noneSet = J_CC(CC_Z);
			OR(32, regs_.R(IRREG_VFPU_CC), Imm8(0x10));

			// Next up, the "all" bit.
			CMP(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF));
			SETcc(CC_E, R(SCRATCH1));
			SHL(32, R(SCRATCH1), Imm8(5));
			OR(32, regs_.R(IRREG_VFPU_CC), R(SCRATCH1));

			SetJumpTarget(noneSet);
		} else {
			XOR(32, R(SCRATCH1), R(SCRATCH1));

			// Clear out the bits we're aggregating.
			// The register refuses writes to bits outside 0x3F, and we're setting 0x30.
			AND(32, regs_.R(IRREG_VFPU_CC), Imm8(0xF));

			// Set the any bit.
			if (regs_.HasLowSubregister(regs_.RX(IRREG_VFPU_CC)))
				TEST(8, regs_.R(IRREG_VFPU_CC), Imm8(inst.dest));
			else
				TEST(32, regs_.R(IRREG_VFPU_CC), Imm32(inst.dest));
			SETcc(CC_NZ, R(SCRATCH1));
			SHL(32, R(SCRATCH1), Imm8(4));
			OR(32, regs_.R(IRREG_VFPU_CC), R(SCRATCH1));
			FixupBranch noneSet = J_CC(CC_Z);
			OR(32, regs_.R(IRREG_VFPU_CC), Imm8(0x10));

			// Next up, the "all" bit. A bit annoying...
			MOV(32, R(SCRATCH1), regs_.R(IRREG_VFPU_CC));

@@ -475,6 +533,9 @@ void X64JitBackend::CompIR_FCompare(IRInst inst) {
			SETcc(CC_E, R(SCRATCH1));
			SHL(32, R(SCRATCH1), Imm8(5));
			OR(32, regs_.R(IRREG_VFPU_CC), R(SCRATCH1));

			SetJumpTarget(noneSet);
		}
		break;

	default:

@@ -579,11 +640,14 @@ void X64JitBackend::CompIR_FCvt(IRInst inst) {
	case IROp::FCvtWS:
	{
		regs_.Map(inst);
		UCOMISS(regs_.FX(inst.src1), M(constants.positiveInfinity)); // rip accessible
		UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible

		CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.src1));
		// UCOMISS set ZF if EQUAL (to infinity) or UNORDERED.
		FixupBranch skip = J_CC(CC_NZ);
		// UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
		// We want noSignMask otherwise, GREATER or UNORDERED.
		FixupBranch isNAN = J_CC(CC_P);
		FixupBranch skip = J_CC(CC_BE);
		SetJumpTarget(isNAN);
		MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible

		SetJumpTarget(skip);
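maxIntBelowAsFloat (0x4EFFFFFF, i.e. 2147483520.0f) is the largest float strictly below 2^31, so the UCOMISS catches everything that would overflow a positive int32 -- and NaN too, via PF. A sketch of the resulting saturation (negative overflow is left to CVTPS2DQ's 0x80000000 "indefinite" result, which already matches; illustrative code only):

#include <cmath>
#include <cstdint>

int32_t SaturatingFloatToInt(float v) {
	const float maxIntBelow = 2147483520.0f;  // bits 0x4EFFFFFF
	if (std::isnan(v) || v > maxIntBelow)
		return 0x7FFFFFFF;  // the noSignMask pattern
	if (v < -2147483648.0f)
		return (int32_t)0x80000000;
	return (int32_t)v;  // rounding mode is applied by the emitted code
}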

@@ -599,54 +663,65 @@ void X64JitBackend::CompIR_FCvt(IRInst inst) {
		regs_.Map(inst);
		if (cpu_info.bSSE4_1) {
			int scale = inst.src2 & 0x1F;
			int rmode = inst.src2 >> 6;
			IRRoundMode rmode = (IRRoundMode)(inst.src2 >> 6);

			CVTSS2SD(regs_.FX(inst.dest), regs_.F(inst.src1));
			if (scale != 0 && cpu_info.bAVX) {
				VMULSS(regs_.FX(inst.dest), regs_.FX(inst.src1), M(&constants.mulTableVf2i[scale])); // rip accessible
			} else {
				if (inst.dest != inst.src1)
					MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
				if (scale != 0)
					MULSD(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale])); // rip accessible
					MULSS(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale])); // rip accessible
			}

			// On NAN, we want maxInt anyway, so let's let it be the second param.
			MAXSD(regs_.FX(inst.dest), M(constants.minIntAsDouble)); // rip accessible
			MINSD(regs_.FX(inst.dest), M(constants.maxIntAsDouble)); // rip accessible
			UCOMISS(regs_.FX(inst.dest), M(constants.maxIntBelowAsFloat)); // rip accessible

			switch (rmode) {
			case 0:
				ROUNDNEARPD(regs_.FX(inst.dest), regs_.F(inst.dest));
				CVTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
			case IRRoundMode::RINT_0:
				ROUNDNEARPS(regs_.FX(inst.dest), regs_.F(inst.dest));
				CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
				break;

			case 1:
				CVTTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
			case IRRoundMode::CAST_1:
				CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
				break;

			case 2:
				ROUNDCEILPD(regs_.FX(inst.dest), regs_.F(inst.dest));
				CVTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
			case IRRoundMode::CEIL_2:
				ROUNDCEILPS(regs_.FX(inst.dest), regs_.F(inst.dest));
				CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
				break;

			case 3:
				ROUNDFLOORPD(regs_.FX(inst.dest), regs_.F(inst.dest));
				CVTPD2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
			case IRRoundMode::FLOOR_3:
				ROUNDFLOORPS(regs_.FX(inst.dest), regs_.F(inst.dest));
				CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
				break;
			}

			// UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
			// We want noSignMask otherwise, GREATER or UNORDERED.
			FixupBranch isNAN = J_CC(CC_P);
			FixupBranch skip = J_CC(CC_BE);
			SetJumpTarget(isNAN);
			MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
			SetJumpTarget(skip);
		} else {
			int scale = inst.src2 & 0x1F;
			int rmode = inst.src2 >> 6;
			IRRoundMode rmode = (IRRoundMode)(inst.src2 >> 6);

			int setMXCSR = -1;
			bool useTrunc = false;
			switch (rmode) {
			case 0:
			case IRRoundMode::RINT_0:
				// TODO: Could skip if hasSetRounding, but we don't have the flag.
				setMXCSR = 0;
				break;
			case 1:
			case IRRoundMode::CAST_1:
				useTrunc = true;
				break;
			case 2:
			case IRRoundMode::CEIL_2:
				setMXCSR = 2;
				break;
			case 3:
			case IRRoundMode::FLOOR_3:
				setMXCSR = 1;
				break;
			}

@@ -665,21 +740,26 @@ void X64JitBackend::CompIR_FCvt(IRInst inst) {
				LDMXCSR(MDisp(CTXREG, tempOffset));
			}

			CVTSS2SD(regs_.FX(inst.dest), regs_.F(inst.src1));
			if (inst.dest != inst.src1)
				MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
			if (scale != 0)
				MULSD(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale]));
				MULSS(regs_.FX(inst.dest), M(&constants.mulTableVf2i[scale])); // rip accessible

			// On NAN, we want maxInt anyway, so let's let it be the second param.
			MAXSD(regs_.FX(inst.dest), M(constants.minIntAsDouble));
			MINSD(regs_.FX(inst.dest), M(constants.maxIntAsDouble));
			UCOMISS(regs_.FX(inst.dest), M(constants.maxIntBelowAsFloat)); // rip accessible

			if (useTrunc) {
				CVTTSD2SI(SCRATCH1, regs_.F(inst.dest));
				CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
			} else {
				CVTSD2SI(SCRATCH1, regs_.F(inst.dest));
				CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
			}

			MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1));
			// UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
			// We want noSignMask otherwise, GREATER or UNORDERED.
			FixupBranch isNAN = J_CC(CC_P);
			FixupBranch skip = J_CC(CC_BE);
			SetJumpTarget(isNAN);
			MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
			SetJumpTarget(skip);

			// Return MXCSR to its previous value.
			if (setMXCSR != -1) {

@@ -704,47 +784,106 @@ void X64JitBackend::CompIR_FRound(IRInst inst) {
	CONDITIONAL_DISABLE;

	switch (inst.op) {
	case IROp::FCeil:
	case IROp::FFloor:
	case IROp::FRound:
		CompIR_Generic(inst);
		if (cpu_info.bSSE4_1) {
			regs_.Map(inst);
			UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible

			switch (inst.op) {
			case IROp::FCeil:
				ROUNDCEILPS(regs_.FX(inst.dest), regs_.F(inst.src1));
				break;

			case IROp::FFloor:
				ROUNDFLOORPS(regs_.FX(inst.dest), regs_.F(inst.src1));
				break;

			case IROp::FRound:
				ROUNDNEARPS(regs_.FX(inst.dest), regs_.F(inst.src1));
				break;

			default:
				INVALIDOP;
			}
			CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.dest));
			// UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
			// We want noSignMask otherwise, GREATER or UNORDERED.
			FixupBranch isNAN = J_CC(CC_P);
			FixupBranch skip = J_CC(CC_BE);
			SetJumpTarget(isNAN);
			MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible

			SetJumpTarget(skip);
		} else {
			regs_.Map(inst);

			int setMXCSR = -1;
			switch (inst.op) {
			case IROp::FRound:
				// TODO: Could skip if hasSetRounding, but we don't have the flag.
				setMXCSR = 0;
				break;
			case IROp::FCeil:
				setMXCSR = 2;
				break;
			case IROp::FFloor:
				setMXCSR = 1;
				break;
			default:
				INVALIDOP;
			}

			// TODO: Might be possible to cache this and update between instructions?
			// Probably kinda expensive to switch each time...
			if (setMXCSR != -1) {
				STMXCSR(MDisp(CTXREG, mxcsrTempOffset));
				MOV(32, R(SCRATCH1), MDisp(CTXREG, mxcsrTempOffset));
				AND(32, R(SCRATCH1), Imm32(~(3 << 13)));
				if (setMXCSR != 0) {
					OR(32, R(SCRATCH1), Imm32(setMXCSR << 13));
				}
				MOV(32, MDisp(CTXREG, tempOffset), R(SCRATCH1));
				LDMXCSR(MDisp(CTXREG, tempOffset));
			}

			UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible

			CVTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.src1));
			// UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
			// We want noSignMask otherwise, GREATER or UNORDERED.
			FixupBranch isNAN = J_CC(CC_P);
			FixupBranch skip = J_CC(CC_BE);
			SetJumpTarget(isNAN);
			MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible

			SetJumpTarget(skip);

			// Return MXCSR to its previous value.
			if (setMXCSR != -1) {
				LDMXCSR(MDisp(CTXREG, mxcsrTempOffset));
			}
		}
		break;

	case IROp::FTrunc:
	{
		regs_.SpillLockFPR(inst.dest, inst.src1);
		X64Reg tempZero = regs_.GetAndLockTempFPR();
		regs_.Map(inst);
		UCOMISS(regs_.FX(inst.src1), M(constants.maxIntBelowAsFloat)); // rip accessible

		CVTTSS2SI(SCRATCH1, regs_.F(inst.src1));
		CVTTPS2DQ(regs_.FX(inst.dest), regs_.F(inst.src1));
		// UCOMISS set CF if LESS and ZF if EQUAL to maxIntBelowAsFloat.
		// We want noSignMask otherwise, GREATER or UNORDERED.
		FixupBranch isNAN = J_CC(CC_P);
		FixupBranch skip = J_CC(CC_BE);
		SetJumpTarget(isNAN);
		MOVAPS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible

		// Did we get an indefinite integer value?
		CMP(32, R(SCRATCH1), Imm32(0x80000000));
		FixupBranch wasExact = J_CC(CC_NE);

		XORPS(tempZero, R(tempZero));
		if (inst.dest == inst.src1) {
			CMPSS(regs_.FX(inst.dest), R(tempZero), CMP_LT);
		} else if (cpu_info.bAVX) {
			VCMPSS(regs_.FX(inst.dest), regs_.FX(inst.src1), R(tempZero), CMP_LT);
		} else {
			MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
			CMPSS(regs_.FX(inst.dest), R(tempZero), CMP_LT);
		}

		// At this point, -inf = 0xffffffff, inf/nan = 0x00000000.
		// We want -inf to be 0x80000000 and inf/nan to be 0x7fffffff, so we flip those bits.
		MOVD_xmm(R(SCRATCH1), regs_.FX(inst.dest));
		XOR(32, R(SCRATCH1), Imm32(0x7fffffff));

		SetJumpTarget(wasExact);
		MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1));
		SetJumpTarget(skip);
		break;
	}
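The FTrunc fixup handles the x86 "integer indefinite" value: CVTTSS2SI and friends return 0x80000000 for anything out of range, while the PSP wants INT_MIN only for negative overflow and INT_MAX for positive overflow or NaN. The sign compare plus XOR with 0x7fffffff implements exactly that flip; as a sketch of the intended semantics:

#include <cmath>
#include <cstdint>

int32_t TruncateToInt(float v) {
	if (std::isnan(v))
		return 0x7FFFFFFF;
	if (v >= 2147483648.0f)   // 2^31 and above
		return 0x7FFFFFFF;
	if (v < -2147483648.0f)
		return (int32_t)0x80000000;
	return (int32_t)v;  // in-range truncation matches CVTTSS2SI
}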

	case IROp::FCeil:
	case IROp::FFloor:
		CompIR_Generic(inst);
		break;

	default:
		INVALIDOP;
		break;

@@ -833,6 +972,7 @@ void X64JitBackend::CompIR_FSpecial(IRInst inst) {

	auto callFuncF_F = [&](const void *func) {
		regs_.FlushBeforeCall();
		WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);

#if X64JIT_USE_XMM_CALL
		if (regs_.IsFPRMapped(inst.src1)) {

@@ -865,6 +1005,8 @@ void X64JitBackend::CompIR_FSpecial(IRInst inst) {
		regs_.MapFPR(inst.dest, MIPSMap::NOINIT);
		MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1));
#endif

		WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	};

	switch (inst.op) {

@@ -45,35 +45,41 @@ Gen::OpArg X64JitBackend::PrepareSrc1Address(IRInst inst) {
	// If it's about to be clobbered, don't waste time pointerifying. Use displacement.
	bool clobbersSrc1 = !readsFromSrc1 && regs_.IsGPRClobbered(inst.src1);

	int32_t disp = (int32_t)inst.constant;
	// It can't be this negative, must be a constant address with the top bit set.
	if ((disp & 0xC0000000) == 0x80000000) {
		disp = inst.constant & 0x7FFFFFFF;
	}

#ifdef MASKED_PSP_MEMORY
	if (inst.constant > 0)
		inst.constant &= Memory::MEMVIEW32_MASK;
	if (disp > 0)
		disp &= Memory::MEMVIEW32_MASK;
#endif

	OpArg addrArg;
	if (inst.src1 == MIPS_REG_ZERO) {
#ifdef MASKED_PSP_MEMORY
		inst.constant &= Memory::MEMVIEW32_MASK;
		disp &= Memory::MEMVIEW32_MASK;
#endif
#if PPSSPP_ARCH(AMD64)
		addrArg = MDisp(MEMBASEREG, inst.constant & 0x7FFFFFFF);
		addrArg = MDisp(MEMBASEREG, disp & 0x7FFFFFFF);
#else
		addrArg = M(Memory::base + inst.constant);
		addrArg = M(Memory::base + disp);
#endif
	} else if ((jo.cachePointers || src1IsPointer) && !readsFromSrc1 && (!clobbersSrc1 || src1IsPointer)) {
		X64Reg src1 = regs_.MapGPRAsPointer(inst.src1);
		addrArg = MDisp(src1, (int)inst.constant);
		addrArg = MDisp(src1, disp);
	} else {
		regs_.MapGPR(inst.src1);
#ifdef MASKED_PSP_MEMORY
		LEA(PTRBITS, SCRATCH1, MDisp(regs_.RX(inst.src1), (int)inst.constant));
		LEA(PTRBITS, SCRATCH1, MDisp(regs_.RX(inst.src1), disp));
		AND(PTRBITS, R(SCRATCH1), Imm32(Memory::MEMVIEW32_MASK));
		addrArg = MDisp(SCRATCH1, (intptr_t)Memory::base);
#else
#if PPSSPP_ARCH(AMD64)
		addrArg = MComplex(MEMBASEREG, regs_.RX(inst.src1), SCALE_1, (int)inst.constant);
		addrArg = MComplex(MEMBASEREG, regs_.RX(inst.src1), SCALE_1, disp);
#else
		addrArg = MDisp(regs_.RX(inst.src1), Memory::base + inst.constant);
		addrArg = MDisp(regs_.RX(inst.src1), Memory::base + disp);
#endif
#endif
	}
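The new disp handling mirrors the RiscV AdjustForAddressOffset change above: strip the top bit from constants that look like kernel-mirror addresses so they fit in a positive 32-bit displacement. Isolated as a sketch:

#include <cstdint>

int32_t AddressDisplacement(uint32_t constant) {
	int32_t disp = (int32_t)constant;
	// It can't be this negative; must be a constant address with the top bit set.
	if ((disp & 0xC0000000) == 0x80000000)
		disp = (int32_t)(constant & 0x7FFFFFFF);
	return disp;
}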

@@ -20,9 +20,11 @@

#include "Common/Profiler/Profiler.h"
#include "Core/Core.h"
#include "Core/Debugger/Breakpoints.h"
#include "Core/HLE/HLE.h"
#include "Core/HLE/ReplaceTables.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/IR/IRInterpreter.h"
#include "Core/MIPS/x86/X64IRJit.h"
#include "Core/MIPS/x86/X64IRRegCache.h"

@@ -62,6 +64,20 @@ void X64JitBackend::CompIR_Basic(IRInst inst) {
		regs_.Map(inst);
		if (inst.constant == 0) {
			XORPS(regs_.FX(inst.dest), regs_.F(inst.dest));
		} else if (inst.constant == 0x7FFFFFFF) {
			MOVSS(regs_.FX(inst.dest), M(constants.noSignMask)); // rip accessible
		} else if (inst.constant == 0x80000000) {
			MOVSS(regs_.FX(inst.dest), M(constants.signBitAll)); // rip accessible
		} else if (inst.constant == 0x7F800000) {
			MOVSS(regs_.FX(inst.dest), M(constants.positiveInfinity)); // rip accessible
		} else if (inst.constant == 0x7FC00000) {
			MOVSS(regs_.FX(inst.dest), M(constants.qNAN)); // rip accessible
		} else if (inst.constant == 0x3F800000) {
			MOVSS(regs_.FX(inst.dest), M(constants.positiveOnes)); // rip accessible
		} else if (inst.constant == 0xBF800000) {
			MOVSS(regs_.FX(inst.dest), M(constants.negativeOnes)); // rip accessible
		} else if (inst.constant == 0x4EFFFFFF) {
			MOVSS(regs_.FX(inst.dest), M(constants.maxIntBelowAsFloat)); // rip accessible
		} else {
			MOV(32, R(SCRATCH1), Imm32(inst.constant));
			MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1));

@@ -74,6 +90,7 @@ void X64JitBackend::CompIR_Basic(IRInst inst) {
		break;

	case IROp::SetPCConst:
		lastConstPC_ = inst.constant;
		MOV(32, R(SCRATCH1), Imm32(inst.constant));
		MovToPC(SCRATCH1);
		break;

@@ -97,17 +114,80 @@ void X64JitBackend::CompIR_Breakpoint(IRInst inst) {
		break;

	case IROp::MemoryCheck:
	{
		X64Reg addrBase = regs_.MapGPR(inst.src1);
		if (regs_.IsGPRImm(inst.src1)) {
			uint32_t iaddr = regs_.GetGPRImm(inst.src1) + inst.constant;
			uint32_t checkedPC = lastConstPC_ + inst.dest;
			int size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
			if (size == 0) {
				checkedPC += 4;
				size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
			}
			bool isWrite = MIPSAnalyst::IsOpMemoryWrite(checkedPC);

			MemCheck check;
			if (CBreakPoints::GetMemCheckInRange(iaddr, size, &check)) {
				if (!(check.cond & MEMCHECK_READ) && !isWrite)
					break;
				if (!(check.cond & (MEMCHECK_WRITE | MEMCHECK_WRITE_ONCHANGE)) && isWrite)
					break;

				// We need to flush, or conditions and log expressions will see old register values.
				FlushAll();
				LEA(32, addrBase, MDisp(addrBase, inst.constant));
				MovFromPC(SCRATCH1);
				LEA(32, SCRATCH1, MDisp(SCRATCH1, inst.dest));
				ABI_CallFunctionRR((const void *)&IRRunMemCheck, SCRATCH1, addrBase);

				ABI_CallFunctionCC((const void *)&IRRunMemCheck, checkedPC, iaddr);
				TEST(32, R(EAX), R(EAX));
				J_CC(CC_NZ, dispatcherCheckCoreState_, true);
				break;
			}
		} else {
			uint32_t checkedPC = lastConstPC_ + inst.dest;
			int size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
			if (size == 0) {
				checkedPC += 4;
				size = MIPSAnalyst::OpMemoryAccessSize(checkedPC);
			}
			bool isWrite = MIPSAnalyst::IsOpMemoryWrite(checkedPC);

			const auto memchecks = CBreakPoints::GetMemCheckRanges(isWrite);
			// We can trivially skip if there are no checks for this type (i.e. read vs write.)
			if (memchecks.empty())
				break;

			X64Reg addrBase = regs_.MapGPR(inst.src1);
			LEA(32, SCRATCH1, MDisp(addrBase, inst.constant));

			// We need to flush, or conditions and log expressions will see old register values.
			FlushAll();

			std::vector<FixupBranch> hitChecks;
			for (auto it : memchecks) {
				if (it.end != 0) {
					CMP(32, R(SCRATCH1), Imm32(it.start - size));
					FixupBranch skipNext = J_CC(CC_BE);

					CMP(32, R(SCRATCH1), Imm32(it.end));
					hitChecks.push_back(J_CC(CC_B, true));

					SetJumpTarget(skipNext);
				} else {
					CMP(32, R(SCRATCH1), Imm32(it.start));
					hitChecks.push_back(J_CC(CC_E, true));
				}
			}

			FixupBranch noHits = J(true);

			// Okay, now land any hit here.
			for (auto &fixup : hitChecks)
				SetJumpTarget(fixup);
			hitChecks.clear();

			ABI_CallFunctionAA((const void *)&IRRunMemCheck, Imm32(checkedPC), R(SCRATCH1));
			TEST(32, R(EAX), R(EAX));
			J_CC(CC_NZ, dispatcherCheckCoreState_, true);

			SetJumpTarget(noHits);
		}
		break;
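Each compare pair in the loop above performs an overlap test between the access and a memcheck range, using unsigned compares as emitted. A sketch of the condition only (illustrative names; ranges that wrap near zero would need the same care as the real code):

#include <cstdint>

bool HitsMemCheck(uint32_t addr, uint32_t size, uint32_t start, uint32_t end) {
	if (end != 0) {
		// Does the access [addr, addr + size) intersect [start, end)?
		// Matches the CMP/J_CC(CC_BE) then CMP/J_CC(CC_B) pair above.
		return addr > start - size && addr < end;
	}
	// Single-address check.
	return addr == start;
}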

	default:
		INVALIDOP;

@@ -123,6 +203,7 @@ void X64JitBackend::CompIR_System(IRInst inst) {
		FlushAll();
		SaveStaticRegisters();

		WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL);
#ifdef USE_PROFILER
		// When profiling, we can't skip CallSyscall, since it times syscalls.
		ABI_CallFunctionC((const u8 *)&CallSyscall, inst.constant);

@@ -139,6 +220,7 @@ void X64JitBackend::CompIR_System(IRInst inst) {
		}
#endif

		WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
		LoadStaticRegisters();
		// This is always followed by an ExitToPC, where we check coreState.
		break;

@@ -146,14 +228,26 @@ void X64JitBackend::CompIR_System(IRInst inst) {
	case IROp::CallReplacement:
		FlushAll();
		SaveStaticRegisters();
		WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT);
		ABI_CallFunction(GetReplacementFunc(inst.constant)->replaceFunc);
		WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
		LoadStaticRegisters();
		//SUB(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG), R(EAX));
		SUB(32, MDisp(CTXREG, downcountOffset), R(EAX));
		break;

	case IROp::Break:
		CompIR_Generic(inst);
		FlushAll();
		// This doesn't naturally have restore/apply around it.
		RestoreRoundingMode(true);
		SaveStaticRegisters();
		MovFromPC(SCRATCH1);
		ABI_CallFunctionR((const void *)&Core_Break, SCRATCH1);
		LoadStaticRegisters();
		ApplyRoundingMode(true);
		MovFromPC(SCRATCH1);
		LEA(32, SCRATCH1, MDisp(SCRATCH1, 4));
		JMP(dispatcherPCInSCRATCH1_, true);
		break;

	default:

@@ -191,8 +285,34 @@ void X64JitBackend::CompIR_Transfer(IRInst inst) {
		break;

	case IROp::FpCtrlFromReg:
		regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::NOINIT } });
		// Mask out the unused bits, and store fcr31 (using fpcond as a temp.)
		MOV(32, regs_.R(IRREG_FPCOND), Imm32(0x0181FFFF));
		AND(32, regs_.R(IRREG_FPCOND), regs_.R(inst.src1));
		MOV(32, MDisp(CTXREG, fcr31Offset), regs_.R(IRREG_FPCOND));

		// With that done, grab bit 23, the actual fpcond.
		SHR(32, regs_.R(IRREG_FPCOND), Imm8(23));
		AND(32, regs_.R(IRREG_FPCOND), Imm32(1));
		break;

	case IROp::FpCtrlToReg:
		CompIR_Generic(inst);
		regs_.MapWithExtra(inst, { { 'G', IRREG_FPCOND, 1, MIPSMap::INIT } });
		// Start by clearing the fpcond bit (might as well mask while we're here.)
		MOV(32, regs_.R(inst.dest), Imm32(0x0101FFFF));
		AND(32, regs_.R(inst.dest), MDisp(CTXREG, fcr31Offset));

		AND(32, regs_.R(IRREG_FPCOND), Imm32(1));
		if (cpu_info.bBMI2) {
			RORX(32, SCRATCH1, regs_.R(IRREG_FPCOND), 32 - 23);
		} else {
			MOV(32, R(SCRATCH1), regs_.R(IRREG_FPCOND));
			SHL(32, R(SCRATCH1), Imm8(23));
		}
		OR(32, regs_.R(inst.dest), R(SCRATCH1));

		// Update fcr31 while we were here, for consistency.
		MOV(32, MDisp(CTXREG, fcr31Offset), regs_.R(inst.dest));
		break;
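Both transfer ops keep fcr31 and the cached fpcond register in sync: bit 23 of fcr31 is the FPU condition flag, and fpcond caches just that bit in bit 0. The packing, as a sketch (function names are illustrative):

#include <cstdint>

uint32_t ExtractFpcond(uint32_t fcr31) {
	return (fcr31 >> 23) & 1;
}

uint32_t PackFcr31(uint32_t fcr31, uint32_t fpcond) {
	fcr31 &= 0x0101FFFF;          // clear the fpcond bit, mask unused bits
	fcr31 |= (fpcond & 1) << 23;  // splice the cached condition back in
	return fcr31;
}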
|
||||
|
||||
case IROp::VfpuCtrlToReg:
|
||||
|
@ -221,23 +341,6 @@ void X64JitBackend::CompIR_Transfer(IRInst inst) {
|
|||
}
|
||||
}
|
||||
|
||||
int ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_t isWrite) {
|
||||
const auto toss = [&](MemoryExceptionType t) {
|
||||
Core_MemoryException(addr, alignment, currentMIPS->pc, t);
|
||||
return coreState != CORE_RUNNING ? 1 : 0;
|
||||
};
|
||||
|
||||
if (!Memory::IsValidRange(addr, alignment)) {
|
||||
MemoryExceptionType t = isWrite == 1 ? MemoryExceptionType::WRITE_WORD : MemoryExceptionType::READ_WORD;
|
||||
if (alignment > 4)
|
||||
t = isWrite ? MemoryExceptionType::WRITE_BLOCK : MemoryExceptionType::READ_BLOCK;
|
||||
return toss(t);
|
||||
} else if (alignment > 1 && (addr & (alignment - 1)) != 0) {
|
||||
return toss(MemoryExceptionType::ALIGNMENT);
|
||||
}
|
||||
return 0;
|
||||
}
void X64JitBackend::CompIR_ValidateAddress(IRInst inst) {
	CONDITIONAL_DISABLE;

@@ -265,10 +368,17 @@ void X64JitBackend::CompIR_ValidateAddress(IRInst inst) {
		break;
	}

	// This is unfortunate...
	FlushAll();
	if (regs_.IsGPRMappedAsPointer(inst.src1)) {
		LEA(PTRBITS, SCRATCH1, MDisp(regs_.RXPtr(inst.src1), inst.constant));
#if defined(MASKED_PSP_MEMORY)
		SUB(PTRBITS, R(SCRATCH1), ImmPtr(Memory::base));
#else
		SUB(PTRBITS, R(SCRATCH1), R(MEMBASEREG));
#endif
	} else {
		regs_.Map(inst);
		LEA(PTRBITS, SCRATCH1, MDisp(regs_.RX(inst.src1), inst.constant));
	}
	AND(32, R(SCRATCH1), Imm32(0x3FFFFFFF));

	std::vector<FixupBranch> validJumps;

@@ -282,25 +392,32 @@ void X64JitBackend::CompIR_ValidateAddress(IRInst inst) {
	CMP(32, R(SCRATCH1), Imm32(PSP_GetUserMemoryEnd() - alignment));
	FixupBranch tooHighRAM = J_CC(CC_A);
	CMP(32, R(SCRATCH1), Imm32(PSP_GetKernelMemoryBase()));
	validJumps.push_back(J_CC(CC_AE));
	validJumps.push_back(J_CC(CC_AE, true));

	CMP(32, R(SCRATCH1), Imm32(PSP_GetVidMemEnd() - alignment));
	FixupBranch tooHighVid = J_CC(CC_A);
	CMP(32, R(SCRATCH1), Imm32(PSP_GetVidMemBase()));
	validJumps.push_back(J_CC(CC_AE));
	validJumps.push_back(J_CC(CC_AE, true));

	CMP(32, R(SCRATCH1), Imm32(PSP_GetScratchpadMemoryEnd() - alignment));
	FixupBranch tooHighScratch = J_CC(CC_A);
	CMP(32, R(SCRATCH1), Imm32(PSP_GetScratchpadMemoryBase()));
	validJumps.push_back(J_CC(CC_AE));
	validJumps.push_back(J_CC(CC_AE, true));

	if (alignment != 1)
		SetJumpTarget(unaligned);
	SetJumpTarget(tooHighRAM);
	SetJumpTarget(tooHighVid);
	SetJumpTarget(tooHighScratch);

	// If we got here, something unusual and bad happened, so we'll always go back to the dispatcher.
	// Because of that, we can avoid flushing outside this case.
	auto regsCopy = regs_;
	regsCopy.FlushAll();

	// Ignores the return value, always returns to the dispatcher.
	// Otherwise would need a thunk to restore regs.
	ABI_CallFunctionACC((const void *)&ReportBadAddress, R(SCRATCH1), alignment, isWrite);
	TEST(32, R(EAX), R(EAX));
	validJumps.push_back(J_CC(CC_Z));
	JMP(dispatcherCheckCoreState_, true);

	for (FixupBranch &b : validJumps)
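
In plain C++, the emitted range and alignment checks above are equivalent to roughly this (a sketch; the PSP_Get*() bounds are PPSSPP's real MemMap accessors, but the helper name is made up):

	static bool IsValidPspAddress(uint32_t addr, uint32_t alignment) {
		addr &= 0x3FFFFFFF;  // same mask the JIT applies to SCRATCH1
		if (alignment > 1 && (addr & (alignment - 1)) != 0)
			return false;    // the "unaligned" branch
		auto in = [&](uint32_t base, uint32_t end) {
			// Valid if the whole access fits: base <= addr <= end - alignment.
			return addr >= base && addr <= end - alignment;
		};
		return in(PSP_GetKernelMemoryBase(), PSP_GetUserMemoryEnd()) ||
		       in(PSP_GetVidMemBase(), PSP_GetVidMemEnd()) ||
		       in(PSP_GetScratchpadMemoryBase(), PSP_GetScratchpadMemoryEnd());
	}

Anything failing all three ranges falls through to ReportBadAddress() and, via the TEST/J_CC pair, either resumes or exits to the dispatcher.
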
@@ -19,6 +19,7 @@
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)

#include <cstddef>
#include "Common/StringUtils.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/x86/X64IRJit.h"

@@ -63,6 +64,8 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
		SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
		wroteCheckedOffset = true;

		WriteDebugPC(startPC);

		// TODO: See if we can get flags to always have the downcount compare.
		if (jo.downcountInRegister) {
			TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG));

@@ -79,6 +82,7 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
	const u8 *blockStart = GetCodePointer();
	block->SetTargetOffset((int)GetOffset(blockStart));
	compilingBlockNum_ = block_num;
	lastConstPC_ = 0;

	regs_.Start(block);

@@ -120,6 +124,8 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
	}

	if (jo.enableBlocklink && jo.useBackJump) {
		WriteDebugPC(startPC);

		if (jo.downcountInRegister) {
			TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG));
		} else {

@@ -214,11 +220,13 @@ void X64JitBackend::CompIR_Generic(IRInst inst) {

	FlushAll();
	SaveStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
#if PPSSPP_ARCH(AMD64)
	ABI_CallFunctionP((const void *)&DoIRInst, (void *)value);
#else
	ABI_CallFunctionCC((const void *)&DoIRInst, (u32)(value & 0xFFFFFFFF), (u32)(value >> 32));
#endif
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	LoadStaticRegisters();

	// We only need to check the return value if it's a potential exit.

@@ -236,10 +244,12 @@ void X64JitBackend::CompIR_Interpret(IRInst inst) {
	// IR protects us against this being a branching instruction (well, hopefully.)
	FlushAll();
	SaveStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
	if (DebugStatsEnabled()) {
		ABI_CallFunctionP((const void *)&NotifyMIPSInterpret, (void *)MIPSGetName(op));
	}
	ABI_CallFunctionC((const void *)MIPSGetInterpretFunc(op), inst.constant);
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	LoadStaticRegisters();
}

@@ -265,7 +275,31 @@ bool X64JitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const {
	} else if (ptr == applyRoundingMode_) {
		name = "applyRoundingMode";
	} else if (ptr >= GetBasePtr() && ptr < GetBasePtr() + jitStartOffset_) {
		if (ptr == constants.noSignMask) {
			name = "constants.noSignMask";
		} else if (ptr == constants.signBitAll) {
			name = "constants.signBitAll";
		} else if (ptr == constants.positiveZeroes) {
			name = "constants.positiveZeroes";
		} else if (ptr == constants.positiveInfinity) {
			name = "constants.positiveInfinity";
		} else if (ptr == constants.positiveOnes) {
			name = "constants.positiveOnes";
		} else if (ptr == constants.negativeOnes) {
			name = "constants.negativeOnes";
		} else if (ptr == constants.qNAN) {
			name = "constants.qNAN";
		} else if (ptr == constants.maxIntBelowAsFloat) {
			name = "constants.maxIntBelowAsFloat";
		} else if ((const float *)ptr >= constants.mulTableVi2f && (const float *)ptr < constants.mulTableVi2f + 32) {
			name = StringFromFormat("constants.mulTableVi2f[%d]", (int)((const float *)ptr - constants.mulTableVi2f));
		} else if ((const float *)ptr >= constants.mulTableVf2i && (const float *)ptr < constants.mulTableVf2i + 32) {
			name = StringFromFormat("constants.mulTableVf2i[%d]", (int)((const float *)ptr - constants.mulTableVf2i));
		} else if ((const Float4Constant *)ptr >= constants.vec4InitValues && (const Float4Constant *)ptr < constants.vec4InitValues + 8) {
			name = StringFromFormat("constants.vec4InitValues[%d]", (int)((const Float4Constant *)ptr - constants.vec4InitValues));
		} else {
			name = "fixedCode";
		}
	} else {
		return IRNativeBackend::DescribeCodePtr(ptr, name);
	}

@@ -320,6 +354,21 @@ void X64JitBackend::MovToPC(X64Reg r) {
	MOV(32, MDisp(CTXREG, pcOffset), R(r));
}

void X64JitBackend::WriteDebugPC(uint32_t pc) {
	if (hooks_.profilerPC)
		MOV(32, M(hooks_.profilerPC), Imm32(pc));
}

void X64JitBackend::WriteDebugPC(Gen::X64Reg r) {
	if (hooks_.profilerPC)
		MOV(32, M(hooks_.profilerPC), R(r));
}

void X64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
	if (hooks_.profilerPC)
		MOV(32, M(hooks_.profilerStatus), Imm32((int32_t)status));
}

void X64JitBackend::SaveStaticRegisters() {
	if (jo.useStaticAlloc) {
		//CALL(saveStaticRegisters_);

@@ -66,6 +66,9 @@ private:
	void ApplyRoundingMode(bool force = false);
	void MovFromPC(Gen::X64Reg r);
	void MovToPC(Gen::X64Reg r);
	void WriteDebugPC(uint32_t pc);
	void WriteDebugPC(Gen::X64Reg r);
	void WriteDebugProfilerStatus(IRProfilerStatus status);

	void SaveStaticRegisters();
	void LoadStaticRegisters();

@@ -144,14 +147,14 @@ private:
	struct Constants {
		const void *noSignMask;
		const void *signBitAll;
		const void *positiveZeroes;
		const void *positiveInfinity;
		const void *positiveOnes;
		const void *negativeOnes;
		const void *qNAN;
		const void *maxIntBelowAsFloat;
		const float *mulTableVi2f;
		const double *mulTableVf2i;
		const double *minIntAsDouble;
		const double *maxIntAsDouble;
		const float *mulTableVf2i;
		const Float4Constant *vec4InitValues;
	};
	Constants constants;

@@ -159,6 +162,8 @@ private:
	int jitStartOffset_ = 0;
	int compilingBlockNum_ = -1;
	int logBlocks_ = 0;
	// Only useful in breakpoints, where it's set immediately prior.
	uint32_t lastConstPC_ = 0;
};

class X64IRJit : public IRNativeJit {

@@ -147,6 +147,67 @@ void X64IRRegCache::FlushBeforeCall() {
#endif
}

void X64IRRegCache::FlushAll(bool gprs, bool fprs) {
	// Note: make sure not to change the registers when flushing:
	// Branching code may expect the x64reg to retain its value.

	auto needsFlush = [&](IRReg i) {
		if (mr[i].loc != MIPSLoc::MEM || mr[i].isStatic)
			return false;
		if (mr[i].nReg == -1 || !nr[mr[i].nReg].isDirty)
			return false;
		return true;
	};

	auto isSingleFloat = [&](IRReg i) {
		if (mr[i].lane != -1 || mr[i].loc != MIPSLoc::FREG)
			return false;
		return true;
	};

	// Sometimes, float/vector regs may be in separate regs in a sequence.
	// It's worth combining and flushing together.
	for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) {
		if (!needsFlush(i) || !needsFlush(i + 1))
			continue;
		// GPRs are probably not worth it. Merging Vec2s might be, but pretty uncommon.
		if (!isSingleFloat(i) || !isSingleFloat(i + 1))
			continue;

		X64Reg regs[4]{ INVALID_REG, INVALID_REG, INVALID_REG, INVALID_REG };
		regs[0] = FromNativeReg(mr[i + 0].nReg);
		regs[1] = FromNativeReg(mr[i + 1].nReg);

		bool flushVec4 = i + 3 < TOTAL_MAPPABLE_IRREGS && needsFlush(i + 2) && needsFlush(i + 3);
		if (flushVec4 && isSingleFloat(i + 2) && isSingleFloat(i + 3) && (i & 3) == 0) {
			regs[2] = FromNativeReg(mr[i + 2].nReg);
			regs[3] = FromNativeReg(mr[i + 3].nReg);

			// Note that this doesn't change the low lane of any of these regs.
			emit_->UNPCKLPS(regs[1], ::R(regs[3]));
			emit_->UNPCKLPS(regs[0], ::R(regs[2]));
			emit_->UNPCKLPS(regs[0], ::R(regs[1]));
			emit_->MOVAPS(MDisp(CTXREG, -128 + GetMipsRegOffset(i)), regs[0]);

			for (int j = 0; j < 4; ++j)
				DiscardReg(i + j);
			i += 3;
			continue;
		}

		// TODO: Maybe this isn't always worth doing.
		emit_->UNPCKLPS(regs[0], ::R(regs[1]));
		emit_->MOVLPS(MDisp(CTXREG, -128 + GetMipsRegOffset(i)), regs[0]);

		DiscardReg(i);
		DiscardReg(i + 1);
		++i;
		continue;
	}

	IRNativeRegCacheBase::FlushAll(gprs, fprs);
}
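
The combining trick above works because UNPCKLPS only ever reads the low lanes of its operands. A scalar model of the lane math for the quad case (a sketch, not emitter code):

	#include <cstring>

	// UNPCKLPS(a, b) -> { a0, b0, a1, b1 }
	static void Unpcklps(float a[4], const float b[4]) {
		float r[4] = { a[0], b[0], a[1], b[1] };
		memcpy(a, r, sizeof(r));
	}

	// With x, y, z, w each holding one IR float in lane 0:
	//   Unpcklps(y, w);  // y = { y0, w0, .. }
	//   Unpcklps(x, z);  // x = { x0, z0, .. }
	//   Unpcklps(x, y);  // x = { x0, y0, z0, w0 } -> one MOVAPS stores all four
	// The pair case stops after one interleave and stores 8 bytes with MOVLPS.
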
X64Reg X64IRRegCache::TryMapTempImm(IRReg r, X64Map flags) {
	_dbg_assert_(IsValidGPR(r));

@@ -353,6 +414,8 @@ void X64IRRegCache::LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
		emit_->MOVSS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
	else if (lanes == 2)
		emit_->MOVLPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
	else if (lanes == 4 && (first & 3) == 0)
		emit_->MOVAPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
	else if (lanes == 4)
		emit_->MOVUPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
	else

@@ -381,6 +444,8 @@ void X64IRRegCache::StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
		emit_->MOVSS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
	else if (lanes == 2)
		emit_->MOVLPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
	else if (lanes == 4 && (first & 3) == 0)
		emit_->MOVAPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
	else if (lanes == 4)
		emit_->MOVUPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
	else

@@ -388,6 +453,275 @@ void X64IRRegCache::StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
	}
}

bool X64IRRegCache::TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags) {
	bool allowed = !mr[nr[nreg].mipsReg].isStatic;
	// There's currently no support for non-XMMs here.
	allowed = allowed && type == MIPSLoc::FREG;

	if (dest == -1)
		dest = nreg;

	if (allowed && (flags == MIPSMap::INIT || flags == MIPSMap::DIRTY)) {
		// Alright, changing lane count (possibly including lane position.)
		IRReg oldfirst = nr[nreg].mipsReg;
		int oldlanes = 0;
		while (mr[oldfirst + oldlanes].nReg == nreg)
			oldlanes++;
		_assert_msg_(oldlanes != 0, "TransferNativeReg encountered nreg mismatch");
		_assert_msg_(oldlanes != lanes, "TransferNativeReg transfer to same lanecount, misaligned?");

		if (lanes == 1 && TransferVecTo1(nreg, dest, first, oldlanes))
			return true;
		if (oldlanes == 1 && Transfer1ToVec(nreg, dest, first, lanes))
			return true;
	}

	return IRNativeRegCacheBase::TransferNativeReg(nreg, dest, type, first, lanes, flags);
}

bool X64IRRegCache::TransferVecTo1(IRNativeReg nreg, IRNativeReg dest, IRReg first, int oldlanes) {
	IRReg oldfirst = nr[nreg].mipsReg;

	// Is it worth preserving any of the old regs?
	int numKept = 0;
	for (int i = 0; i < oldlanes; ++i) {
		// Skip whichever one this is extracting.
		if (oldfirst + i == first)
			continue;
		// If 0 isn't being transferred, easy to keep in its original reg.
		if (i == 0 && dest != nreg) {
			numKept++;
			continue;
		}

		IRNativeReg freeReg = FindFreeReg(MIPSLoc::FREG, MIPSMap::INIT);
		if (freeReg != -1 && IsRegRead(MIPSLoc::FREG, oldfirst + i)) {
			// If there's one free, use it. Don't modify nreg, though.
			u8 shuf = VFPU_SWIZZLE(i, i, i, i);
			if (i == 0) {
				emit_->MOVAPS(FromNativeReg(freeReg), ::R(FromNativeReg(nreg)));
			} else if (cpu_info.bAVX) {
				emit_->VPERMILPS(128, FromNativeReg(freeReg), ::R(FromNativeReg(nreg)), shuf);
			} else if (i == 2) {
				emit_->MOVHLPS(FromNativeReg(freeReg), FromNativeReg(nreg));
			} else {
				emit_->MOVAPS(FromNativeReg(freeReg), ::R(FromNativeReg(nreg)));
				emit_->SHUFPS(FromNativeReg(freeReg), ::R(FromNativeReg(freeReg)), shuf);
			}

			// Update accounting.
			nr[freeReg].isDirty = nr[nreg].isDirty;
			nr[freeReg].mipsReg = oldfirst + i;
			mr[oldfirst + i].lane = -1;
			mr[oldfirst + i].nReg = freeReg;
			numKept++;
		}
	}

	// Unless all other lanes were kept, store.
	if (nr[nreg].isDirty && numKept < oldlanes - 1) {
		StoreNativeReg(nreg, oldfirst, oldlanes);
		// Set false even for regs that were split out, since they were flushed too.
		for (int i = 0; i < oldlanes; ++i) {
			if (mr[oldfirst + i].nReg != -1)
				nr[mr[oldfirst + i].nReg].isDirty = false;
		}
	}

	// Next, shuffle the desired element into first place.
	u8 shuf = VFPU_SWIZZLE(mr[first].lane, mr[first].lane, mr[first].lane, mr[first].lane);
	if (mr[first].lane > 0 && cpu_info.bAVX && dest != nreg) {
		emit_->VPERMILPS(128, FromNativeReg(dest), ::R(FromNativeReg(nreg)), shuf);
	} else if (mr[first].lane <= 0 && dest != nreg) {
		emit_->MOVAPS(FromNativeReg(dest), ::R(FromNativeReg(nreg)));
	} else if (mr[first].lane == 2) {
		emit_->MOVHLPS(FromNativeReg(dest), FromNativeReg(nreg));
	} else if (mr[first].lane > 0) {
		if (dest != nreg)
			emit_->MOVAPS(FromNativeReg(dest), ::R(FromNativeReg(nreg)));
		emit_->SHUFPS(FromNativeReg(dest), ::R(FromNativeReg(dest)), shuf);
	}

	// Now update accounting.
	for (int i = 0; i < oldlanes; ++i) {
		auto &mreg = mr[oldfirst + i];
		if (oldfirst + i == first) {
			mreg.lane = -1;
			mreg.nReg = dest;
		} else if (mreg.nReg == nreg && i == 0 && nreg != dest) {
			// Still in the same register, but no longer a vec.
			mreg.lane = -1;
		} else if (mreg.nReg == nreg) {
			// No longer in a register.
			mreg.nReg = -1;
			mreg.lane = -1;
			mreg.loc = MIPSLoc::MEM;
		}
	}

	if (dest != nreg) {
		nr[dest].isDirty = nr[nreg].isDirty;
		if (oldfirst == first) {
			nr[nreg].mipsReg = -1;
			nr[nreg].isDirty = false;
		}
	}
	nr[dest].mipsReg = first;

	return true;
}

bool X64IRRegCache::Transfer1ToVec(IRNativeReg nreg, IRNativeReg dest, IRReg first, int lanes) {
	X64Reg cur[4]{};
	int numInRegs = 0;
	u8 blendMask = 0;
	for (int i = 0; i < lanes; ++i) {
		if (mr[first + i].lane != -1 || (i != 0 && mr[first + i].spillLockIRIndex >= irIndex_)) {
			// Can't do it, either double mapped or overlapping vec.
			return false;
		}

		if (mr[first + i].nReg == -1) {
			cur[i] = INVALID_REG;
			blendMask |= 1 << i;
		} else {
			cur[i] = FromNativeReg(mr[first + i].nReg);
			numInRegs++;
		}
	}

	// Shouldn't happen, this should only get called to transfer one in a reg.
	if (numInRegs == 0)
		return false;

	// Move things together into a reg.
	if (lanes == 4 && cpu_info.bSSE4_1 && numInRegs == 1 && (first & 3) == 0) {
		// Use a blend to grab the rest. BLENDPS is pretty good.
		if (cpu_info.bAVX && nreg != dest) {
			if (cur[0] == INVALID_REG) {
				// Broadcast to all lanes, then blend from memory to replace.
				emit_->VPERMILPS(128, FromNativeReg(dest), ::R(FromNativeReg(nreg)), 0);
				emit_->BLENDPS(FromNativeReg(dest), MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
			} else {
				emit_->VBLENDPS(128, FromNativeReg(dest), FromNativeReg(nreg), MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
			}
			cur[0] = FromNativeReg(dest);
		} else {
			if (cur[0] == INVALID_REG)
				emit_->SHUFPS(FromNativeReg(nreg), ::R(FromNativeReg(nreg)), 0);
			emit_->BLENDPS(FromNativeReg(nreg), MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
			// If this is not dest, it'll get moved there later.
			cur[0] = FromNativeReg(nreg);
		}
	} else if (lanes == 4) {
		if (blendMask == 0) {
			// y = yw##, x = xz##, x = xyzw.
			emit_->UNPCKLPS(cur[1], ::R(cur[3]));
			emit_->UNPCKLPS(cur[0], ::R(cur[2]));
			emit_->UNPCKLPS(cur[0], ::R(cur[1]));
		} else if (blendMask == 0b1100) {
			// x = xy##, then load zw.
			emit_->UNPCKLPS(cur[0], ::R(cur[1]));
			emit_->MOVHPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 2)));
		} else if (blendMask == 0b1010 && cpu_info.bSSE4_1 && (first & 3) == 0) {
			// x = x#z#, x = xyzw.
			emit_->SHUFPS(cur[0], ::R(cur[2]), VFPU_SWIZZLE(0, 0, 0, 0));
			emit_->BLENDPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
		} else if (blendMask == 0b0110 && cpu_info.bSSE4_1 && (first & 3) == 0) {
			// x = x##w, x = xyzw.
			emit_->SHUFPS(cur[0], ::R(cur[3]), VFPU_SWIZZLE(0, 0, 0, 0));
			emit_->BLENDPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
		} else if (blendMask == 0b1001 && cpu_info.bSSE4_1 && (first & 3) == 0) {
			// y = #yz#, y = xyzw.
			emit_->SHUFPS(cur[1], ::R(cur[2]), VFPU_SWIZZLE(0, 0, 0, 0));
			emit_->BLENDPS(cur[1], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
			// Will be moved to dest as needed.
			cur[0] = cur[1];
		} else if (blendMask == 0b0101 && cpu_info.bSSE4_1 && (first & 3) == 0) {
			// y = #y#w, y = xyzw.
			emit_->SHUFPS(cur[1], ::R(cur[3]), VFPU_SWIZZLE(0, 0, 0, 0));
			emit_->BLENDPS(cur[1], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
			// Will be moved to dest as needed.
			cur[0] = cur[1];
		} else if (blendMask == 0b1000) {
			// x = xz##, z = w###, y = yw##, x = xyzw.
			emit_->UNPCKLPS(cur[0], ::R(cur[2]));
			emit_->MOVSS(cur[2], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 3)));
			emit_->UNPCKLPS(cur[1], ::R(cur[2]));
			emit_->UNPCKLPS(cur[0], ::R(cur[1]));
		} else if (blendMask == 0b0100) {
			// y = yw##, w = z###, x = xz##, x = xyzw.
			emit_->UNPCKLPS(cur[1], ::R(cur[3]));
			emit_->MOVSS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 2)));
			emit_->UNPCKLPS(cur[0], ::R(cur[3]));
			emit_->UNPCKLPS(cur[0], ::R(cur[1]));
		} else if (blendMask == 0b0010) {
			// z = zw##, w = y###, x = xy##, x = xyzw.
			emit_->UNPCKLPS(cur[2], ::R(cur[3]));
			emit_->MOVSS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 1)));
			emit_->UNPCKLPS(cur[0], ::R(cur[3]));
			emit_->MOVLHPS(cur[0], cur[2]);
		} else if (blendMask == 0b0001) {
			// y = yw##, w = x###, w = xz##, w = xyzw.
			emit_->UNPCKLPS(cur[1], ::R(cur[3]));
			emit_->MOVSS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 0)));
			emit_->UNPCKLPS(cur[3], ::R(cur[2]));
			emit_->UNPCKLPS(cur[3], ::R(cur[1]));
			// Will be moved to dest as needed.
			cur[0] = cur[3];
		} else if (blendMask == 0b0011) {
			// z = zw##, w = xy##, w = xyzw.
			emit_->UNPCKLPS(cur[2], ::R(cur[3]));
			emit_->MOVLPS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 0)));
			emit_->MOVLHPS(cur[3], cur[2]);
			// Will be moved to dest as needed.
			cur[0] = cur[3];
		} else {
			// This must mean no SSE4, and numInRegs <= 2 in trickier cases.
			return false;
		}
	} else if (lanes == 2) {
		if (cur[0] != INVALID_REG && cur[1] != INVALID_REG) {
			emit_->UNPCKLPS(cur[0], ::R(cur[1]));
		} else if (cur[0] != INVALID_REG && cpu_info.bSSE4_1) {
			emit_->INSERTPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 1)), 1);
		} else {
			return false;
		}
	} else {
		return false;
	}

	mr[first].lane = 0;
	for (int i = 0; i < lanes; ++i) {
		if (mr[first + i].nReg != -1) {
			// If this was dirty, the combined reg is now dirty.
			if (nr[mr[first + i].nReg].isDirty)
				nr[dest].isDirty = true;

			// Throw away the other register we're no longer using.
			if (i != 0)
				DiscardNativeReg(mr[first + i].nReg);
		}

		// And set it as using the new one.
		mr[first + i].lane = i;
		mr[first + i].loc = MIPSLoc::FREG;
		mr[first + i].nReg = dest;
	}

	if (cur[0] != FromNativeReg(dest))
		emit_->MOVAPS(FromNativeReg(dest), ::R(cur[0]));

	if (dest != nreg) {
		nr[dest].mipsReg = first;
		nr[nreg].mipsReg = -1;
		nr[nreg].isDirty = false;
	}

	return true;
}
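
In Transfer1ToVec above, blendMask simply records which lanes must come from memory: bit i is set when mr[first + i] is not currently mapped to a register, so each special case keys off that bit pattern. A scalar sketch of the contract (hypothetical helper, not part of the commit):

	// Gathers four floats into dst: lanes present in registers come from there,
	// lanes flagged in blendMask are loaded from the context's memory copy.
	static void GatherVec4(float dst[4], const float *regVals[4],
	                       const float *mem, unsigned blendMask) {
		for (int i = 0; i < 4; ++i)
			dst[i] = (blendMask & (1u << i)) ? mem[i] : *regVals[i];
	}

	// E.g. blendMask == 0b1100 is the "x = xy##, then load zw" case above:
	// UNPCKLPS merges the two live lanes, MOVHPS fills the top half from memory.
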
void X64IRRegCache::SetNativeRegValue(IRNativeReg nreg, uint32_t imm) {
	X64Reg r = FromNativeReg(nreg);
	_dbg_assert_(nreg >= 0 && nreg < NUM_X_REGS);

@@ -92,6 +92,8 @@ public:

	void MapWithFlags(IRInst inst, X64IRJitConstants::X64Map destFlags, X64IRJitConstants::X64Map src1Flags = X64IRJitConstants::X64Map::NONE, X64IRJitConstants::X64Map src2Flags = X64IRJitConstants::X64Map::NONE);

	// Note: may change the high lanes of single-register XMMs.
	void FlushAll(bool gprs = true, bool fprs = true) override;
	void FlushBeforeCall();

	Gen::X64Reg GetAndLockTempGPR();

@@ -115,8 +117,12 @@ protected:
	void StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) override;
	void SetNativeRegValue(IRNativeReg nreg, uint32_t imm) override;
	void StoreRegValue(IRReg mreg, uint32_t imm) override;
	bool TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags) override;

private:
	bool TransferVecTo1(IRNativeReg nreg, IRNativeReg dest, IRReg first, int oldlanes);
	bool Transfer1ToVec(IRNativeReg nreg, IRNativeReg dest, IRReg first, int lanes);

	IRNativeReg GPRToNativeReg(Gen::X64Reg r) {
		return (IRNativeReg)r;
	}

@@ -69,15 +69,14 @@ inline void Memcpy(const u32 to_address, const u32 from_address, const u32 len,
	memcpy(to, from, len);

	if (MemBlockInfoDetailed(len)) {
		char tagData[128];
		if (!tag) {
			tagLen = FormatMemWriteTagAt(tagData, sizeof(tagData), "Memcpy/", from_address, len);
			tag = tagData;
		}
		NotifyMemInfoCopy(to_address, from_address, len, "Memcpy/");
	} else {
		NotifyMemInfo(MemBlockFlags::READ, from_address, len, tag, tagLen);
		NotifyMemInfo(MemBlockFlags::WRITE, to_address, len, tag, tagLen);
	}
}
}

template<size_t tagLen>
inline void Memcpy(const u32 to_address, const void *from_data, const u32 len, const char(&tag)[tagLen]) {
@@ -91,7 +91,7 @@ MetaFileSystem pspFileSystem;
ParamSFOData g_paramSFO;
static GlobalUIState globalUIState;
CoreParameter g_CoreParameter;
static FileLoader *loadedFile;
static FileLoader *g_loadedFile;
// For background loading thread.
static std::mutex loadingLock;
// For loadingReason updates.

@@ -324,6 +324,7 @@ bool CPU_Init(std::string *errorString, FileLoader *loadedFile) {

	// If they shut down early, we'll catch it when load completes.
	// Note: this may return before init is complete, which is checked if CPU_IsReady().
	g_loadedFile = loadedFile;
	if (!LoadFile(&loadedFile, &g_CoreParameter.errorString)) {
		CPU_Shutdown();
		g_CoreParameter.fileToStart.clear();

@@ -368,8 +369,8 @@ void CPU_Shutdown() {
	Memory::Shutdown();
	HLEPlugins::Shutdown();

	delete loadedFile;
	loadedFile = nullptr;
	delete g_loadedFile;
	g_loadedFile = nullptr;

	delete g_CoreParameter.mountIsoLoader;
	delete g_symbolMap;

@@ -380,8 +381,8 @@ void CPU_Shutdown() {

// TODO: Maybe loadedFile doesn't even belong here...
void UpdateLoadedFile(FileLoader *fileLoader) {
	delete loadedFile;
	loadedFile = fileLoader;
	delete g_loadedFile;
	g_loadedFile = fileLoader;
}

void Core_UpdateState(CoreState newState) {

@@ -19,6 +19,12 @@ static u32 tiltButtonsDown = 0;
float rawTiltAnalogX;
float rawTiltAnalogY;

float g_currentYAngle = 0.0f;

float GetCurrentYAngle() {
	return g_currentYAngle;
}

// These functions generate tilt events given the current Tilt amount,
// and the deadzone radius.
void GenerateAnalogStickEvent(float analogX, float analogY);

@@ -73,6 +79,7 @@ void ProcessTilt(bool landscape, float calibrationAngle, float x, float y, float
	Lin::Vec3 down = Lin::Vec3(x, y, z).normalized();

	float angleAroundX = atan2(down.z, down.y);
	g_currentYAngle = angleAroundX; // TODO: Should smooth this out over time a bit.
	float yAngle = angleAroundX - calibrationAngle;
	float xAngle = asinf(down.x);

@@ -1,5 +1,7 @@
#pragma once

#include "Common/Math/lin/vec3.h"

namespace TiltEventProcessor {

// generates a tilt in the correct coordinate system based on

@@ -7,6 +9,8 @@ namespace TiltEventProcessor {
void ProcessTilt(bool landscape, const float calibrationAngle, float x, float y, float z, bool invertX, bool invertY, float xSensitivity, float ySensitivity);
void ResetTiltEvents();

float GetCurrentYAngle();

// Lets you preview the amount of tilt in TiltAnalogSettingsScreen.
extern float rawTiltAnalogX;
extern float rawTiltAnalogY;

@@ -827,7 +827,7 @@ static void PPGeResetCurrentText() {

// Draws some text using the one font we have in the atlas.
void PPGeDrawCurrentText(u32 color) {
	// If the atlas is larger than 512x512, need to use windows into it.
	bool useTextureWindow = g_Config.bSoftwareRendering && atlasWidth > 512 || atlasHeight > 512;
	bool useTextureWindow = g_Config.bSoftwareRendering && (atlasWidth > 512 || atlasHeight > 512);
	uint32_t texturePosX = 0;
	uint32_t texturePosY = 0;

@@ -855,7 +855,7 @@ void PPGeDrawCurrentText(u32 color) {

		int wantedPosX = (int)floorf(c.sx * textureMaxPosX);
		int wantedPosY = (int)floorf(c.sy * textureMaxPosY);
		if (useTextureWindow && wantedPosX != texturePosX || wantedPosY != texturePosY) {
		if (useTextureWindow && (wantedPosX != texturePosX || wantedPosY != texturePosY)) {
			EndVertexDataAndDraw(GE_PRIM_RECTANGLES);

			uint32_t offset = atlasWidth * wantedPosY * 256 + wantedPosX * 256;
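
Both one-line fixes above correct the same operator-precedence slip: in C++, && binds tighter than ||, so the unparenthesized form took the right-hand branch regardless of the left-hand condition. A minimal sketch with illustrative values:

	bool sw = false;          // hardware rendering
	int w = 480, h = 544;     // tall atlas
	bool buggy = sw && w > 512 || h > 512;    // == (sw && w > 512) || (h > 512) -> true
	bool fixed = sw && (w > 512 || h > 512);  // -> false, as intended
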
@@ -290,8 +290,15 @@ ReplaceBlendType ReplaceBlendWithShader(GEBufferFormat bufferFormat) {
			return REPLACE_BLEND_READ_FRAMEBUFFER;
		}

	default:
	case GE_BLENDMODE_MUL_AND_ADD:
	case GE_BLENDMODE_MUL_AND_SUBTRACT:
	case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
		// Handled below.
		break;

	default:
		// Other blend equations simply don't blend on hardware.
		return REPLACE_BLEND_NO;
	}

	GEBlendSrcFactor funcA = gstate.getBlendFuncA();

@@ -275,21 +275,6 @@ bool FragmentIdNeedsFramebufferRead(const FShaderID &id) {
		(ReplaceBlendType)id.Bits(FS_BIT_REPLACE_BLEND, 3) == REPLACE_BLEND_READ_FRAMEBUFFER;
}

static GEBlendMode SanitizeBlendEq(GEBlendMode beq) {
	switch (beq) {
	case GE_BLENDMODE_MUL_AND_ADD:
	case GE_BLENDMODE_MUL_AND_SUBTRACT:
	case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
	case GE_BLENDMODE_MIN:
	case GE_BLENDMODE_MAX:
	case GE_BLENDMODE_ABSDIFF:
		return beq;
	default:
		// Just return something that won't cause a shader gen failure.
		return GE_BLENDMODE_MUL_AND_ADD;
	}
}

// Here we must take all the bits of the gstate that determine what the fragment shader will
// look like, and concatenate them together into an ID.
void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pipelineState, const Draw::Bugs &bugs) {

@@ -384,7 +369,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
			// 3 bits.
			id.SetBits(FS_BIT_REPLACE_BLEND, 3, replaceBlend);
			// 11 bits total.
			id.SetBits(FS_BIT_BLENDEQ, 3, SanitizeBlendEq(gstate.getBlendEq()));
			id.SetBits(FS_BIT_BLENDEQ, 3, gstate.getBlendEq());
			id.SetBits(FS_BIT_BLENDFUNC_A, 4, gstate.getBlendFuncA());
			id.SetBits(FS_BIT_BLENDFUNC_B, 4, gstate.getBlendFuncB());
		}

@@ -90,19 +90,22 @@ static void RotateUVThrough(TransformedVertex v[4]) {
// Clears on the PSP are best done by drawing a series of vertical strips
// in clear mode. This tries to detect that.
static bool IsReallyAClear(const TransformedVertex *transformed, int numVerts, float x2, float y2) {
	if (transformed[0].x != 0.0f || transformed[0].y != 0.0f)
	if (transformed[0].x < 0.0f || transformed[0].y < 0.0f || transformed[0].x > 0.5f || transformed[0].y > 0.5f)
		return false;

	const float originY = transformed[0].y;

	// Color and Z are decided by the second vertex, so only need to check those for matching color.
	u32 matchcolor = transformed[1].color0_32;
	float matchz = transformed[1].z;
	const u32 matchcolor = transformed[1].color0_32;
	const float matchz = transformed[1].z;

	for (int i = 1; i < numVerts; i++) {
		if ((i & 1) == 0) {
			// Top left of a rectangle
			if (transformed[i].y != 0.0f)
			if (transformed[i].y != originY)
				return false;
			if (i > 0 && transformed[i].x != transformed[i - 1].x)
			float gap = fabsf(transformed[i].x - transformed[i - 1].x); // Should probably do some smarter check.
			if (i > 0 && gap > 0.0625)
				return false;
		} else {
			if (transformed[i].color0_32 != matchcolor || transformed[i].z != matchz)

@@ -547,7 +550,7 @@ void SoftwareTransform::DetectOffsetTexture(int maxIndex) {
}

// NOTE: The viewport must be up to date!
void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *inds, int &indsOffset, int indexBufferSize, int &maxIndex, SoftwareTransformResult *result) {
void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *&inds, int &maxIndex, SoftwareTransformResult *result) {
	TransformedVertex *transformed = params_.transformed;
	TransformedVertex *transformedExpanded = params_.transformedExpanded;
	bool throughmode = (vertType & GE_VTYPE_THROUGH_MASK) != 0;

@@ -560,11 +563,7 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy
	bool useBufferedRendering = fbman->UseBufferedRendering();

	if (prim == GE_PRIM_RECTANGLES) {
		if (!ExpandRectangles(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) {
			result->drawIndexed = false;
			result->drawNumTrans = 0;
			return;
		}
		ExpandRectangles(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode);
		result->drawBuffer = transformedExpanded;
		result->drawIndexed = true;

@@ -582,19 +581,11 @@
		}
	} else if (prim == GE_PRIM_POINTS) {
		if (!ExpandPoints(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) {
			result->drawIndexed = false;
			result->drawNumTrans = 0;
			return;
		}
		ExpandPoints(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode);
		result->drawBuffer = transformedExpanded;
		result->drawIndexed = true;
	} else if (prim == GE_PRIM_LINES) {
		if (!ExpandLines(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) {
			result->drawIndexed = false;
			result->drawNumTrans = 0;
			return;
		}
		ExpandLines(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode);
		result->drawBuffer = transformedExpanded;
		result->drawIndexed = true;
	} else {

@@ -686,21 +677,15 @@ void SoftwareTransform::CalcCullParams(float &minZValue, float &maxZValue) {
		std::swap(minZValue, maxZValue);
}

bool SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
	// Before we start, do a sanity check - does the output fit?
	if ((vertexCount / 2) * 6 > indexBufferSize - indsOffset) {
		// Won't fit, kill the draw.
		return false;
	}

void SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
	// Rectangles always need 2 vertices, disregard the last one if there's an odd number.
	vertexCount = vertexCount & ~1;
	numTrans = 0;
	TransformedVertex *trans = &transformedExpanded[0];

	const u16 *indsIn = (const u16 *)(inds + indsOffset);
	int newIndsOffset = indsOffset + vertexCount;
	u16 *indsOut = inds + newIndsOffset;
	const u16 *indsIn = (const u16 *)inds;
	u16 *newInds = inds + vertexCount;
	u16 *indsOut = newInds;

	maxIndex = 4 * (vertexCount / 2);
	for (int i = 0; i < vertexCount; i += 2) {

@@ -745,33 +730,23 @@ bool SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *in
		indsOut[3] = i * 2 + 3;
		indsOut[4] = i * 2 + 0;
		indsOut[5] = i * 2 + 2;

		trans += 4;
		indsOut += 6;

		numTrans += 6;
	}

	indsOffset = newIndsOffset;
	return true;
}

bool SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
	// Before we start, do a sanity check - does the output fit?
	if ((vertexCount / 2) * 6 > indexBufferSize - indsOffset) {
		// Won't fit, kill the draw.
		return false;
	inds = newInds;
}

void SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
	// Lines always need 2 vertices, disregard the last one if there's an odd number.
	vertexCount = vertexCount & ~1;
	numTrans = 0;
	TransformedVertex *trans = &transformedExpanded[0];

	const u16 *indsIn = (const u16 *)(inds + indsOffset);
	int newIndsOffset = indsOffset + vertexCount;
	u16 *indsOut = inds + newIndsOffset;
	const u16 *indsIn = (const u16 *)inds;
	u16 *newInds = inds + vertexCount;
	u16 *indsOut = newInds;

	float dx = 1.0f * gstate_c.vpWidthScale * (1.0f / fabsf(gstate.getViewportXScale()));
	float dy = 1.0f * gstate_c.vpHeightScale * (1.0f / fabsf(gstate.getViewportYScale()));

@@ -884,23 +859,17 @@ bool SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *inds, i
		}
	}

	indsOffset = newIndsOffset;
	return true;
	inds = newInds;
}

bool SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
	// Before we start, do a sanity check - does the output fit?
	if (vertexCount * 6 > indexBufferSize - indsOffset) {
		// Won't fit, kill the draw.
		return false;
	}

void SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) {
	numTrans = 0;
	TransformedVertex *trans = &transformedExpanded[0];

	const u16 *indsIn = (const u16 *)(inds + indsOffset);
	int newIndsOffset = indsOffset + vertexCount;
	u16 *indsOut = inds + newIndsOffset;
	const u16 *indsIn = (const u16 *)inds;
	u16 *newInds = inds + vertexCount;
	u16 *indsOut = newInds;

	float dx = 1.0f * gstate_c.vpWidthScale * (1.0f / gstate.getViewportXScale());
	float dy = 1.0f * gstate_c.vpHeightScale * (1.0f / gstate.getViewportYScale());

@@ -959,7 +928,5 @@ bool SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *inds,
		numTrans += 6;
	}

	indsOffset = newIndsOffset;
	return true;
	inds = newInds;
}
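
The rewritten Expand* functions above drop the separate offset/size bookkeeping and instead carve the output out of the same buffer: `inds` points at the input indices on entry, the expanded list is written immediately after them, and the reference parameter is repointed there before returning. A sketch of the scheme (assumes the decode-side index buffer has room for both runs, which the removed bounds check used to verify explicitly):

	static void ExpandPairsSketch(uint16_t *&inds, int vertexCount) {
		const uint16_t *indsIn = inds;           // vertexCount input indices
		uint16_t *indsOut = inds + vertexCount;  // output starts right after them
		for (int i = 0; i < vertexCount; i += 2) {
			// ... each input pair becomes 4 vertices and 6 output indices ...
			(void)indsIn;
		}
		inds = inds + vertexCount;               // caller now sees the expanded list
	}
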
@@ -62,18 +62,19 @@ struct SoftwareTransformParams {

class SoftwareTransform {
public:
	SoftwareTransform(SoftwareTransformParams &params) : params_(params) {}
	SoftwareTransform(SoftwareTransformParams &params) : params_(params) {
	}

	void SetProjMatrix(const float mtx[14], bool invertedX, bool invertedY, const Lin::Vec3 &trans, const Lin::Vec3 &scale);
	void Decode(int prim, u32 vertexType, const DecVtxFormat &decVtxFormat, int maxIndex, SoftwareTransformResult *result);
	void DetectOffsetTexture(int maxIndex);
	void BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *inds, int &indsOffset, int indexBufferSize, int &maxIndex, SoftwareTransformResult *result);
	void BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *&inds, int &maxIndex, SoftwareTransformResult *result);

protected:
	void CalcCullParams(float &minZValue, float &maxZValue);
	bool ExpandRectangles(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode);
	bool ExpandLines(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode);
	bool ExpandPoints(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode);
	void ExpandRectangles(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode);
	void ExpandLines(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode);
	void ExpandPoints(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode);

	const SoftwareTransformParams &params_;
	Lin::Matrix4x4 projMatrix_;

@@ -27,10 +27,6 @@
#include "GPU/Common/VertexDecoderCommon.h"

alignas(16) static float bones[16 * 8]; // First four are kept in registers
alignas(16) static float boneMask[4] = {1.0f, 1.0f, 1.0f, 0.0f};

static const float by128 = 1.0f / 128.0f;
static const float by32768 = 1.0f / 32768.0f;

using namespace Arm64Gen;

@@ -50,7 +46,7 @@ static const ARM64Reg scratchReg = W6;
static const ARM64Reg scratchReg64 = X6;
static const ARM64Reg scratchReg2 = W7;
static const ARM64Reg scratchReg3 = W8;
static const ARM64Reg fullAlphaReg = W12;
static const ARM64Reg alphaNonFullReg = W12;
static const ARM64Reg boundsMinUReg = W13;
static const ARM64Reg boundsMinVReg = W14;
static const ARM64Reg boundsMaxUReg = W15;

@@ -63,6 +59,8 @@ static const ARM64Reg fpScratchReg4 = S7;

static const ARM64Reg neonScratchRegD = D2;
static const ARM64Reg neonScratchRegQ = Q2;
static const ARM64Reg neonScratchReg2D = D3;
static const ARM64Reg neonScratchReg2Q = Q3;

static const ARM64Reg neonUVScaleReg = D0;
static const ARM64Reg neonUVOffsetReg = D1;

@@ -150,6 +148,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int

	bool prescaleStep = false;
	bool skinning = false;
	bool updateTexBounds = false;

	bool log = false;

@@ -165,6 +164,9 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
			dec.steps_[i] == &VertexDecoder::Step_WeightsFloatSkin) {
			skinning = true;
		}
		if (dec.steps_[i] == &VertexDecoder::Step_TcU16ThroughToFloat) {
			updateTexBounds = true;
		}
	}

	// Not used below, but useful for logging.

@@ -172,24 +174,22 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int

	// if (skinning) log = true;

	bool updateFullAlpha = dec.col;
	if (updateFullAlpha && (dec.VertexType() & GE_VTYPE_COL_MASK) == GE_VTYPE_COL_565)
		updateFullAlpha = false;

	// GPRs 0-15 do not need to be saved.
	// We don't use any higher GPRs than 16. So:
	uint64_t regs_to_save = 1 << 16; // Arm64Gen::ALL_CALLEE_SAVED;
	uint64_t regs_to_save = updateTexBounds ? 1 << 16 : 0;
	// We only need to save Q8-Q15 if skinning is used.
	uint64_t regs_to_save_fp = dec.skinInDecode ? Arm64Gen::ALL_CALLEE_SAVED_FP : 0;
	// Only bother making stack space and setting up FP if there are saved regs.
	if (regs_to_save || regs_to_save_fp)
		fp.ABI_PushRegisters(regs_to_save, regs_to_save_fp);

	// Keep the scale/offset in a few fp registers if we need it.
	if (prescaleStep) {
		fp.LDR(64, INDEX_UNSIGNED, neonUVScaleReg, X3, 0);
		fp.LDR(64, INDEX_UNSIGNED, neonUVOffsetReg, X3, 8);
		if ((dec.VertexType() & GE_VTYPE_TC_MASK) == GE_VTYPE_TC_8BIT) {
			fp.MOVI2FDUP(neonScratchRegD, by128, scratchReg);
			fp.FMUL(32, neonUVScaleReg, neonUVScaleReg, neonScratchRegD);
		} else if ((dec.VertexType() & GE_VTYPE_TC_MASK) == GE_VTYPE_TC_16BIT) {
			fp.MOVI2FDUP(neonScratchRegD, by32768, scratchReg);
			fp.FMUL(32, neonUVScaleReg, neonUVScaleReg, neonScratchRegD);
		}
		fp.LDP(64, INDEX_SIGNED, neonUVScaleReg, neonUVOffsetReg, X3, 0);
	}

	// Add code to convert matrices to 4x4.

@@ -197,43 +197,48 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
	if (dec.skinInDecode) {
		// Copying from R3 to R4
		MOVP2R(X3, gstate.boneMatrix);
		MOVP2R(X4, bones);
		MOVP2R(X5, boneMask);
		fp.LDR(128, INDEX_UNSIGNED, Q3, X5, 0);
		// This is only used with more than 4 weights, and points to the first of them.
		if (dec.nweights > 4)
			MOVP2R(X4, &bones[16 * 4]);

		// Construct a mask to zero out the top lane with.
		fp.MVNI(32, Q3, 0);
		fp.MOVI(32, Q4, 0);
		fp.EXT(Q3, Q3, Q4, 4);

		for (int i = 0; i < dec.nweights; i++) {
			// Note that INDEX_UNSIGNED does not support offsets not aligned to the data size so we must use POST.
			fp.LDR(128, INDEX_POST, Q4, X3, 12); // Load 128 bits even though we just want 96
			fp.LDR(128, INDEX_POST, Q5, X3, 12);
			fp.LDR(128, INDEX_POST, Q6, X3, 12);
			fp.LDR(128, INDEX_POST, Q7, X3, 12);
			// This loads Q4,Q5,Q6 with 12 floats and increases X3, all in one go.
			fp.LD1(32, 3, INDEX_POST, Q4, X3);
			// Now sort those floats into 4 regs: ABCD EFGH IJKL -> ABC0 DEF0 GHI0 JKL0.
			// Go backwards to avoid overwriting.
			fp.EXT(Q7, Q6, Q6, 4); // I[JKLI]JKL
			fp.EXT(Q6, Q5, Q6, 8); // EF[GHIJ]KL
			fp.EXT(Q5, Q4, Q5, 12); // ABC[DEFG]H

			ARM64Reg matrixRow[4]{ Q4, Q5, Q6, Q7 };
			// First four matrices are in registers Q16+.
			if (i < 4) {
				fp.FMUL(32, (ARM64Reg)(Q16 + i * 4), Q4, Q3);
				fp.FMUL(32, (ARM64Reg)(Q17 + i * 4), Q5, Q3);
				fp.FMUL(32, (ARM64Reg)(Q18 + i * 4), Q6, Q3);
				fp.FMUL(32, (ARM64Reg)(Q19 + i * 4), Q7, Q3);
				ADDI2R(X4, X4, 16 * 4);
			} else {
				fp.FMUL(32, Q4, Q4, Q3);
				fp.FMUL(32, Q5, Q5, Q3);
				fp.FMUL(32, Q6, Q6, Q3);
				fp.FMUL(32, Q7, Q7, Q3);
				fp.STR(128, INDEX_UNSIGNED, Q4, X4, 0);
				fp.STR(128, INDEX_UNSIGNED, Q5, X4, 16);
				fp.STR(128, INDEX_UNSIGNED, Q6, X4, 32);
				fp.STR(128, INDEX_UNSIGNED, Q7, X4, 48);
				ADDI2R(X4, X4, 16 * 4);
				for (int w = 0; w < 4; ++w)
					matrixRow[w] = (ARM64Reg)(Q16 + i * 4 + w);
			}
			// Zero out the top lane of each one with the mask created above.
			fp.AND(matrixRow[0], Q4, Q3);
			fp.AND(matrixRow[1], Q5, Q3);
			fp.AND(matrixRow[2], Q6, Q3);
			fp.AND(matrixRow[3], Q7, Q3);

			if (i >= 4)
				fp.ST1(32, 4, INDEX_POST, matrixRow[0], X4);
		}
	}
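
The EXT/AND sequence above repacks 12 consecutive matrix floats into four row vectors with a zeroed top lane, matching the commit's own "ABCD EFGH IJKL -> ABC0 DEF0 GHI0 JKL0" comment. A scalar model of EXT and the resulting lanes (a sketch, not emitter code):

	// Scalar model of NEON EXT: take 16 bytes from n:m starting at byte pos.
	static void Ext(float d[4], const float n[4], const float m[4], int posBytes) {
		float cat[8];
		for (int i = 0; i < 4; i++) { cat[i] = n[i]; cat[4 + i] = m[i]; }
		for (int i = 0; i < 4; i++) d[i] = cat[posBytes / 4 + i];
	}

	// Input : Q4 = {A,B,C,D}, Q5 = {E,F,G,H}, Q6 = {I,J,K,L}
	// EXT(Q7, Q6, Q6, 4)  -> Q7 = {J,K,L,I}
	// EXT(Q6, Q5, Q6, 8)  -> Q6 = {G,H,I,J}
	// EXT(Q5, Q4, Q5, 12) -> Q5 = {D,E,F,G}
	// AND with Q3 = {~0,~0,~0,0} then yields:
	//   Q4 = {A,B,C,0}, Q5 = {D,E,F,0}, Q6 = {G,H,I,0}, Q7 = {J,K,L,0}
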
	if (dec.col) {
		// Or LDB and skip the conditional? This is probably cheaper.
		MOVI2R(fullAlphaReg, 0xFF);
	if (updateFullAlpha) {
		// This ends up non-zero if alpha is not full.
		// Often we just ORN into it.
		MOVI2R(alphaNonFullReg, 0);
	}

	if (dec.tc && dec.throughmode) {
		// TODO: Smarter, only when doing bounds.
	if (updateTexBounds) {
		MOVP2R(scratchReg64, &gstate_c.vertBounds.minU);
		LDRH(INDEX_UNSIGNED, boundsMinUReg, scratchReg64, offsetof(KnownVertexBounds, minU));
		LDRH(INDEX_UNSIGNED, boundsMaxUReg, scratchReg64, offsetof(KnownVertexBounds, maxU));

@@ -259,16 +264,14 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
	SUBS(counterReg, counterReg, 1);
	B(CC_NEQ, loopStart);

	if (dec.col) {
	if (updateFullAlpha) {
		FixupBranch skip = CBZ(alphaNonFullReg);
		MOVP2R(tempRegPtr, &gstate_c.vertexFullAlpha);
		CMP(fullAlphaReg, 0);
		FixupBranch skip = B(CC_NEQ);
		STRB(INDEX_UNSIGNED, fullAlphaReg, tempRegPtr, 0);
		STRB(INDEX_UNSIGNED, WZR, tempRegPtr, 0);
		SetJumpTarget(skip);
	}

	if (dec.tc && dec.throughmode) {
		// TODO: Smarter, only when doing bounds.
	if (updateTexBounds) {
		MOVP2R(scratchReg64, &gstate_c.vertBounds.minU);
		STRH(INDEX_UNSIGNED, boundsMinUReg, scratchReg64, offsetof(KnownVertexBounds, minU));
		STRH(INDEX_UNSIGNED, boundsMaxUReg, scratchReg64, offsetof(KnownVertexBounds, maxU));

@@ -276,6 +279,7 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
		STRH(INDEX_UNSIGNED, boundsMaxVReg, scratchReg64, offsetof(KnownVertexBounds, maxV));
	}

	if (regs_to_save || regs_to_save_fp)
		fp.ABI_PopRegisters(regs_to_save, regs_to_save_fp);

	RET();

@@ -342,13 +346,11 @@ void VertexDecoderJitCache::Jit_ApplyWeights() {
			break;
		default:
			// Matrices 4+ need to be loaded from memory.
			fp.LDP(128, INDEX_SIGNED, Q8, Q9, scratchReg64, 0);
			fp.LDP(128, INDEX_SIGNED, Q10, Q11, scratchReg64, 2 * 16);
			fp.LD1(32, 4, INDEX_POST, Q8, scratchReg64);
			fp.FMLA(32, Q4, Q8, neonWeightRegsQ[i >> 2], i & 3);
			fp.FMLA(32, Q5, Q9, neonWeightRegsQ[i >> 2], i & 3);
			fp.FMLA(32, Q6, Q10, neonWeightRegsQ[i >> 2], i & 3);
			fp.FMLA(32, Q7, Q11, neonWeightRegsQ[i >> 2], i & 3);
			ADDI2R(scratchReg64, scratchReg64, 4 * 16);
			break;
		}
	}

@@ -482,13 +484,8 @@ void VertexDecoderJitCache::Jit_WeightsFloatSkin() {

void VertexDecoderJitCache::Jit_Color8888() {
	LDR(INDEX_UNSIGNED, tempReg1, srcReg, dec_->coloff);

	// Set flags to determine if alpha != 0xFF.
	ORN(tempReg2, WZR, tempReg1, ArithOption(tempReg1, ST_ASR, 24));
	CMP(tempReg2, 0);

	// Clear fullAlphaReg when the inverse was not 0.
	// fullAlphaReg = tempReg2 == 0 ? fullAlphaReg : 0 + 1;
	CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ);
	// Or any non-set bits into alphaNonFullReg. This way it's non-zero if not full.
	ORN(alphaNonFullReg, alphaNonFullReg, tempReg1, ArithOption(tempReg1, ST_ASR, 24));

	STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.c0off);
}

@@ -508,15 +505,10 @@ void VertexDecoderJitCache::Jit_Color4444() {
	// And expand to 8 bits.
	ORR(tempReg1, tempReg2, tempReg2, ArithOption(tempReg2, ST_LSL, 4));

	// Or any non-set bits into alphaNonFullReg. This way it's non-zero if not full.
	ORN(alphaNonFullReg, alphaNonFullReg, tempReg1, ArithOption(tempReg1, ST_ASR, 24));

	STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.c0off);

	// Set flags to determine if alpha != 0xFF.
	ORN(tempReg2, WZR, tempReg1, ArithOption(tempReg1, ST_ASR, 24));
	CMP(tempReg2, 0);

	// Clear fullAlphaReg when the inverse was not 0.
	// fullAlphaReg = tempReg2 == 0 ? fullAlphaReg : 0 + 1;
	CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ);
}

void VertexDecoderJitCache::Jit_Color565() {

@@ -540,7 +532,7 @@ void VertexDecoderJitCache::Jit_Color565() {
	ORR(tempReg3, tempReg3, tempReg1, ArithOption(tempReg1, ST_LSR, 4));
	ORR(tempReg2, tempReg2, tempReg3, ArithOption(tempReg3, ST_LSL, 8));

	// Add in full alpha. No need to update fullAlphaReg.
	// Add in full alpha. No need to update alphaNonFullReg.
	ORRI2R(tempReg1, tempReg2, 0xFF000000, scratchReg);

	STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.c0off);

@@ -566,15 +558,10 @@ void VertexDecoderJitCache::Jit_Color5551() {
	ANDI2R(tempReg1, tempReg1, 0xFF000000, scratchReg);
	ORR(tempReg2, tempReg2, tempReg1);

	// Set flags to determine if alpha != 0xFF.
	ORN(tempReg3, WZR, tempReg1, ArithOption(tempReg1, ST_ASR, 24));
	CMP(tempReg3, 0);
	// Or any non-set bits into alphaNonFullReg. This way it's non-zero if not full.
	ORN(alphaNonFullReg, alphaNonFullReg, tempReg1, ArithOption(tempReg1, ST_ASR, 24));

	STR(INDEX_UNSIGNED, tempReg2, dstReg, dec_->decFmt.c0off);

	// Clear fullAlphaReg when the inverse was not 0.
	// fullAlphaReg = tempReg3 == 0 ? fullAlphaReg : 0 + 1;
	CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ);
}

void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() {

@@ -608,12 +595,12 @@ void VertexDecoderJitCache::Jit_TcFloat() {
}

void VertexDecoderJitCache::Jit_TcU8Prescale() {
	fp.LDUR(16, neonScratchRegD, srcReg, dec_->tcoff);
	fp.UXTL(8, neonScratchRegQ, neonScratchRegD); // Widen to 16-bit
	fp.UXTL(16, neonScratchRegQ, neonScratchRegD); // Widen to 32-bit
	fp.UCVTF(32, neonScratchRegD, neonScratchRegD);
	fp.FMUL(32, neonScratchRegD, neonScratchRegD, neonUVScaleReg); // TODO: FMLA
	fp.FADD(32, neonScratchRegD, neonScratchRegD, neonUVOffsetReg);
	fp.LDUR(16, neonScratchReg2D, srcReg, dec_->tcoff);
	fp.UXTL(8, neonScratchReg2Q, neonScratchReg2D); // Widen to 16-bit
	fp.UXTL(16, neonScratchReg2Q, neonScratchReg2D); // Widen to 32-bit
	fp.UCVTF(32, neonScratchReg2D, neonScratchReg2D, 7);
	fp.MOV(neonScratchRegD, neonUVOffsetReg);
	fp.FMLA(32, neonScratchRegD, neonScratchReg2D, neonUVScaleReg);
	fp.STUR(64, neonScratchRegD, dstReg, dec_->decFmt.uvoff);
}

@@ -626,11 +613,11 @@ void VertexDecoderJitCache::Jit_TcU8ToFloat() {
}

void VertexDecoderJitCache::Jit_TcU16Prescale() {
	fp.LDUR(32, neonScratchRegD, srcReg, dec_->tcoff);
	fp.UXTL(16, neonScratchRegQ, neonScratchRegD); // Widen to 32-bit
	fp.UCVTF(32, neonScratchRegD, neonScratchRegD);
	fp.FMUL(32, neonScratchRegD, neonScratchRegD, neonUVScaleReg); // TODO: FMLA
	fp.FADD(32, neonScratchRegD, neonScratchRegD, neonUVOffsetReg);
	fp.LDUR(32, neonScratchReg2D, srcReg, dec_->tcoff);
	fp.UXTL(16, neonScratchReg2Q, neonScratchReg2D); // Widen to 32-bit
	fp.UCVTF(32, neonScratchReg2D, neonScratchReg2D, 15);
	fp.MOV(neonScratchRegD, neonUVOffsetReg);
	fp.FMLA(32, neonScratchRegD, neonScratchReg2D, neonUVScaleReg);
	fp.STUR(64, neonScratchRegD, dstReg, dec_->decFmt.uvoff);
}

@@ -642,9 +629,9 @@ void VertexDecoderJitCache::Jit_TcU16ToFloat() {
}

void VertexDecoderJitCache::Jit_TcFloatPrescale() {
	fp.LDUR(64, neonScratchRegD, srcReg, dec_->tcoff);
	fp.FMUL(32, neonScratchRegD, neonScratchRegD, neonUVScaleReg); // TODO: FMLA
	fp.FADD(32, neonScratchRegD, neonScratchRegD, neonUVOffsetReg);
	fp.LDUR(64, neonScratchReg2D, srcReg, dec_->tcoff);
	fp.MOV(neonScratchRegD, neonUVOffsetReg);
	fp.FMLA(32, neonScratchRegD, neonScratchReg2D, neonUVScaleReg);
	fp.STUR(64, neonScratchRegD, dstReg, dec_->decFmt.uvoff);
}
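
Two idioms carry the prescale rewrite above: UCVTF with a fractional-bits argument converts and scales in one instruction (7 fractional bits means multiply by 1/128, 15 means 1/32768, replacing the removed by128/by32768 constants), and seeding the destination with the UV offset lets FMLA fold the old FMUL+FADD pair. Scalar equivalents (a sketch):

	float UcvtfFixed(uint32_t raw, int fbits) {
		return (float)raw / (float)(1u << fbits);   // UCVTF(..., fbits)
	}

	float PrescaleSketch(uint32_t rawTc, float scale, float offset) {
		float v = UcvtfFixed(rawTc, 7);   // u8 path; the u16 path uses fbits = 15
		return offset + v * scale;        // MOV(dst, offset); FMLA(dst, v, scale)
	}
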
|
||||
|
||||
|
|
|
@ -108,19 +108,10 @@ void DecVtxFormat::InitializeFromID(uint32_t id) {
void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound) {
	// Find index bounds. Could cache this in display lists.
	// Also, this could be greatly sped up with SSE2/NEON, although rarely a bottleneck.
	int lowerBound = 0x7FFFFFFF;
	int upperBound = 0;
	u32 idx = vertType & GE_VTYPE_IDX_MASK;
	if (idx == GE_VTYPE_IDX_8BIT) {
		const u8 *ind8 = (const u8 *)inds;
		for (int i = 0; i < count; i++) {
			u8 value = ind8[i];
			if (value > upperBound)
				upperBound = value;
			if (value < lowerBound)
				lowerBound = value;
		}
	} else if (idx == GE_VTYPE_IDX_16BIT) {
	if (idx == GE_VTYPE_IDX_16BIT) {
		uint16_t upperBound = 0;
		uint16_t lowerBound = 0xFFFF;
		const u16_le *ind16 = (const u16_le *)inds;
		for (int i = 0; i < count; i++) {
			u16 value = ind16[i];
@ -129,7 +120,24 @@ void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBo
			if (value < lowerBound)
				lowerBound = value;
		}
		*indexLowerBound = lowerBound;
		*indexUpperBound = upperBound;
	} else if (idx == GE_VTYPE_IDX_8BIT) {
		uint8_t upperBound = 0;
		uint8_t lowerBound = 0xFF;
		const u8 *ind8 = (const u8 *)inds;
		for (int i = 0; i < count; i++) {
			u8 value = ind8[i];
			if (value > upperBound)
				upperBound = value;
			if (value < lowerBound)
				lowerBound = value;
		}
		*indexLowerBound = lowerBound;
		*indexUpperBound = upperBound;
	} else if (idx == GE_VTYPE_IDX_32BIT) {
		int lowerBound = 0x7FFFFFFF;
		int upperBound = 0;
		WARN_LOG_REPORT_ONCE(indexBounds32, G3D, "GetIndexBounds: Decoding 32-bit indexes");
		const u32_le *ind32 = (const u32_le *)inds;
		for (int i = 0; i < count; i++) {
@ -143,12 +151,12 @@ void GetIndexBounds(const void *inds, int count, u32 vertType, u16 *indexLowerBo
			if (value < lowerBound)
				lowerBound = value;
		}
	} else {
		lowerBound = 0;
		upperBound = count - 1;
	}
	*indexLowerBound = (u16)lowerBound;
	*indexUpperBound = (u16)upperBound;
	} else {
		*indexLowerBound = 0;
		*indexUpperBound = count - 1;
	}
}

void PrintDecodedVertex(const VertexReader &vtx) {
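Reviewer note: the restructuring above gives each index width its own typed bounds (sentinels 0xFF / 0xFFFF / 0x7FFFFFFF) instead of sharing int-typed bounds across all branches. A condensed standalone sketch of that per-type scan; this is an illustration, not the actual helper in the file:

    #include <cstdint>
    #include <cstdio>

    // Each index size scans with bounds of its own width, so the initial
    // sentinel (all-ones of the type) fits exactly.
    template <typename T>
    static void IndexBounds(const T *inds, int count, uint16_t *lower, uint16_t *upper) {
        T lo = (T)~(T)0;  // max value of the type
        T hi = 0;
        for (int i = 0; i < count; i++) {
            if (inds[i] > hi) hi = inds[i];
            if (inds[i] < lo) lo = inds[i];
        }
        *lower = (uint16_t)lo;
        *upper = (uint16_t)hi;
    }

    int main() {
        const uint8_t inds8[] = { 3, 7, 2, 9 };
        uint16_t lo, hi;
        IndexBounds(inds8, 4, &lo, &hi);
        printf("bounds: %u..%u\n", (unsigned)lo, (unsigned)hi);  // 2..9
        return 0;
    }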
@ -598,7 +598,7 @@ rotateVBO:
		prim = GE_PRIM_TRIANGLES;
	VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount());

	u16 *const inds = decIndex_;
	u16 *inds = decIndex_;
	SoftwareTransformResult result{};
	SoftwareTransformParams params{};
	params.decoded = decoded_;

@ -644,9 +644,8 @@ rotateVBO:
	// Need to ApplyDrawState after ApplyTexture because depal can launch a render pass and that wrecks the state.
	ApplyDrawState(prim);

	int indsOffset = 0;
	if (result.action == SW_NOT_READY)
		swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result);
		swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result);
	if (result.setSafeSize)
		framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);

@ -684,11 +683,11 @@ rotateVBO:
			UINT iOffset;
			int iSize = sizeof(uint16_t) * result.drawNumTrans;
			uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize);
			memcpy(iptr, inds + indsOffset, iSize);
			memcpy(iptr, inds, iSize);
			pushInds_->EndPush(context_);
			context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset);
			context_->DrawIndexed(result.drawNumTrans, 0, 0);
		} else if (result.drawNumTrans > 0) {
		} else {
			context_->Draw(result.drawNumTrans, 0);
		}
	} else if (result.action == SW_CLEAR) {
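Reviewer note: for context on the indexed path above, indices for the current draw are memcpy'd into a shared dynamic push buffer, and the draw then references them through the byte offset returned at push time. A minimal stand-in for that pattern; IndexPushBuffer and all values here are hypothetical simplifications of pushInds_, not the real class:

    #include <cstdint>
    #include <cstdio>
    #include <cstring>
    #include <vector>

    struct IndexPushBuffer {
        std::vector<uint8_t> storage;
        // Reserve size bytes; report where they start so the draw can bind at
        // that offset.
        uint8_t *BeginPush(uint32_t *offset, int size) {
            *offset = (uint32_t)storage.size();
            storage.resize(storage.size() + size);
            return storage.data() + *offset;
        }
    };

    int main() {
        IndexPushBuffer pushInds;
        const uint16_t inds[] = { 0, 1, 2, 2, 1, 3 };
        int drawNumTrans = 6;
        if (drawNumTrans > 0) {  // one variant in the diff guards empty draws
            uint32_t iOffset;
            int iSize = (int)sizeof(uint16_t) * drawNumTrans;
            uint8_t *iptr = pushInds.BeginPush(&iOffset, iSize);
            memcpy(iptr, inds, iSize);
            // A real backend would now bind the buffer at iOffset and DrawIndexed.
            printf("pushed %d index bytes at offset %u\n", iSize, (unsigned)iOffset);
        }
        return 0;
    }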
@ -558,7 +558,7 @@ rotateVBO:
		prim = GE_PRIM_TRIANGLES;
	VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount());

	u16 *const inds = decIndex_;
	u16 *inds = decIndex_;
	SoftwareTransformResult result{};
	SoftwareTransformParams params{};
	params.decoded = decoded_;

@ -607,9 +607,8 @@ rotateVBO:

	ApplyDrawState(prim);

	int indsOffset = 0;
	if (result.action == SW_NOT_READY)
		swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result);
		swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result);
	if (result.setSafeSize)
		framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);

@ -629,8 +628,8 @@ rotateVBO:

		device_->SetVertexDeclaration(transformedVertexDecl_);
		if (result.drawIndexed) {
			device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, maxIndex, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds + indsOffset, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex));
		} else if (result.drawNumTrans > 0) {
			device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, maxIndex, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex));
		} else {
			device_->DrawPrimitiveUP(d3d_prim[prim], D3DPrimCount(d3d_prim[prim], result.drawNumTrans), result.drawBuffer, sizeof(TransformedVertex));
		}
	} else if (result.action == SW_CLEAR) {
@ -123,7 +123,7 @@ void GPU_DX9::BeginFrame() {
	drawEngine_.BeginFrame();

	GPUCommonHW::BeginFrame();
	shaderManagerDX9_->DirtyShader();
	shaderManagerDX9_->DirtyLastShader();

	framebufferManager_->BeginFrame();
@ -535,27 +535,23 @@ void ShaderManagerDX9::Clear() {
	}
	fsCache_.clear();
	vsCache_.clear();
	DirtyShader();
	DirtyLastShader();
}

void ShaderManagerDX9::ClearShaders() {
	Clear();
}

void ShaderManagerDX9::DirtyShader() {
void ShaderManagerDX9::DirtyLastShader() {
	// Forget the last shader ID
	lastFSID_.set_invalid();
	lastVSID_.set_invalid();
	lastVShader_ = nullptr;
	lastPShader_ = nullptr;
	// TODO: Probably not necessary to dirty uniforms here on DX9.
	gstate_c.Dirty(DIRTY_ALL_UNIFORMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
}

void ShaderManagerDX9::DirtyLastShader() {
	lastVShader_ = nullptr;
	lastPShader_ = nullptr;
}

VSShader *ShaderManagerDX9::ApplyShader(bool useHWTransform, bool useHWTessellation, VertexDecoder *decoder, bool weightsAsFloat, bool useSkinInDecode, const ComputedPipelineState &pipelineState) {
	VShaderID VSID;
	if (gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) {
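Reviewer note: the net effect of this hunk is that the two invalidation entry points collapse into a single DirtyLastShader() that forgets the cached shader IDs and pointers and re-dirties uniform/shader state in one step. A simplified sketch of that consolidated behavior, with the manager's state flattened to plain flags for illustration:

    #include <cstdio>

    struct ShaderManagerSketch {
        bool lastIDValid = true;
        const char *lastVShader = "vs";
        const char *lastPShader = "ps";
        unsigned dirtyFlags = 0;

        // Consolidated invalidation: forget cached IDs and pointers, and mark
        // uniforms plus vertex/fragment shader state dirty in one call.
        void DirtyLastShader() {
            lastIDValid = false;   // stands in for lastFSID_/lastVSID_.set_invalid()
            lastVShader = nullptr;
            lastPShader = nullptr;
            dirtyFlags |= 0x7;     // stands in for DIRTY_ALL_UNIFORMS | shader-state bits
        }
    };

    int main() {
        ShaderManagerSketch sm;
        sm.DirtyLastShader();
        printf("id valid: %d, dirty: 0x%x\n", (int)sm.lastIDValid, sm.dirtyFlags);
        return 0;
    }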
Some files were not shown because too many files have changed in this diff.