Minor optimizations (use the new hashmap in a few more places)

This commit is contained in:
Henrik Rydgård 2017-08-20 19:03:16 +02:00
parent 9bce767b73
commit 2f85e6516e
17 changed files with 75 additions and 94 deletions

View file

@ -178,6 +178,7 @@ struct TexCacheEntry {
class FramebufferManagerCommon;
// Can't be unordered_map because we use lower_bound (although, for some reason, that does compile on MSVC).
// Would really like to replace this with DenseHashMap, but can't as long as we need lower_bound.
typedef std::map<u64, std::unique_ptr<TexCacheEntry>> TexCache;
class TextureCacheCommon {

View file

@ -16,7 +16,7 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <algorithm>
#include <stdio.h>
#include <cstdio>
#include "base/basictypes.h"
#include "base/logging.h"

View file

@ -20,6 +20,7 @@
#include <cstring>
#include "ppsspp_config.h"
#include "base/basictypes.h"
#include "Common/Hashmaps.h"
#include "Common/Log.h"
#include "Common/CommonTypes.h"
#include "Core/Reporting.h"

View file

@ -543,14 +543,16 @@ void GPU_D3D11::Execute_Prim(u32 op, u32 diff) {
}
#endif
if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
vertexCost_ = EstimatePerVertexCost();
}
gpuStats.vertexGPUCycles += vertexCost_ * count;
cyclesExecuted += vertexCost_* count;
int bytesRead = 0;
UpdateUVScaleOffset();
drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);
int vertexCost = EstimatePerVertexCost() * count;
gpuStats.vertexGPUCycles += vertexCost;
cyclesExecuted += vertexCost;
// After drawing, we advance the vertexAddr (when non-indexed) or indexAddr (when indexed).
// Some games rely on this and don't bother reloading VADDR and IADDR.
// The VADDR/IADDR registers are NOT updated.

View file

@ -117,6 +117,7 @@ private:
static CommandInfo cmdInfo_[256];
int lastVsync_;
int vertexCost_ = 0;
std::string reportingPrimaryInfo_;
std::string reportingFullInfo_;

View file

@ -509,14 +509,16 @@ void GPU_DX9::Execute_Prim(u32 op, u32 diff) {
}
#endif
if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
vertexCost_ = EstimatePerVertexCost();
}
gpuStats.vertexGPUCycles += vertexCost_ * count;
cyclesExecuted += vertexCost_* count;
int bytesRead = 0;
UpdateUVScaleOffset();
drawEngine_.SubmitPrim(verts, inds, prim, count, vertexType, &bytesRead);
int vertexCost = EstimatePerVertexCost() * count;
gpuStats.vertexGPUCycles += vertexCost;
cyclesExecuted += vertexCost;
// After drawing, we advance the vertexAddr (when non-indexed) or indexAddr (when indexed).
// Some games rely on this and don't bother reloading VADDR and IADDR.
// The VADDR/IADDR registers are NOT updated.

View file

@ -117,6 +117,7 @@ private:
static CommandInfo cmdInfo_[256];
int lastVsync_;
int vertexCost_ = 0;
std::string reportingPrimaryInfo_;
std::string reportingFullInfo_;

View file

@ -681,14 +681,16 @@ void GPU_GLES::Execute_Prim(u32 op, u32 diff) {
}
#endif
if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
vertexCost_ = EstimatePerVertexCost();
}
gpuStats.vertexGPUCycles += vertexCost_ * count;
cyclesExecuted += vertexCost_* count;
int bytesRead = 0;
UpdateUVScaleOffset();
drawEngine_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
int vertexCost = EstimatePerVertexCost();
gpuStats.vertexGPUCycles += vertexCost * count;
cyclesExecuted += vertexCost * count;
// After drawing, we advance the vertexAddr (when non-indexed) or indexAddr (when indexed).
// Some games rely on this and don't bother reloading VADDR and IADDR.
// The VADDR/IADDR registers are NOT updated.

View file

@ -120,6 +120,7 @@ private:
ShaderManagerGLES *shaderManagerGL_;
int lastVsync_;
int vertexCost_ = 0;
std::string reportingPrimaryInfo_;
std::string reportingFullInfo_;

View file

@ -207,16 +207,6 @@ private:
VkSampler sampler_;
VkBuffer base_, light_, bone_; // All three UBO slots will be set to this. This will usually be identical
// for all draws in a frame, except when the buffer has to grow.
bool operator < (const DescriptorSetKey &other) const {
if (imageView_ < other.imageView_) return true; else if (imageView_ > other.imageView_) return false;
if (sampler_ < other.sampler_) return true; else if (sampler_ > other.sampler_) return false;
if (secondaryImageView_ < other.secondaryImageView_) return true; else if (secondaryImageView_ > other.secondaryImageView_) return false;
if (base_ < other.base_) return true; else if (base_ > other.base_) return false;
if (light_ < other.light_) return true; else if (light_ > other.light_) return false;
if (bone_ < other.bone_) return true; else if (bone_ > other.bone_) return false;
return false;
}
};
// We alternate between these.

View file

@ -511,14 +511,16 @@ void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) {
}
#endif
if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
vertexCost_ = EstimatePerVertexCost();
}
gpuStats.vertexGPUCycles += vertexCost_ * count;
cyclesExecuted += vertexCost_* count;
int bytesRead = 0;
UpdateUVScaleOffset();
drawEngine_.SubmitPrim(verts, inds, prim, count, gstate.vertType, &bytesRead);
int vertexCost = EstimatePerVertexCost() * count;
gpuStats.vertexGPUCycles += vertexCost;
cyclesExecuted += vertexCost;
// After drawing, we advance the vertexAddr (when non-indexed) or indexAddr (when indexed).
// Some games rely on this and don't bother reloading VADDR and IADDR.
// The VADDR/IADDR registers are NOT updated.

View file

@ -120,6 +120,7 @@ private:
int lastVsync_;
VkCommandBuffer curCmd_;
int vertexCost_ = 0;
std::string reportingPrimaryInfo_;
std::string reportingFullInfo_;

View file

@ -45,16 +45,6 @@ struct VulkanPipelineKey {
VkShaderModule vShader;
VkShaderModule fShader;
// TODO: Probably better to use a hash function instead.
bool operator < (const VulkanPipelineKey &other) const {
if (raster < other.raster) return true; else if (other.raster < raster) return false;
if (renderPass < other.renderPass) return true; else if (other.renderPass < renderPass) return false;
if (useHWTransform < other.useHWTransform) return true; else if (other.useHWTransform < useHWTransform) return false;
if (vtxDec < other.vtxDec) return true; else if (other.vtxDec < vtxDec) return false;
if (vShader < other.vShader) return true; else if (other.vShader < vShader) return false;
if (fShader < other.fShader) return true; else if (other.fShader < fShader) return false;
return false;
}
void ToString(std::string *str) const {
str->resize(sizeof(*this));
memcpy(&(*str)[0], this, sizeof(*this));

View file

@ -154,7 +154,7 @@ std::string VulkanVertexShader::GetShaderString(DebugShaderStringType type) cons
}
ShaderManagerVulkan::ShaderManagerVulkan(VulkanContext *vulkan)
: vulkan_(vulkan), lastVShader_(nullptr), lastFShader_(nullptr) {
: vulkan_(vulkan), lastVShader_(nullptr), lastFShader_(nullptr), fsCache_(16), vsCache_(16) {
codeBuffer_ = new char[16384];
uboAlignment_ = vulkan_->GetPhysicalDeviceProperties().limits.minUniformBufferOffsetAlignment;
memset(&ub_base, 0, sizeof(ub_base));
@ -177,14 +177,14 @@ void ShaderManagerVulkan::DeviceRestore(VulkanContext *vulkan) {
}
void ShaderManagerVulkan::Clear() {
for (auto iter = fsCache_.begin(); iter != fsCache_.end(); ++iter) {
delete iter->second;
}
for (auto iter = vsCache_.begin(); iter != vsCache_.end(); ++iter) {
delete iter->second;
}
fsCache_.clear();
vsCache_.clear();
fsCache_.Iterate([&](const ShaderID &key, VulkanFragmentShader *shader) {
delete shader;
});
vsCache_.Iterate([&](const ShaderID &key, VulkanVertexShader *shader) {
delete shader;
});
fsCache_.Clear();
vsCache_.Clear();
lastFSID_.clear();
lastVSID_.clear();
}
@ -248,28 +248,22 @@ void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader
return;
}
VSCache::iterator vsIter = vsCache_.find(VSID);
VulkanVertexShader *vs;
if (vsIter == vsCache_.end()) {
VulkanVertexShader *vs = vsCache_.Get(VSID);
if (!vs) {
// Vertex shader not in cache. Let's compile it.
bool usesLighting;
GenerateVulkanGLSLVertexShader(VSID, codeBuffer_, &usesLighting);
vs = new VulkanVertexShader(vulkan_, VSID, codeBuffer_, vertType, useHWTransform, usesLighting);
vsCache_[VSID] = vs;
} else {
vs = vsIter->second;
vsCache_.Insert(VSID, vs);
}
lastVSID_ = VSID;
FSCache::iterator fsIter = fsCache_.find(FSID);
VulkanFragmentShader *fs;
if (fsIter == fsCache_.end()) {
VulkanFragmentShader *fs = fsCache_.Get(FSID);
if (!fs) {
// Fragment shader not in cache. Let's compile it.
GenerateVulkanGLSLFragmentShader(FSID, codeBuffer_);
fs = new VulkanFragmentShader(vulkan_, FSID, codeBuffer_, useHWTransform);
fsCache_[FSID] = fs;
} else {
fs = fsIter->second;
fsCache_.Insert(FSID, fs);
}
lastFSID_ = FSID;
@ -282,23 +276,24 @@ void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader
}
std::vector<std::string> ShaderManagerVulkan::DebugGetShaderIDs(DebugShaderType type) {
std::string id;
std::vector<std::string> ids;
switch (type) {
case SHADER_TYPE_VERTEX:
{
for (auto iter : vsCache_) {
iter.first.ToString(&id);
ids.push_back(id);
}
vsCache_.Iterate([&](const ShaderID &id, VulkanVertexShader *shader) {
std::string idstr;
id.ToString(&idstr);
ids.push_back(idstr);
});
break;
}
case SHADER_TYPE_FRAGMENT:
{
for (auto iter : fsCache_) {
iter.first.ToString(&id);
ids.push_back(id);
}
fsCache_.Iterate([&](const ShaderID &id, VulkanFragmentShader *shader) {
std::string idstr;
id.ToString(&idstr);
ids.push_back(idstr);
});
break;
}
default:
@ -313,20 +308,14 @@ std::string ShaderManagerVulkan::DebugGetShaderString(std::string id, DebugShade
switch (type) {
case SHADER_TYPE_VERTEX:
{
auto iter = vsCache_.find(shaderId);
if (iter == vsCache_.end()) {
return "";
}
return iter->second->GetShaderString(stringType);
VulkanVertexShader *vs = vsCache_.Get(shaderId);
return vs ? vs->GetShaderString(stringType) : "";
}
case SHADER_TYPE_FRAGMENT:
{
auto iter = fsCache_.find(shaderId);
if (iter == fsCache_.end()) {
return "";
}
return iter->second->GetShaderString(stringType);
VulkanFragmentShader *fs = fsCache_.Get(shaderId);
return fs ? fs->GetShaderString(stringType) : "";
}
default:
return "N/A";

View file

@ -20,6 +20,7 @@
#include <map>
#include "base/basictypes.h"
#include "Common/Hashmaps.h"
#include "Globals.h"
#include "Common/Vulkan/VulkanMemory.h"
#include "GPU/Common/ShaderCommon.h"
@ -130,10 +131,10 @@ private:
VulkanContext *vulkan_;
typedef std::map<ShaderID, VulkanFragmentShader *> FSCache;
typedef DenseHashMap<ShaderID, VulkanFragmentShader *, nullptr> FSCache;
FSCache fsCache_;
typedef std::map<ShaderID, VulkanVertexShader *> VSCache;
typedef DenseHashMap<ShaderID, VulkanVertexShader *, nullptr> VSCache;
VSCache vsCache_;
char *codeBuffer_;

View file

@ -72,16 +72,13 @@ CachedTextureVulkan::~CachedTextureVulkan() {
}
SamplerCache::~SamplerCache() {
for (auto iter : cache_) {
vulkan_->Delete().QueueDeleteSampler(iter.second);
}
DeviceLost();
}
VkSampler SamplerCache::GetOrCreateSampler(const SamplerCacheKey &key) {
auto iter = cache_.find(key);
if (iter != cache_.end()) {
return iter->second;
}
VkSampler sampler = cache_.Get(key);
if (sampler != VK_NULL_HANDLE)
return sampler;
VkSamplerCreateInfo samp = { VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO };
samp.addressModeU = key.sClamp ? VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE : VK_SAMPLER_ADDRESS_MODE_REPEAT;
@ -107,18 +104,17 @@ VkSampler SamplerCache::GetOrCreateSampler(const SamplerCacheKey &key) {
samp.minLod = 0.0f;
samp.mipLodBias = 0.0f;
VkSampler sampler;
VkResult res = vkCreateSampler(vulkan_->GetDevice(), &samp, nullptr, &sampler);
assert(res == VK_SUCCESS);
cache_[key] = sampler;
cache_.Insert(key, sampler);
return sampler;
}
void SamplerCache::DeviceLost() {
for (auto iter : cache_) {
vulkan_->Delete().QueueDeleteSampler(iter.second);
}
cache_.clear();
cache_.Iterate([&](const SamplerCacheKey &key, VkSampler sampler) {
vulkan_->Delete().QueueDeleteSampler(sampler);
});
cache_.Clear();
}
void SamplerCache::DeviceRestore(VulkanContext *vulkan) {

View file

@ -19,6 +19,7 @@
#include <map>
#include "Common/Hashmaps.h"
#include "Globals.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"
@ -49,7 +50,7 @@ public:
class SamplerCache {
public:
SamplerCache(VulkanContext *vulkan) : vulkan_(vulkan) {}
SamplerCache(VulkanContext *vulkan) : vulkan_(vulkan), cache_(16) {}
~SamplerCache();
VkSampler GetOrCreateSampler(const SamplerCacheKey &key);
@ -58,7 +59,7 @@ public:
private:
VulkanContext *vulkan_;
std::map<SamplerCacheKey, VkSampler> cache_;
DenseHashMap<SamplerCacheKey, VkSampler, VK_NULL_HANDLE> cache_;
};