From d6d7a15d25261a02dc20689e36a2f10e339a502f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Wed, 14 Sep 2022 21:57:09 +0200 Subject: [PATCH] Get depal-from-dynamic-CLUT working --- Common/GPU/Vulkan/VulkanRenderManager.cpp | 2 + GPU/Common/TextureCacheCommon.cpp | 91 +++++++++++++++++++++-- GPU/Common/TextureCacheCommon.h | 4 + GPU/Vulkan/TextureCacheVulkan.cpp | 22 +++++- GPU/Vulkan/VulkanUtil.h | 1 + assets/compat.ini | 4 +- 6 files changed, 111 insertions(+), 13 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index 10cc89000..56f40655f 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -150,6 +150,8 @@ VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRe width = _width; height = _height; + _dbg_assert_(tag); + CreateImage(vulkan_, initCmd, color, width, height, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag); CreateImage(vulkan_, initCmd, depth, width, height, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag); diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 6b7228740..b1ddc448a 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -381,6 +381,8 @@ TexCacheEntry *TextureCacheCommon::SetTexture() { u32 cluthash; if (hasClut) { if (clutRenderAddress_ != 0xFFFFFFFF) { + gstate_c.curTextureXOffset = 0.0f; + gstate_c.curTextureYOffset = 0.0f; hasClutGPU = true; cluthash = 0; // Or should we use some other marker value? } else { @@ -1491,12 +1493,28 @@ inline u32 TfmtRawToFullAlpha(GETextureFormat fmt) { } } +// Used for converting CLUT4 to CLUT8. +// Could SIMD or whatever, though will hardly be a bottleneck. +static void Expand4To8Bits(u8 *dest, const u8 *src, int srcWidth) { + for (int i = 0; i < (srcWidth + 1) / 2; i++) { + u8 lower = src[i] & 0xF; + u8 upper = src[i] >> 4; + dest[i * 2] = lower; + dest[i * 2 + 1] = upper; + } +} + CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, TexDecodeFlags flags) { u32 alphaSum = 0xFFFFFFFF; u32 fullAlphaMask = 0x0; bool expandTo32bit = (flags & TexDecodeFlags::EXPAND32) != 0; bool reverseColors = (flags & TexDecodeFlags::REVERSE_COLORS) != 0; + bool toClut8 = (flags & TexDecodeFlags::TO_CLUT8) != 0; + + if (toClut8 && format != GE_TFMT_CLUT8 && format != GE_TFMT_CLUT4) { + _dbg_assert_(false); + } bool swizzled = gstate.isTextureSwizzled(); if ((texaddr & 0x00600000) != 0 && Memory::IsVRAMAddress(texaddr)) { @@ -1531,6 +1549,15 @@ CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, G texptr = (u8 *)tmpTexBuf32_.data(); } + if (toClut8) { + // We just need to expand from 4 to 8 bits. + for (int y = 0; y < h; ++y) { + Expand4To8Bits((u8 *)out + outPitch * y, texptr + (bufw * y) / 2, w); + } + // We can't know anything about alpha. + return CHECKALPHA_ANY; + } + switch (clutformat) { case GE_CMODE_16BIT_BGR5650: case GE_CMODE_16BIT_ABGR5551: @@ -1593,6 +1620,19 @@ CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, G break; case GE_TFMT_CLUT8: + if (toClut8) { + if (gstate.isTextureSwizzled()) { + tmpTexBuf32_.resize(bufw * ((h + 7) & ~7)); + UnswizzleFromMem(tmpTexBuf32_.data(), bufw, texptr, bufw, h, 1); + texptr = (u8 *)tmpTexBuf32_.data(); + } + // After deswizzling, we are in the correct format and can just copy. + for (int y = 0; y < h; ++y) { + memcpy((u8 *)out + outPitch * y, texptr + (bufw * y), w); + } + // We can't know anything about alpha. + return CHECKALPHA_ANY; + } return ReadIndexedTex(out, outPitch, level, texptr, 1, bufw, reverseColors, expandTo32bit); case GE_TFMT_CLUT16: @@ -1878,10 +1918,18 @@ void TextureCacheCommon::ApplyTexture() { InvalidateLastTexture(); } - entry->lastFrame = gpuStats.numFlips; - BindTexture(entry); - gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL); - gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0); + if (entry->status & TexCacheEntry::STATUS_CLUT_GPU) { + // Special process. + ApplyTextureDepal(entry); + entry->lastFrame = gpuStats.numFlips; + gstate_c.SetTextureFullAlpha(false); + gstate_c.SetTextureIs3D(false); + } else { + entry->lastFrame = gpuStats.numFlips; + BindTexture(entry); + gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL); + gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0); + } } static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) { @@ -2093,6 +2141,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE); } +// Applies depal to a normal (non-framebuffer) texture, pre-decoded to CLUT8 format. void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) { Draw2DPipeline *textureShader = nullptr; uint32_t clutMode = gstate.clutformat & 0xFFFFFF; @@ -2114,6 +2163,7 @@ void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) { desc.depth = 1; desc.z_stencil = false; desc.numColorAttachments = 1; + desc.tag = "dynamic_clut"; dynamicClutFbo_ = draw_->CreateFramebuffer(desc); dynamicClutReinterpreted_ = draw_->CreateFramebuffer(desc); } @@ -2144,7 +2194,7 @@ void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) { float scaleFactorX = 1.0f; Draw2DPipeline *reinterpret = framebufferManager_->GetReinterpretPipeline(src->fb_format, expectedCLUTBufferFormat, &scaleFactorX); framebufferManager_->BlitUsingRaster( - dynamicClutFbo_, 0.0f, 0.0f, 512.0f, 1.0f, dynamicClutReinterpreted_, 0.0f, 0.0f, 512.0f, 1.0f, false, 1.0f, framebufferManager_->Get2DPipeline(DRAW2D_COPY_COLOR), "copy_clut"); + dynamicClutFbo_, 0.0f, 0.0f, 512.0f, 1.0f, dynamicClutReinterpreted_, 0.0f, 0.0f, scaleFactorX * 512.0f, 1.0f, false, 1.0f, reinterpret, "reinterpret_clut"); clutFbo = dynamicClutReinterpreted_; } @@ -2158,8 +2208,8 @@ void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) { const KnownVertexBounds &bounds = gstate_c.vertBounds; float u1 = 0.0f; float v1 = 0.0f; - float u2 = 1.0f; - float v2 = 1.0f; + float u2 = texWidth; + float v2 = texHeight; if (bounds.minV < bounds.maxV) { u1 = (bounds.minU + gstate_c.curTextureXOffset) * texWidth; v1 = (bounds.minV + gstate_c.curTextureYOffset) * texHeight; @@ -2232,6 +2282,15 @@ void TextureCacheCommon::Clear(bool delete_them) { secondCacheSizeEstimate_ = 0; } videos_.clear(); + + if (dynamicClutFbo_) { + dynamicClutFbo_->Release(); + dynamicClutFbo_ = nullptr; + } + if (dynamicClutReinterpreted_) { + dynamicClutReinterpreted_->Release(); + dynamicClutReinterpreted_ = nullptr; + } } void TextureCacheCommon::DeleteTexture(TexCache::iterator it) { @@ -2598,6 +2657,21 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt plan.maxPossibleLevels = log2i(std::min(plan.createW, plan.createH)) + 1; } + if (entry->status & TexCacheEntry::TexStatus::STATUS_CLUT_GPU) { + _dbg_assert_(entry->format == GE_TFMT_CLUT4 || entry->format == GE_TFMT_CLUT8); + plan.decodeToClut8 = true; + // We only support 1 mip level when doing CLUT on GPU for now. + // Supporting more would be possible, just not very interesting until we need it. + plan.levelsToCreate = 1; + plan.levelsToLoad = 1; + plan.maxPossibleLevels = 1; + plan.scaleFactor = 1; + plan.saveTexture = false; // Can't yet save these properly. + // TODO: Also forcibly disable replacement, or check that the replacement is a 8-bit paletted texture. + } else { + plan.decodeToClut8 = false; + } + if (plan.levelsToCreate == 1) { entry->status |= TexCacheEntry::STATUS_NO_MIPS; } else { @@ -2639,6 +2713,9 @@ void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, i if (!gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS) || dstFmt == Draw::DataFormat::R8G8B8A8_UNORM) { texDecFlags |= TexDecodeFlags::EXPAND32; } + if (entry.status & TexCacheEntry::STATUS_CLUT_GPU) { + texDecFlags |= TexDecodeFlags::TO_CLUT8; + } CheckAlphaResult alphaResult = DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, srcLevel, bufw, texDecFlags); entry.SetAlphaStatus(alphaResult, srcLevel); diff --git a/GPU/Common/TextureCacheCommon.h b/GPU/Common/TextureCacheCommon.h index 84b03683a..b24c50b95 100644 --- a/GPU/Common/TextureCacheCommon.h +++ b/GPU/Common/TextureCacheCommon.h @@ -53,6 +53,7 @@ class ShaderManagerCommon; enum class TexDecodeFlags { EXPAND32 = 1, REVERSE_COLORS = 2, + TO_CLUT8 = 4, }; ENUM_CLASS_BITOPS(TexDecodeFlags); @@ -285,6 +286,9 @@ struct BuildTexturePlan { bool replaceValid; bool saveTexture; + // TODO: Expand32 should probably also be decided in PrepareBuildTexture. + bool decodeToClut8; + void GetMipSize(int level, int *w, int *h) const { if (replaceValid) { replaced->GetSize(level, *w, *h); diff --git a/GPU/Vulkan/TextureCacheVulkan.cpp b/GPU/Vulkan/TextureCacheVulkan.cpp index 8a9e9f5a6..56345d4db 100644 --- a/GPU/Vulkan/TextureCacheVulkan.cpp +++ b/GPU/Vulkan/TextureCacheVulkan.cpp @@ -107,6 +107,15 @@ void main() { )"; +static int VkFormatBytesPerPixel(VkFormat format) { + switch (format) { + case VULKAN_8888_FORMAT: return 4; + case VULKAN_CLUT8_FORMAT: return 1; + default: break; + } + return 2; +} + SamplerCache::~SamplerCache() { DeviceLost(); } @@ -448,6 +457,8 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { if (plan.scaleFactor > 1) { // Whether hardware or software scaling, this is the dest format. dstFmt = VULKAN_8888_FORMAT; + } else if (plan.decodeToClut8) { + dstFmt = VULKAN_CLUT8_FORMAT; } // We don't generate mipmaps for 512x512 textures because they're almost exclusively used for menu backgrounds @@ -479,7 +490,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { case VULKAN_4444_FORMAT: mapping = &VULKAN_4444_SWIZZLE; break; case VULKAN_1555_FORMAT: mapping = &VULKAN_1555_SWIZZLE; break; case VULKAN_565_FORMAT: mapping = &VULKAN_565_SWIZZLE; break; - default: mapping = &VULKAN_8888_SWIZZLE; break; + default: mapping = &VULKAN_8888_SWIZZLE; break; // no swizzle } VkImageLayout imageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; @@ -562,7 +573,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { int mipHeight; plan.GetMipSize(i, &mipWidth, &mipHeight); - int bpp = actualFmt == VULKAN_8888_FORMAT ? 4 : 2; // output bpp + int bpp = VkFormatBytesPerPixel(actualFmt); int stride = (mipWidth * bpp + 15) & ~15; // output stride int uploadSize = stride * mipHeight; @@ -602,7 +613,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) { loadLevel(uploadSize, i, stride, plan.scaleFactor); entry->vkTex->UploadMip(cmdInit, 0, mipWidth, mipHeight, i, texBuf, bufferOffset, stride / bpp); } else if (computeUpload) { - int srcBpp = dstFmt == VULKAN_8888_FORMAT ? 4 : 2; + int srcBpp = VkFormatBytesPerPixel(dstFmt); int srcStride = mipUnscaledWidth * srcBpp; int srcSize = srcStride * mipUnscaledHeight; loadLevel(srcSize, i == 0 ? plan.baseLevelSrc : i, srcStride, 1); @@ -723,7 +734,7 @@ void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePt _assert_msg_(texaddr != 0, "Can't load a texture from address null") int bufw = GetTextureBufw(level, texaddr, tfmt); - int bpp = dstFmt == VULKAN_8888_FORMAT ? 4 : 2; + int bpp = VkFormatBytesPerPixel(dstFmt); u32 *pixelData; int decPitch; @@ -732,6 +743,9 @@ void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePt if (!gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS) || scaleFactor > 1 || dstFmt == VULKAN_8888_FORMAT) { texDecFlags |= TexDecodeFlags::EXPAND32; } + if (entry.status & TexCacheEntry::STATUS_CLUT_GPU) { + texDecFlags |= TexDecodeFlags::TO_CLUT8; + } if (scaleFactor > 1) { tmpTexBufRearrange_.resize(std::max(bufw, w) * h); diff --git a/GPU/Vulkan/VulkanUtil.h b/GPU/Vulkan/VulkanUtil.h index 2753fd79e..b31fdb75f 100644 --- a/GPU/Vulkan/VulkanUtil.h +++ b/GPU/Vulkan/VulkanUtil.h @@ -36,6 +36,7 @@ extern const VkComponentMapping VULKAN_8888_SWIZZLE; #define VULKAN_1555_FORMAT VK_FORMAT_A1R5G5B5_UNORM_PACK16 #define VULKAN_565_FORMAT VK_FORMAT_B5G6R5_UNORM_PACK16 // TODO: Does not actually have mandatory support, though R5G6B5 does! See #14602 #define VULKAN_8888_FORMAT VK_FORMAT_R8G8B8A8_UNORM +#define VULKAN_CLUT8_FORMAT VK_FORMAT_R8_UNORM // Manager for compute shaders that upload things (and those have two bindings: a storage buffer to read from and an image to write to). class VulkanComputeShaderManager { diff --git a/assets/compat.ini b/assets/compat.ini index ba515164e..3345917f5 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -1313,8 +1313,8 @@ ULES00703 = true # Temporary compatibility option, while developing a GPU CLUT-from-framebuffer path. # Burnout Dominator - lens flare effect (issue #11100) -ULUS10236 = true -ULES00703 = true +# ULUS10236 = true +# ULES00703 = true [UploadDepthForCLUTTextures] # Burnout Dominator - lens flare effect (issue #11100)