Get depal-from-dynamic-CLUT working
This commit is contained in:
parent
51c97c7a7f
commit
d6d7a15d25
6 changed files with 111 additions and 13 deletions
|
@ -150,6 +150,8 @@ VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRe
|
|||
width = _width;
|
||||
height = _height;
|
||||
|
||||
_dbg_assert_(tag);
|
||||
|
||||
CreateImage(vulkan_, initCmd, color, width, height, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
|
||||
CreateImage(vulkan_, initCmd, depth, width, height, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
|
||||
|
||||
|
|
|
@ -381,6 +381,8 @@ TexCacheEntry *TextureCacheCommon::SetTexture() {
|
|||
u32 cluthash;
|
||||
if (hasClut) {
|
||||
if (clutRenderAddress_ != 0xFFFFFFFF) {
|
||||
gstate_c.curTextureXOffset = 0.0f;
|
||||
gstate_c.curTextureYOffset = 0.0f;
|
||||
hasClutGPU = true;
|
||||
cluthash = 0; // Or should we use some other marker value?
|
||||
} else {
|
||||
|
@ -1491,12 +1493,28 @@ inline u32 TfmtRawToFullAlpha(GETextureFormat fmt) {
|
|||
}
|
||||
}
|
||||
|
||||
// Used for converting CLUT4 to CLUT8.
//
// Expands a row of packed 4-bit palette indices into one index per byte.
// Each source byte holds two pixels: the low nibble is the first (left) pixel,
// the high nibble is the second.
//
//   dest     - output row, must have room for srcWidth bytes.
//   src      - packed input row, (srcWidth + 1) / 2 bytes are read.
//   srcWidth - number of pixels to expand. Values <= 0 write nothing.
//
// Exactly srcWidth bytes are written: for an odd width only the low nibble of
// the final source byte is stored, so we never write past the end of the row.
//
// Could SIMD or whatever, though will hardly be a bottleneck.
static void Expand4To8Bits(u8 *dest, const u8 *src, int srcWidth) {
	if (srcWidth <= 0) {
		return;
	}

	// Whole source bytes, each producing two output pixels.
	const int fullPairs = srcWidth / 2;
	for (int i = 0; i < fullPairs; i++) {
		const u8 packed = src[i];
		dest[i * 2] = packed & 0xF;
		dest[i * 2 + 1] = packed >> 4;
	}

	// Odd width: the last pixel lives in the low nibble of one more source byte.
	if (srcWidth & 1) {
		dest[srcWidth - 1] = src[fullPairs] & 0xF;
	}
}
|
||||
|
||||
CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, GETextureFormat format, GEPaletteFormat clutformat, uint32_t texaddr, int level, int bufw, TexDecodeFlags flags) {
|
||||
u32 alphaSum = 0xFFFFFFFF;
|
||||
u32 fullAlphaMask = 0x0;
|
||||
|
||||
bool expandTo32bit = (flags & TexDecodeFlags::EXPAND32) != 0;
|
||||
bool reverseColors = (flags & TexDecodeFlags::REVERSE_COLORS) != 0;
|
||||
bool toClut8 = (flags & TexDecodeFlags::TO_CLUT8) != 0;
|
||||
|
||||
if (toClut8 && format != GE_TFMT_CLUT8 && format != GE_TFMT_CLUT4) {
|
||||
_dbg_assert_(false);
|
||||
}
|
||||
|
||||
bool swizzled = gstate.isTextureSwizzled();
|
||||
if ((texaddr & 0x00600000) != 0 && Memory::IsVRAMAddress(texaddr)) {
|
||||
|
@ -1531,6 +1549,15 @@ CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, G
|
|||
texptr = (u8 *)tmpTexBuf32_.data();
|
||||
}
|
||||
|
||||
if (toClut8) {
|
||||
// We just need to expand from 4 to 8 bits.
|
||||
for (int y = 0; y < h; ++y) {
|
||||
Expand4To8Bits((u8 *)out + outPitch * y, texptr + (bufw * y) / 2, w);
|
||||
}
|
||||
// We can't know anything about alpha.
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
switch (clutformat) {
|
||||
case GE_CMODE_16BIT_BGR5650:
|
||||
case GE_CMODE_16BIT_ABGR5551:
|
||||
|
@ -1593,6 +1620,19 @@ CheckAlphaResult TextureCacheCommon::DecodeTextureLevel(u8 *out, int outPitch, G
|
|||
break;
|
||||
|
||||
case GE_TFMT_CLUT8:
|
||||
if (toClut8) {
|
||||
if (gstate.isTextureSwizzled()) {
|
||||
tmpTexBuf32_.resize(bufw * ((h + 7) & ~7));
|
||||
UnswizzleFromMem(tmpTexBuf32_.data(), bufw, texptr, bufw, h, 1);
|
||||
texptr = (u8 *)tmpTexBuf32_.data();
|
||||
}
|
||||
// After deswizzling, we are in the correct format and can just copy.
|
||||
for (int y = 0; y < h; ++y) {
|
||||
memcpy((u8 *)out + outPitch * y, texptr + (bufw * y), w);
|
||||
}
|
||||
// We can't know anything about alpha.
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
return ReadIndexedTex(out, outPitch, level, texptr, 1, bufw, reverseColors, expandTo32bit);
|
||||
|
||||
case GE_TFMT_CLUT16:
|
||||
|
@ -1878,11 +1918,19 @@ void TextureCacheCommon::ApplyTexture() {
|
|||
InvalidateLastTexture();
|
||||
}
|
||||
|
||||
if (entry->status & TexCacheEntry::STATUS_CLUT_GPU) {
|
||||
// Special process.
|
||||
ApplyTextureDepal(entry);
|
||||
entry->lastFrame = gpuStats.numFlips;
|
||||
gstate_c.SetTextureFullAlpha(false);
|
||||
gstate_c.SetTextureIs3D(false);
|
||||
} else {
|
||||
entry->lastFrame = gpuStats.numFlips;
|
||||
BindTexture(entry);
|
||||
gstate_c.SetTextureFullAlpha(entry->GetAlphaStatus() == TexCacheEntry::STATUS_ALPHA_FULL);
|
||||
gstate_c.SetTextureIs3D((entry->status & TexCacheEntry::STATUS_3D) != 0);
|
||||
}
|
||||
}
|
||||
|
||||
static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) {
|
||||
if (IsClutFormat(texFormat)) {
|
||||
|
@ -2093,6 +2141,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
|
|||
gstate_c.Dirty(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
|
||||
}
|
||||
|
||||
// Applies depal to a normal (non-framebuffer) texture, pre-decoded to CLUT8 format.
|
||||
void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) {
|
||||
Draw2DPipeline *textureShader = nullptr;
|
||||
uint32_t clutMode = gstate.clutformat & 0xFFFFFF;
|
||||
|
@ -2114,6 +2163,7 @@ void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) {
|
|||
desc.depth = 1;
|
||||
desc.z_stencil = false;
|
||||
desc.numColorAttachments = 1;
|
||||
desc.tag = "dynamic_clut";
|
||||
dynamicClutFbo_ = draw_->CreateFramebuffer(desc);
|
||||
dynamicClutReinterpreted_ = draw_->CreateFramebuffer(desc);
|
||||
}
|
||||
|
@ -2144,7 +2194,7 @@ void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) {
|
|||
float scaleFactorX = 1.0f;
|
||||
Draw2DPipeline *reinterpret = framebufferManager_->GetReinterpretPipeline(src->fb_format, expectedCLUTBufferFormat, &scaleFactorX);
|
||||
framebufferManager_->BlitUsingRaster(
|
||||
dynamicClutFbo_, 0.0f, 0.0f, 512.0f, 1.0f, dynamicClutReinterpreted_, 0.0f, 0.0f, 512.0f, 1.0f, false, 1.0f, framebufferManager_->Get2DPipeline(DRAW2D_COPY_COLOR), "copy_clut");
|
||||
dynamicClutFbo_, 0.0f, 0.0f, 512.0f, 1.0f, dynamicClutReinterpreted_, 0.0f, 0.0f, scaleFactorX * 512.0f, 1.0f, false, 1.0f, reinterpret, "reinterpret_clut");
|
||||
clutFbo = dynamicClutReinterpreted_;
|
||||
}
|
||||
|
||||
|
@ -2158,8 +2208,8 @@ void TextureCacheCommon::ApplyTextureDepal(TexCacheEntry *entry) {
|
|||
const KnownVertexBounds &bounds = gstate_c.vertBounds;
|
||||
float u1 = 0.0f;
|
||||
float v1 = 0.0f;
|
||||
float u2 = 1.0f;
|
||||
float v2 = 1.0f;
|
||||
float u2 = texWidth;
|
||||
float v2 = texHeight;
|
||||
if (bounds.minV < bounds.maxV) {
|
||||
u1 = (bounds.minU + gstate_c.curTextureXOffset) * texWidth;
|
||||
v1 = (bounds.minV + gstate_c.curTextureYOffset) * texHeight;
|
||||
|
@ -2232,6 +2282,15 @@ void TextureCacheCommon::Clear(bool delete_them) {
|
|||
secondCacheSizeEstimate_ = 0;
|
||||
}
|
||||
videos_.clear();
|
||||
|
||||
if (dynamicClutFbo_) {
|
||||
dynamicClutFbo_->Release();
|
||||
dynamicClutFbo_ = nullptr;
|
||||
}
|
||||
if (dynamicClutReinterpreted_) {
|
||||
dynamicClutReinterpreted_->Release();
|
||||
dynamicClutReinterpreted_ = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
void TextureCacheCommon::DeleteTexture(TexCache::iterator it) {
|
||||
|
@ -2598,6 +2657,21 @@ bool TextureCacheCommon::PrepareBuildTexture(BuildTexturePlan &plan, TexCacheEnt
|
|||
plan.maxPossibleLevels = log2i(std::min(plan.createW, plan.createH)) + 1;
|
||||
}
|
||||
|
||||
if (entry->status & TexCacheEntry::TexStatus::STATUS_CLUT_GPU) {
|
||||
_dbg_assert_(entry->format == GE_TFMT_CLUT4 || entry->format == GE_TFMT_CLUT8);
|
||||
plan.decodeToClut8 = true;
|
||||
// We only support 1 mip level when doing CLUT on GPU for now.
|
||||
// Supporting more would be possible, just not very interesting until we need it.
|
||||
plan.levelsToCreate = 1;
|
||||
plan.levelsToLoad = 1;
|
||||
plan.maxPossibleLevels = 1;
|
||||
plan.scaleFactor = 1;
|
||||
plan.saveTexture = false; // Can't yet save these properly.
|
||||
// TODO: Also forcibly disable replacement, or check that the replacement is an 8-bit paletted texture.
|
||||
} else {
|
||||
plan.decodeToClut8 = false;
|
||||
}
|
||||
|
||||
if (plan.levelsToCreate == 1) {
|
||||
entry->status |= TexCacheEntry::STATUS_NO_MIPS;
|
||||
} else {
|
||||
|
@ -2639,6 +2713,9 @@ void TextureCacheCommon::LoadTextureLevel(TexCacheEntry &entry, uint8_t *data, i
|
|||
if (!gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS) || dstFmt == Draw::DataFormat::R8G8B8A8_UNORM) {
|
||||
texDecFlags |= TexDecodeFlags::EXPAND32;
|
||||
}
|
||||
if (entry.status & TexCacheEntry::STATUS_CLUT_GPU) {
|
||||
texDecFlags |= TexDecodeFlags::TO_CLUT8;
|
||||
}
|
||||
|
||||
CheckAlphaResult alphaResult = DecodeTextureLevel((u8 *)pixelData, decPitch, tfmt, clutformat, texaddr, srcLevel, bufw, texDecFlags);
|
||||
entry.SetAlphaStatus(alphaResult, srcLevel);
|
||||
|
|
|
@ -53,6 +53,7 @@ class ShaderManagerCommon;
|
|||
// Bit flags passed to DecodeTextureLevel to select how a level is decoded.
// Each enumerator occupies its own bit so flags can be OR-ed together.
enum class TexDecodeFlags {
	// Expand the decoded output to 32 bits per pixel.
	EXPAND32 = 1 << 0,
	// Forwarded to the decoders as the "reverse colors" option.
	REVERSE_COLORS = 1 << 1,
	// Output raw 8-bit palette indices (CLUT4/CLUT8 sources only) instead of colors.
	TO_CLUT8 = 1 << 2,
};
|
||||
ENUM_CLASS_BITOPS(TexDecodeFlags);
|
||||
|
||||
|
@ -285,6 +286,9 @@ struct BuildTexturePlan {
|
|||
bool replaceValid;
|
||||
bool saveTexture;
|
||||
|
||||
// TODO: Expand32 should probably also be decided in PrepareBuildTexture.
|
||||
bool decodeToClut8;
|
||||
|
||||
void GetMipSize(int level, int *w, int *h) const {
|
||||
if (replaceValid) {
|
||||
replaced->GetSize(level, *w, *h);
|
||||
|
|
|
@ -107,6 +107,15 @@ void main() {
|
|||
|
||||
)";
|
||||
|
||||
// Returns the number of bytes per pixel for a texture upload format.
// VULKAN_8888_FORMAT is 4 bytes and VULKAN_CLUT8_FORMAT is 1 byte; every other
// format is treated as 2 bytes per pixel.
static int VkFormatBytesPerPixel(VkFormat format) {
	if (format == VULKAN_8888_FORMAT) {
		return 4;
	}
	if (format == VULKAN_CLUT8_FORMAT) {
		return 1;
	}
	// Fallback for all remaining formats.
	return 2;
}
|
||||
|
||||
SamplerCache::~SamplerCache() {
|
||||
DeviceLost();
|
||||
}
|
||||
|
@ -448,6 +457,8 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
|
|||
if (plan.scaleFactor > 1) {
|
||||
// Whether hardware or software scaling, this is the dest format.
|
||||
dstFmt = VULKAN_8888_FORMAT;
|
||||
} else if (plan.decodeToClut8) {
|
||||
dstFmt = VULKAN_CLUT8_FORMAT;
|
||||
}
|
||||
|
||||
// We don't generate mipmaps for 512x512 textures because they're almost exclusively used for menu backgrounds
|
||||
|
@ -479,7 +490,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
|
|||
case VULKAN_4444_FORMAT: mapping = &VULKAN_4444_SWIZZLE; break;
|
||||
case VULKAN_1555_FORMAT: mapping = &VULKAN_1555_SWIZZLE; break;
|
||||
case VULKAN_565_FORMAT: mapping = &VULKAN_565_SWIZZLE; break;
|
||||
default: mapping = &VULKAN_8888_SWIZZLE; break;
|
||||
default: mapping = &VULKAN_8888_SWIZZLE; break; // no swizzle
|
||||
}
|
||||
|
||||
VkImageLayout imageLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
|
||||
|
@ -562,7 +573,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
|
|||
int mipHeight;
|
||||
plan.GetMipSize(i, &mipWidth, &mipHeight);
|
||||
|
||||
int bpp = actualFmt == VULKAN_8888_FORMAT ? 4 : 2; // output bpp
|
||||
int bpp = VkFormatBytesPerPixel(actualFmt);
|
||||
int stride = (mipWidth * bpp + 15) & ~15; // output stride
|
||||
int uploadSize = stride * mipHeight;
|
||||
|
||||
|
@ -602,7 +613,7 @@ void TextureCacheVulkan::BuildTexture(TexCacheEntry *const entry) {
|
|||
loadLevel(uploadSize, i, stride, plan.scaleFactor);
|
||||
entry->vkTex->UploadMip(cmdInit, 0, mipWidth, mipHeight, i, texBuf, bufferOffset, stride / bpp);
|
||||
} else if (computeUpload) {
|
||||
int srcBpp = dstFmt == VULKAN_8888_FORMAT ? 4 : 2;
|
||||
int srcBpp = VkFormatBytesPerPixel(dstFmt);
|
||||
int srcStride = mipUnscaledWidth * srcBpp;
|
||||
int srcSize = srcStride * mipUnscaledHeight;
|
||||
loadLevel(srcSize, i == 0 ? plan.baseLevelSrc : i, srcStride, 1);
|
||||
|
@ -723,7 +734,7 @@ void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePt
|
|||
_assert_msg_(texaddr != 0, "Can't load a texture from address null")
|
||||
|
||||
int bufw = GetTextureBufw(level, texaddr, tfmt);
|
||||
int bpp = dstFmt == VULKAN_8888_FORMAT ? 4 : 2;
|
||||
int bpp = VkFormatBytesPerPixel(dstFmt);
|
||||
|
||||
u32 *pixelData;
|
||||
int decPitch;
|
||||
|
@ -732,6 +743,9 @@ void TextureCacheVulkan::LoadTextureLevel(TexCacheEntry &entry, uint8_t *writePt
|
|||
if (!gstate_c.Supports(GPU_SUPPORTS_16BIT_FORMATS) || scaleFactor > 1 || dstFmt == VULKAN_8888_FORMAT) {
|
||||
texDecFlags |= TexDecodeFlags::EXPAND32;
|
||||
}
|
||||
if (entry.status & TexCacheEntry::STATUS_CLUT_GPU) {
|
||||
texDecFlags |= TexDecodeFlags::TO_CLUT8;
|
||||
}
|
||||
|
||||
if (scaleFactor > 1) {
|
||||
tmpTexBufRearrange_.resize(std::max(bufw, w) * h);
|
||||
|
|
|
@ -36,6 +36,7 @@ extern const VkComponentMapping VULKAN_8888_SWIZZLE;
|
|||
#define VULKAN_1555_FORMAT VK_FORMAT_A1R5G5B5_UNORM_PACK16
|
||||
#define VULKAN_565_FORMAT VK_FORMAT_B5G6R5_UNORM_PACK16 // TODO: Does not actually have mandatory support, though R5G6B5 does! See #14602
|
||||
#define VULKAN_8888_FORMAT VK_FORMAT_R8G8B8A8_UNORM
|
||||
#define VULKAN_CLUT8_FORMAT VK_FORMAT_R8_UNORM
|
||||
|
||||
// Manager for compute shaders that upload things (and those have two bindings: a storage buffer to read from and an image to write to).
|
||||
class VulkanComputeShaderManager {
|
||||
|
|
|
@ -1313,8 +1313,8 @@ ULES00703 = true
|
|||
# Temporary compatibility option, while developing a GPU CLUT-from-framebuffer path.
|
||||
|
||||
# Burnout Dominator - lens flare effect (issue #11100)
|
||||
ULUS10236 = true
|
||||
ULES00703 = true
|
||||
# ULUS10236 = true
|
||||
# ULES00703 = true
|
||||
|
||||
[UploadDepthForCLUTTextures]
|
||||
# Burnout Dominator - lens flare effect (issue #11100)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue