diff --git a/GPU/Common/DepalettizeShaderCommon.cpp b/GPU/Common/DepalettizeShaderCommon.cpp index 1d3630919..aa128021a 100644 --- a/GPU/Common/DepalettizeShaderCommon.cpp +++ b/GPU/Common/DepalettizeShaderCommon.cpp @@ -78,7 +78,13 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) { // An alternative would be to have a special mode where we keep some extra precision here and sample the CLUT linearly - works for ramps such // as those that Test Drive uses for its color remapping. But would need game specific flagging. - writer.C(" vec4 color = ").SampleTexture2D("tex", "texcoord").C(";\n"); + // TODO: Make generic. + if (config.bufferFormat == GE_FORMAT_5551 && config.textureFormat == GE_FORMAT_CLUT8) { + // The texcoord will already effectively be scaled. + writer.C(" vec4 color = ").SampleTexture2D("tex", "vec2(texcoord.x, texcoord.y)").C(";\n"); + } else { + writer.C(" vec4 color = ").SampleTexture2D("tex", "texcoord").C(";\n"); + } int shiftedMask = mask << shift; switch (config.bufferFormat) { @@ -111,6 +117,12 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) { if (shiftedMask & 0x7C00) writer.C(" int b = int(color.b * 31.99);\n"); else writer.C(" int b = 0;\n"); if (shiftedMask & 0x8000) writer.C(" int a = int(color.a);\n"); else writer.C(" int a = 0;\n"); writer.C(" int index = (a << 15) | (b << 10) | (g << 5) | (r);\n"); + if (config.bufferFormat == GE_FORMAT_5551 && config.textureFormat == GE_FORMAT_CLUT8) { + writer.C(" int tx = int((texcoord.x * 2.0 / scaleFactor) * texSize.x);\n"); + // I think this is backwards, but seems to work. Maybe need some small offset to nudge it right + // when texturing. + writer.C(" if ((tx & 1) == 0) { index >>= 8; } else { index &= 0xFF; }\n"); + } break; case GE_FORMAT_DEPTH16: // Decode depth buffer. @@ -347,7 +359,11 @@ void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) { void GenerateDepalFs(ShaderWriter &writer, const DepalConfig &config) { writer.DeclareSamplers(samplers); writer.HighPrecisionFloat(); - writer.BeginFSMain(config.bufferFormat == GE_FORMAT_DEPTH16 ? g_draw2Duniforms : Slice::empty(), varyings); + + bool needsUniforms = config.bufferFormat == GE_FORMAT_DEPTH16 || + (config.bufferFormat == GE_FORMAT_5551 && config.textureFormat == GE_FORMAT_CLUT8); // The SOCOM problem + + writer.BeginFSMain(needsUniforms ? g_draw2Duniforms : Slice::empty(), varyings); if (config.smoothedDepal) { // Handles a limited set of cases, but doesn't need any integer math so we don't // need two variants. diff --git a/GPU/Common/Draw2D.cpp b/GPU/Common/Draw2D.cpp index 4f1927795..95a53bf2c 100644 --- a/GPU/Common/Draw2D.cpp +++ b/GPU/Common/Draw2D.cpp @@ -261,7 +261,7 @@ Draw2DPipeline *Draw2D::Create2DPipeline(std::functionCreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)fsCode, strlen(fsCode), info.tag); - _assert_(fs); + _assert_msg_(fs, "Failed to create shader module!\n%s", fsCode); // verts have positions in 2D clip coordinates. static const InputLayoutDesc desc = { diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 4984df18e..d90f14584 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1027,14 +1027,17 @@ bool TextureCacheCommon::MatchFramebuffer( } // Check works for D16 too. + // These are combinations that we have special-cased handling for. There are more + // ones possible, but rare. const bool matchingClutFormat = (fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_CLUT16) || (fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_5650) || (fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) || (fb_format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16) || - (fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT8); + (fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT8) || + (fb_format == GE_FORMAT_5551 && entry.format == GE_TFMT_CLUT8); - const int texBitsPerPixel = std::max(1U, (u32)textureBitsPerPixel[entry.format]); + const int texBitsPerPixel = TextureFormatBitsPerPixel(entry.format); const int byteOffset = texaddr - addr; if (byteOffset > 0) { matchInfo->yOffset = byteOffset / fb_stride_in_bytes; @@ -2144,6 +2147,7 @@ void TextureCacheCommon::ApplyTexture() { } } +// Can we depalettize at all? This refers to both in-fragment-shader depal and "traditional" depal through a separate pass. static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) { if (IsClutFormat(texFormat)) { switch (bufferFormat) { @@ -2154,6 +2158,10 @@ static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferForma if (texFormat == GE_TFMT_CLUT16) { return true; } + if (texFormat == GE_TFMT_CLUT8 && bufferFormat == GE_FORMAT_5551) { + // Wacky case from issue #16210 (SOCOM etc). Special depal mode (separate depalettize only). + return true; + } break; case GE_FORMAT_8888: if (texFormat == GE_TFMT_CLUT32 || texFormat == GE_TFMT_CLUT8) { // clut8 takes a special depal mode. @@ -2213,7 +2221,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer && !depth && clutRenderAddress_ == 0xFFFFFFFF && !gstate_c.curTextureIs3D && - draw_->GetShaderLanguageDesc().bitwiseOps; + draw_->GetShaderLanguageDesc().bitwiseOps + && !(texFormat == GE_TFMT_CLUT8 && framebuffer->fb_format == GE_FORMAT_5551); // This special case we don't handle in the shader. switch (draw_->GetShaderLanguageDesc().shaderLanguage) { case ShaderLanguage::HLSL_D3D9: @@ -2292,6 +2301,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer if (textureShader) { bool needsDepthXSwizzle = depthUpperBits == 2; + float depalXScale = 8.0f * (float)BufferFormatBytesPerPixel(framebuffer->fb_format) / (float)TextureFormatBitsPerPixel(texFormat); + int depalWidth = framebuffer->renderWidth; int texWidth = framebuffer->width; if (needsDepthXSwizzle) { @@ -2315,13 +2326,13 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); } - Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, depalWidth, framebuffer->renderHeight); + Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, depalWidth * depalXScale, framebuffer->renderHeight); draw_->BindTexture(0, nullptr); draw_->BindTexture(1, nullptr); draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "Depal"); draw_->InvalidateFramebuffer(Draw::FB_INVALIDATION_STORE, Draw::FB_DEPTH_BIT | Draw::FB_STENCIL_BIT); - draw_->SetScissorRect(u1, v1, u2 - u1, v2 - v1); - Draw::Viewport viewport{ 0.0f, 0.0f, (float)depalWidth, (float)framebuffer->renderHeight, 0.0f, 1.0f }; + draw_->SetScissorRect(u1 * depalXScale, v1, (u2 - u1) * depalXScale, v2 - v1); + Draw::Viewport viewport{ 0.0f, 0.0f, (float)depalWidth * depalXScale, (float)framebuffer->renderHeight, 0.0f, 1.0f }; draw_->SetViewport(viewport); draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, Draw::ALL_LAYERS); @@ -2335,12 +2346,17 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer draw_->BindSamplerStates(0, 1, &nearest); draw_->BindSamplerStates(1, 1, &clutSampler); - draw2D_->Blit(textureShader, u1, v1, u2, v2, u1, v1, u2, v2, framebuffer->renderWidth, framebuffer->renderHeight, depalWidth, framebuffer->renderHeight, false, framebuffer->renderScaleFactor); + // NOTE: We need to "stretch" if depalXScale is wrong, + draw2D_->Blit(textureShader, u1, v1, u2, v2, u1, v1, u2, v2, + framebuffer->renderWidth, framebuffer->renderHeight, depalWidth, framebuffer->renderHeight, + false, framebuffer->renderScaleFactor); gpuStats.numDepal++; - gstate_c.curTextureWidth = texWidth; - + gstate_c.curTextureWidth = texWidth * depalXScale; + gstate_c.curTextureXOffset /= depalXScale * depalXScale; // This state gets wrecked by SetTexture which happens AFTER apply texture! Something is badly wrong. (D3D11). + gstate_c.Dirty(DIRTY_TEXCLAMP); + draw_->BindTexture(0, nullptr); framebufferManager_->RebindFramebuffer("ApplyTextureFramebuffer"); diff --git a/GPU/Common/TextureDecoder.cpp b/GPU/Common/TextureDecoder.cpp index a12be66cb..32da2dffa 100644 --- a/GPU/Common/TextureDecoder.cpp +++ b/GPU/Common/TextureDecoder.cpp @@ -48,6 +48,25 @@ #define DO_NOT_VECTORIZE_LOOP #endif +const u8 textureBitsPerPixel[16] = { + 16, //GE_TFMT_5650, + 16, //GE_TFMT_5551, + 16, //GE_TFMT_4444, + 32, //GE_TFMT_8888, + 4, //GE_TFMT_CLUT4, + 8, //GE_TFMT_CLUT8, + 16, //GE_TFMT_CLUT16, + 32, //GE_TFMT_CLUT32, + 4, //GE_TFMT_DXT1, + 8, //GE_TFMT_DXT3, + 8, //GE_TFMT_DXT5, + 0, // INVALID, + 0, // INVALID, + 0, // INVALID, + 0, // INVALID, + 0, // INVALID, +}; + #ifdef _M_SSE static u32 QuickTexHashSSE2(const void *checkp, u32 size) { diff --git a/GPU/Common/TextureDecoder.h b/GPU/Common/TextureDecoder.h index ad54815ff..d56cfb361 100644 --- a/GPU/Common/TextureDecoder.h +++ b/GPU/Common/TextureDecoder.h @@ -73,27 +73,16 @@ uint32_t GetDXT1Texel(const DXT1Block *src, int x, int y); uint32_t GetDXT3Texel(const DXT3Block *src, int x, int y); uint32_t GetDXT5Texel(const DXT5Block *src, int x, int y); -static const u8 textureBitsPerPixel[16] = { - 16, //GE_TFMT_5650, - 16, //GE_TFMT_5551, - 16, //GE_TFMT_4444, - 32, //GE_TFMT_8888, - 4, //GE_TFMT_CLUT4, - 8, //GE_TFMT_CLUT8, - 16, //GE_TFMT_CLUT16, - 32, //GE_TFMT_CLUT32, - 4, //GE_TFMT_DXT1, - 8, //GE_TFMT_DXT3, - 8, //GE_TFMT_DXT5, - 0, // INVALID, - 0, // INVALID, - 0, // INVALID, - 0, // INVALID, - 0, // INVALID, -}; +extern const u8 textureBitsPerPixel[16]; u32 GetTextureBufw(int level, u32 texaddr, GETextureFormat format); +// WARNING: Bits not bytes, this is needed due to the presence of 4-bit formats. +inline u32 TextureFormatBitsPerPixel(GETextureFormat format) { + u32 bits = textureBitsPerPixel[(int)format]; + return bits != 0 ? bits : 1; // Best to return 1 here to survive divisions in case of invalid data. +} + inline bool AlphaSumIsFull(u32 alphaSum, u32 fullAlphaMask) { return fullAlphaMask != 0 && (alphaSum & fullAlphaMask) == fullAlphaMask; } diff --git a/ext/zstd b/ext/zstd index 63779c798..096dccbc2 160000 --- a/ext/zstd +++ b/ext/zstd @@ -1 +1 @@ -Subproject commit 63779c798237346c2b245c546c40b72a5a5913fe +Subproject commit 096dccbc2d89a560db0b9892c53ea0c77eff20a1