Compare commits
1 commit
master
...
socom-depa
Author | SHA1 | Date | |
---|---|---|---|
|
7e160de166 |
6 changed files with 71 additions and 31 deletions
|
@ -78,7 +78,13 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
|
|||
// An alternative would be to have a special mode where we keep some extra precision here and sample the CLUT linearly - works for ramps such
|
||||
// as those that Test Drive uses for its color remapping. But would need game specific flagging.
|
||||
|
||||
// TODO: Make generic.
|
||||
if (config.bufferFormat == GE_FORMAT_5551 && config.textureFormat == GE_FORMAT_CLUT8) {
|
||||
// The texcoord will already effectively be scaled.
|
||||
writer.C(" vec4 color = ").SampleTexture2D("tex", "vec2(texcoord.x, texcoord.y)").C(";\n");
|
||||
} else {
|
||||
writer.C(" vec4 color = ").SampleTexture2D("tex", "texcoord").C(";\n");
|
||||
}
|
||||
|
||||
int shiftedMask = mask << shift;
|
||||
switch (config.bufferFormat) {
|
||||
|
@ -111,6 +117,12 @@ void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {
|
|||
if (shiftedMask & 0x7C00) writer.C(" int b = int(color.b * 31.99);\n"); else writer.C(" int b = 0;\n");
|
||||
if (shiftedMask & 0x8000) writer.C(" int a = int(color.a);\n"); else writer.C(" int a = 0;\n");
|
||||
writer.C(" int index = (a << 15) | (b << 10) | (g << 5) | (r);\n");
|
||||
if (config.bufferFormat == GE_FORMAT_5551 && config.textureFormat == GE_FORMAT_CLUT8) {
|
||||
writer.C(" int tx = int((texcoord.x * 2.0 / scaleFactor) * texSize.x);\n");
|
||||
// I think this is backwards, but seems to work. Maybe need some small offset to nudge it right
|
||||
// when texturing.
|
||||
writer.C(" if ((tx & 1) == 0) { index >>= 8; } else { index &= 0xFF; }\n");
|
||||
}
|
||||
break;
|
||||
case GE_FORMAT_DEPTH16:
|
||||
// Decode depth buffer.
|
||||
|
@ -347,7 +359,11 @@ void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) {
|
|||
void GenerateDepalFs(ShaderWriter &writer, const DepalConfig &config) {
|
||||
writer.DeclareSamplers(samplers);
|
||||
writer.HighPrecisionFloat();
|
||||
writer.BeginFSMain(config.bufferFormat == GE_FORMAT_DEPTH16 ? g_draw2Duniforms : Slice<UniformDef>::empty(), varyings);
|
||||
|
||||
bool needsUniforms = config.bufferFormat == GE_FORMAT_DEPTH16 ||
|
||||
(config.bufferFormat == GE_FORMAT_5551 && config.textureFormat == GE_FORMAT_CLUT8); // The SOCOM problem
|
||||
|
||||
writer.BeginFSMain(needsUniforms ? g_draw2Duniforms : Slice<UniformDef>::empty(), varyings);
|
||||
if (config.smoothedDepal) {
|
||||
// Handles a limited set of cases, but doesn't need any integer math so we don't
|
||||
// need two variants.
|
||||
|
|
|
@ -261,7 +261,7 @@ Draw2DPipeline *Draw2D::Create2DPipeline(std::function<Draw2DPipelineInfo (Shade
|
|||
|
||||
ShaderModule *fs = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)fsCode, strlen(fsCode), info.tag);
|
||||
|
||||
_assert_(fs);
|
||||
_assert_msg_(fs, "Failed to create shader module!\n%s", fsCode);
|
||||
|
||||
// verts have positions in 2D clip coordinates.
|
||||
static const InputLayoutDesc desc = {
|
||||
|
|
|
@ -1027,14 +1027,17 @@ bool TextureCacheCommon::MatchFramebuffer(
|
|||
}
|
||||
|
||||
// Check works for D16 too.
|
||||
// These are the combinations that we have special-cased handling for. More
|
||||
// combinations are possible, but they are rare.
|
||||
const bool matchingClutFormat =
|
||||
(fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_CLUT16) ||
|
||||
(fb_format == GE_FORMAT_DEPTH16 && entry.format == GE_TFMT_5650) ||
|
||||
(fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT32) ||
|
||||
(fb_format != GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT16) ||
|
||||
(fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT8);
|
||||
(fb_format == GE_FORMAT_8888 && entry.format == GE_TFMT_CLUT8) ||
|
||||
(fb_format == GE_FORMAT_5551 && entry.format == GE_TFMT_CLUT8);
|
||||
|
||||
const int texBitsPerPixel = std::max(1U, (u32)textureBitsPerPixel[entry.format]);
|
||||
const int texBitsPerPixel = TextureFormatBitsPerPixel(entry.format);
|
||||
const int byteOffset = texaddr - addr;
|
||||
if (byteOffset > 0) {
|
||||
matchInfo->yOffset = byteOffset / fb_stride_in_bytes;
|
||||
|
@ -2144,6 +2147,7 @@ void TextureCacheCommon::ApplyTexture() {
|
|||
}
|
||||
}
|
||||
|
||||
// Can we depalettize at all? This refers to both in-fragment-shader depal and "traditional" depal through a separate pass.
|
||||
static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferFormat) {
|
||||
if (IsClutFormat(texFormat)) {
|
||||
switch (bufferFormat) {
|
||||
|
@ -2154,6 +2158,10 @@ static bool CanDepalettize(GETextureFormat texFormat, GEBufferFormat bufferForma
|
|||
if (texFormat == GE_TFMT_CLUT16) {
|
||||
return true;
|
||||
}
|
||||
if (texFormat == GE_TFMT_CLUT8 && bufferFormat == GE_FORMAT_5551) {
|
||||
// Wacky case from issue #16210 (SOCOM etc). Special depal mode (separate depalettize only).
|
||||
return true;
|
||||
}
|
||||
break;
|
||||
case GE_FORMAT_8888:
|
||||
if (texFormat == GE_TFMT_CLUT32 || texFormat == GE_TFMT_CLUT8) { // clut8 takes a special depal mode.
|
||||
|
@ -2213,7 +2221,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
|
|||
bool useShaderDepal = framebufferManager_->GetCurrentRenderVFB() != framebuffer &&
|
||||
!depth && clutRenderAddress_ == 0xFFFFFFFF &&
|
||||
!gstate_c.curTextureIs3D &&
|
||||
draw_->GetShaderLanguageDesc().bitwiseOps;
|
||||
draw_->GetShaderLanguageDesc().bitwiseOps
|
||||
&& !(texFormat == GE_TFMT_CLUT8 && framebuffer->fb_format == GE_FORMAT_5551); // This special case we don't handle in the shader.
|
||||
|
||||
switch (draw_->GetShaderLanguageDesc().shaderLanguage) {
|
||||
case ShaderLanguage::HLSL_D3D9:
|
||||
|
@ -2292,6 +2301,8 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
|
|||
if (textureShader) {
|
||||
bool needsDepthXSwizzle = depthUpperBits == 2;
|
||||
|
||||
float depalXScale = 8.0f * (float)BufferFormatBytesPerPixel(framebuffer->fb_format) / (float)TextureFormatBitsPerPixel(texFormat);
|
||||
|
||||
int depalWidth = framebuffer->renderWidth;
|
||||
int texWidth = framebuffer->width;
|
||||
if (needsDepthXSwizzle) {
|
||||
|
@ -2315,13 +2326,13 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
|
|||
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
|
||||
}
|
||||
|
||||
Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, depalWidth, framebuffer->renderHeight);
|
||||
Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, depalWidth * depalXScale, framebuffer->renderHeight);
|
||||
draw_->BindTexture(0, nullptr);
|
||||
draw_->BindTexture(1, nullptr);
|
||||
draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "Depal");
|
||||
draw_->InvalidateFramebuffer(Draw::FB_INVALIDATION_STORE, Draw::FB_DEPTH_BIT | Draw::FB_STENCIL_BIT);
|
||||
draw_->SetScissorRect(u1, v1, u2 - u1, v2 - v1);
|
||||
Draw::Viewport viewport{ 0.0f, 0.0f, (float)depalWidth, (float)framebuffer->renderHeight, 0.0f, 1.0f };
|
||||
draw_->SetScissorRect(u1 * depalXScale, v1, (u2 - u1) * depalXScale, v2 - v1);
|
||||
Draw::Viewport viewport{ 0.0f, 0.0f, (float)depalWidth * depalXScale, (float)framebuffer->renderHeight, 0.0f, 1.0f };
|
||||
draw_->SetViewport(viewport);
|
||||
|
||||
draw_->BindFramebufferAsTexture(framebuffer->fbo, 0, depth ? Draw::FB_DEPTH_BIT : Draw::FB_COLOR_BIT, Draw::ALL_LAYERS);
|
||||
|
@ -2335,11 +2346,16 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer
|
|||
draw_->BindSamplerStates(0, 1, &nearest);
|
||||
draw_->BindSamplerStates(1, 1, &clutSampler);
|
||||
|
||||
draw2D_->Blit(textureShader, u1, v1, u2, v2, u1, v1, u2, v2, framebuffer->renderWidth, framebuffer->renderHeight, depalWidth, framebuffer->renderHeight, false, framebuffer->renderScaleFactor);
|
||||
// NOTE: We need to "stretch" if depalXScale is wrong.
|
||||
draw2D_->Blit(textureShader, u1, v1, u2, v2, u1, v1, u2, v2,
|
||||
framebuffer->renderWidth, framebuffer->renderHeight, depalWidth, framebuffer->renderHeight,
|
||||
false, framebuffer->renderScaleFactor);
|
||||
|
||||
gpuStats.numDepal++;
|
||||
|
||||
gstate_c.curTextureWidth = texWidth;
|
||||
gstate_c.curTextureWidth = texWidth * depalXScale;
|
||||
gstate_c.curTextureXOffset /= depalXScale * depalXScale; // This state gets wrecked by SetTexture which happens AFTER apply texture! Something is badly wrong. (D3D11).
|
||||
gstate_c.Dirty(DIRTY_TEXCLAMP);
|
||||
|
||||
draw_->BindTexture(0, nullptr);
|
||||
framebufferManager_->RebindFramebuffer("ApplyTextureFramebuffer");
|
||||
|
|
|
@ -48,6 +48,25 @@
|
|||
#define DO_NOT_VECTORIZE_LOOP
|
||||
#endif
|
||||
|
||||
// Bits per pixel for each texture format, indexed by the numeric texture format value
// (see TextureFormatBitsPerPixel). Entries marked INVALID hold 0.
const u8 textureBitsPerPixel[16] = {
|
||||
16, //GE_TFMT_5650,
|
||||
16, //GE_TFMT_5551,
|
||||
16, //GE_TFMT_4444,
|
||||
32, //GE_TFMT_8888,
|
||||
4, //GE_TFMT_CLUT4,
|
||||
8, //GE_TFMT_CLUT8,
|
||||
16, //GE_TFMT_CLUT16,
|
||||
32, //GE_TFMT_CLUT32,
|
||||
4, //GE_TFMT_DXT1,
|
||||
8, //GE_TFMT_DXT3,
|
||||
8, //GE_TFMT_DXT5,
|
||||
0, // INVALID,
|
||||
0, // INVALID,
|
||||
0, // INVALID,
|
||||
0, // INVALID,
|
||||
0, // INVALID,
|
||||
};
|
||||
|
||||
#ifdef _M_SSE
|
||||
|
||||
static u32 QuickTexHashSSE2(const void *checkp, u32 size) {
|
||||
|
|
|
@ -73,27 +73,16 @@ uint32_t GetDXT1Texel(const DXT1Block *src, int x, int y);
|
|||
uint32_t GetDXT3Texel(const DXT3Block *src, int x, int y);
|
||||
uint32_t GetDXT5Texel(const DXT5Block *src, int x, int y);
|
||||
|
||||
static const u8 textureBitsPerPixel[16] = {
|
||||
16, //GE_TFMT_5650,
|
||||
16, //GE_TFMT_5551,
|
||||
16, //GE_TFMT_4444,
|
||||
32, //GE_TFMT_8888,
|
||||
4, //GE_TFMT_CLUT4,
|
||||
8, //GE_TFMT_CLUT8,
|
||||
16, //GE_TFMT_CLUT16,
|
||||
32, //GE_TFMT_CLUT32,
|
||||
4, //GE_TFMT_DXT1,
|
||||
8, //GE_TFMT_DXT3,
|
||||
8, //GE_TFMT_DXT5,
|
||||
0, // INVALID,
|
||||
0, // INVALID,
|
||||
0, // INVALID,
|
||||
0, // INVALID,
|
||||
0, // INVALID,
|
||||
};
|
||||
extern const u8 textureBitsPerPixel[16];
|
||||
|
||||
u32 GetTextureBufw(int level, u32 texaddr, GETextureFormat format);
|
||||
|
||||
// WARNING: Returns bits, not bytes. This is needed due to the presence of 4-bit formats.
// Looks up `format` in textureBitsPerPixel; a table entry of 0 is reported as 1 instead.
|
||||
inline u32 TextureFormatBitsPerPixel(GETextureFormat format) {
|
||||
u32 bits = textureBitsPerPixel[(int)format];
|
||||
return bits != 0 ? bits : 1; // Best to return 1 here to survive divisions in case of invalid data.
|
||||
}
|
||||
|
||||
// Returns true only when fullAlphaMask is non-zero and every bit set in fullAlphaMask
// is also set in alphaSum.
inline bool AlphaSumIsFull(u32 alphaSum, u32 fullAlphaMask) {
|
||||
return fullAlphaMask != 0 && (alphaSum & fullAlphaMask) == fullAlphaMask;
|
||||
}
|
||||
|
|
2
ext/zstd
2
ext/zstd
|
@ -1 +1 @@
|
|||
Subproject commit 63779c798237346c2b245c546c40b72a5a5913fe
|
||||
Subproject commit 096dccbc2d89a560db0b9892c53ea0c77eff20a1
|
Loading…
Add table
Add a link
Reference in a new issue