Merge pull request #6070 from unknownbrackets/gpu-blend

Apply unsupported blending modes in the shader
This commit is contained in:
Henrik Rydgård 2014-05-27 17:45:33 +02:00
commit 8d84c912eb
11 changed files with 246 additions and 34 deletions

View file

@ -145,6 +145,8 @@ ReplaceAlphaType ReplaceAlphaWithStencil() {
if (gstate.isAlphaBlendEnabled()) { if (gstate.isAlphaBlendEnabled()) {
if (nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()]) { if (nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()]) {
return REPLACE_ALPHA_YES; return REPLACE_ALPHA_YES;
} else if (ShouldUseShaderBlending()) {
return REPLACE_ALPHA_YES;
} else { } else {
if (gl_extensions.ARB_blend_func_extended) { if (gl_extensions.ARB_blend_func_extended) {
return REPLACE_ALPHA_DUALSOURCE; return REPLACE_ALPHA_DUALSOURCE;
@ -277,10 +279,69 @@ static bool CanDoubleSrcBlendMode() {
} }
} }
// TODO: Setting to disable?
bool ShouldUseShaderBlending() {
if (!gstate.isAlphaBlendEnabled()) {
return false;
}
// We can't blit on GLES2, so we don't support it. We also want texelFetch (OpenGL 3.0+ / GLES3+.)
if (!gl_extensions.VersionGEThan(3, 0, 0) && !gl_extensions.GLES3) {
return false;
}
GEBlendSrcFactor funcA = gstate.getBlendFuncA();
GEBlendDstFactor funcB = gstate.getBlendFuncB();
GEBlendMode eq = gstate.getBlendEq();
if (eq == GE_BLENDMODE_ABSDIFF) {
return true;
}
// This normally involves a blit, so try to skip it.
if (AlphaToColorDoubling() || CanDoubleSrcBlendMode()) {
return false;
}
switch (funcA) {
case GE_SRCBLEND_DOUBLESRCALPHA:
case GE_SRCBLEND_DOUBLEINVSRCALPHA:
case GE_SRCBLEND_DOUBLEDSTALPHA:
case GE_SRCBLEND_DOUBLEINVDSTALPHA:
return true;
case GE_SRCBLEND_FIXA:
if (funcB == GE_DSTBLEND_FIXB) {
u32 fixA = gstate.getFixA();
u32 fixB = gstate.getFixB();
// OpenGL only supports one constant color, so check if we could be more exact.
if (fixA != fixB && fixA != 0xFFFFFF - fixB && fixA != 0 && fixB != 0 && fixA != 0xFFFFFF && fixB != 0xFFFFFF) {
return true;
}
}
default:
break;
}
switch (funcB) {
case GE_DSTBLEND_DOUBLESRCALPHA:
case GE_DSTBLEND_DOUBLEINVSRCALPHA:
case GE_DSTBLEND_DOUBLEDSTALPHA:
case GE_DSTBLEND_DOUBLEINVDSTALPHA:
return true;
default:
break;
}
return false;
}
// Here we must take all the bits of the gstate that determine what the fragment shader will // Here we must take all the bits of the gstate that determine what the fragment shader will
// look like, and concatenate them together into an ID. // look like, and concatenate them together into an ID.
void ComputeFragmentShaderID(FragmentShaderID *id) { void ComputeFragmentShaderID(FragmentShaderID *id) {
int id0 = 0; int id0 = 0;
int id1 = 0;
if (gstate.isModeClear()) { if (gstate.isModeClear()) {
// We only need one clear shader, so let's ignore the rest of the bits. // We only need one clear shader, so let's ignore the rest of the bits.
id0 = 1; id0 = 1;
@ -296,7 +357,6 @@ void ComputeFragmentShaderID(FragmentShaderID *id) {
bool enableAlphaDoubling = !alphaToColorDoubling && CanDoubleSrcBlendMode(); bool enableAlphaDoubling = !alphaToColorDoubling && CanDoubleSrcBlendMode();
bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX; bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX;
bool doTextureAlpha = gstate.isTextureAlphaUsed(); bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool computeAbsdiff = gstate.getBlendEq() == GE_BLENDMODE_ABSDIFF;
ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil(); ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil();
// All texfuncs except replace are the same for RGB as for RGBA with full alpha. // All texfuncs except replace are the same for RGB as for RGBA with full alpha.
@ -338,12 +398,17 @@ void ComputeFragmentShaderID(FragmentShaderID *id) {
else else
gpuStats.numNonAlphaTestedDraws++; gpuStats.numNonAlphaTestedDraws++;
if (computeAbsdiff) { if (ShouldUseShaderBlending()) {
id0 |= (computeAbsdiff & 1) << 25; // 12 bits total.
id1 |= 1;
id1 |= (gstate.getBlendEq() << 1);
id1 |= (gstate.getBlendFuncA() << 4);
id1 |= (gstate.getBlendFuncB() << 8);
} }
} }
id->d[0] = id0; id->d[0] = id0;
id->d[1] = id1;
} }
// Missing: Z depth range // Missing: Z depth range
@ -423,7 +488,6 @@ void GenerateFragmentShader(char *buffer) {
bool enableAlphaDoubling = !alphaToColorDoubling && CanDoubleSrcBlendMode(); bool enableAlphaDoubling = !alphaToColorDoubling && CanDoubleSrcBlendMode();
bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX; bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX;
bool doTextureAlpha = gstate.isTextureAlphaUsed(); bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool computeAbsdiff = gstate.getBlendEq() == GE_BLENDMODE_ABSDIFF;
ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil(); ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil();
if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE) if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE)
@ -431,6 +495,17 @@ void GenerateFragmentShader(char *buffer) {
if (doTexture) if (doTexture)
WRITE(p, "uniform sampler2D tex;\n"); WRITE(p, "uniform sampler2D tex;\n");
if (ShouldUseShaderBlending() && !gstate.isModeClear()) {
if (!gl_extensions.NV_shader_framebuffer_fetch) {
WRITE(p, "uniform sampler2D fbotex;\n");
}
if (gstate.getBlendFuncA() == GE_SRCBLEND_FIXA) {
WRITE(p, "uniform vec3 u_blendFixA;\n");
}
if (gstate.getBlendFuncB() == GE_DSTBLEND_FIXB) {
WRITE(p, "uniform vec3 u_blendFixB;\n");
}
}
if (enableAlphaTest || enableColorTest) { if (enableAlphaTest || enableColorTest) {
WRITE(p, "uniform vec4 u_alphacolorref;\n"); WRITE(p, "uniform vec4 u_alphacolorref;\n");
@ -607,12 +682,76 @@ void GenerateFragmentShader(char *buffer) {
WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n"); WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
// WRITE(p, " v.x = v_depth;\n"); // WRITE(p, " v.x = v_depth;\n");
} }
if (ShouldUseShaderBlending()) {
// If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit.
// We can just read the prev value more directly.
// TODO: EXT_shader_framebuffer_fetch on iOS 6, possibly others.
if (gl_extensions.NV_shader_framebuffer_fetch) {
WRITE(p, " lowp vec4 destColor = gl_LastFragData[0];\n");
} else {
WRITE(p, " lowp vec4 destColor = texelFetch(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n");
} }
// Handle ABSDIFF blending mode using NV_shader_framebuffer_fetch GEBlendSrcFactor funcA = gstate.getBlendFuncA();
if (computeAbsdiff && gl_extensions.NV_shader_framebuffer_fetch) { GEBlendDstFactor funcB = gstate.getBlendFuncB();
WRITE(p, " lowp vec4 destColor = gl_LastFragData[0];\n"); GEBlendMode eq = gstate.getBlendEq();
WRITE(p, " gl_FragColor = abs(destColor - v);\n");
const char *srcFactor = "vec3(1.0)";
const char *dstFactor = "vec3(0.0)";
switch (funcA)
{
case GE_SRCBLEND_DSTCOLOR: srcFactor = "destColor.rgb"; break;
case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(vec3(1.0) - destColor.rgb)"; break;
case GE_SRCBLEND_SRCALPHA: srcFactor = "vec3(v.a)"; break;
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "vec3(1.0 - v.a)"; break;
case GE_SRCBLEND_DSTALPHA: srcFactor = "vec3(destColor.a)"; break;
case GE_SRCBLEND_INVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a)"; break;
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "vec3(v.a * 2.0)"; break;
// TODO: Double inverse, or inverse double? Following softgpu for now...
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "vec3(1.0 - v.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "vec3(destColor.a * 2.0)"; break;
case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "vec3(1.0 - destColor.a * 2.0)"; break;
case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break;
}
switch (funcB)
{
case GE_DSTBLEND_SRCCOLOR: dstFactor = "v.rgb"; break;
case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(vec3(1.0) - v.rgb)"; break;
case GE_DSTBLEND_SRCALPHA: dstFactor = "vec3(v.a)"; break;
case GE_DSTBLEND_INVSRCALPHA: dstFactor = "vec3(1.0 - v.a)"; break;
case GE_DSTBLEND_DSTALPHA: dstFactor = "vec3(destColor.a)"; break;
case GE_DSTBLEND_INVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a)"; break;
case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "vec3(v.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "vec3(1.0 - v.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "vec3(destColor.a * 2.0)"; break;
case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "vec3(1.0 - destColor.a * 2.0)"; break;
case GE_DSTBLEND_FIXB: dstFactor = "u_blendFixB"; break;
}
switch (eq)
{
case GE_BLENDMODE_MUL_AND_ADD:
WRITE(p, " v.rgb = v.rgb * %s + destColor.rgb * %s;\n", srcFactor, dstFactor);
break;
case GE_BLENDMODE_MUL_AND_SUBTRACT:
WRITE(p, " v.rgb = v.rgb * %s - destColor.rgb * %s;\n", srcFactor, dstFactor);
break;
case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
WRITE(p, " v.rgb = destColor.rgb * %s - v.rgb * %s;\n", srcFactor, dstFactor);
break;
case GE_BLENDMODE_MIN:
WRITE(p, " v.rgb = min(v.rgb, destColor.rgb);\n");
break;
case GE_BLENDMODE_MAX:
WRITE(p, " v.rgb = max(v.rgb, destColor.rgb);\n");
break;
case GE_BLENDMODE_ABSDIFF:
WRITE(p, " v.rgb = abs(v.rgb - destColor.rgb);\n");
break;
}
}
} }
switch (stencilToAlpha) { switch (stencilToAlpha) {

View file

@ -20,9 +20,9 @@
#include "Globals.h" #include "Globals.h"
struct FragmentShaderID { struct FragmentShaderID {
FragmentShaderID() {d[0] = 0xFFFFFFFF;} FragmentShaderID() {clear();}
void clear() {d[0] = 0xFFFFFFFF;} void clear() {d[0] = 0xFFFFFFFF; d[1] = 0xFFFFFFFF;}
u32 d[1]; u32 d[2];
bool operator < (const FragmentShaderID &other) const { bool operator < (const FragmentShaderID &other) const {
for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) { for (size_t i = 0; i < sizeof(d) / sizeof(u32); i++) {
if (d[i] < other.d[i]) if (d[i] < other.d[i])
@ -62,4 +62,4 @@ bool IsAlphaTestTriviallyTrue();
bool IsColorTestTriviallyTrue(); bool IsColorTestTriviallyTrue();
StencilValueType ReplaceAlphaWithStencilType(); StencilValueType ReplaceAlphaWithStencilType();
ReplaceAlphaType ReplaceAlphaWithStencil(); ReplaceAlphaType ReplaceAlphaWithStencil();
bool ShouldUseShaderBlending();

View file

@ -784,7 +784,7 @@ void FramebufferManager::DoSetRenderFrameBuffer() {
// None found? Create one. // None found? Create one.
if (!vfb) { if (!vfb) {
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; textureCache_->ForgetLastTexture();
vfb = new VirtualFramebuffer(); vfb = new VirtualFramebuffer();
vfb->fbo = 0; vfb->fbo = 0;
vfb->fb_address = fb_address; vfb->fb_address = fb_address;
@ -891,7 +891,7 @@ void FramebufferManager::DoSetRenderFrameBuffer() {
// Use it as a render target. // Use it as a render target.
DEBUG_LOG(SCEGE, "Switching render target to FBO for %08x: %i x %i x %i ", vfb->fb_address, vfb->width, vfb->height, vfb->format); DEBUG_LOG(SCEGE, "Switching render target to FBO for %08x: %i x %i x %i ", vfb->fb_address, vfb->width, vfb->height, vfb->format);
vfb->usageFlags |= FB_USAGE_RENDERTARGET; vfb->usageFlags |= FB_USAGE_RENDERTARGET;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; textureCache_->ForgetLastTexture();
vfb->last_frame_render = gpuStats.numFlips; vfb->last_frame_render = gpuStats.numFlips;
frameLastFramebufUsed = gpuStats.numFlips; frameLastFramebufUsed = gpuStats.numFlips;
vfb->dirtyAfterDisplay = true; vfb->dirtyAfterDisplay = true;
@ -992,6 +992,7 @@ void FramebufferManager::BindFramebufferDepth(VirtualFramebuffer *sourceframebuf
// Let's only do this if not clearing. // Let's only do this if not clearing.
if (!gstate.isModeClear() || !gstate.isClearModeDepthMask()) { if (!gstate.isModeClear() || !gstate.isClearModeDepthMask()) {
fbo_bind_for_read(sourceframebuffer->fbo); fbo_bind_for_read(sourceframebuffer->fbo);
glDisable(GL_SCISSOR_TEST);
#if defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY)) // We only support this extension on Android, it's not even available on PC. #if defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY)) // We only support this extension on Android, it's not even available on PC.
if (useNV) { if (useNV) {
@ -1000,6 +1001,8 @@ void FramebufferManager::BindFramebufferDepth(VirtualFramebuffer *sourceframebuf
#endif // defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY)) #endif // defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY))
glBlitFramebuffer(0, 0, sourceframebuffer->renderWidth, sourceframebuffer->renderHeight, 0, 0, targetframebuffer->renderWidth, targetframebuffer->renderHeight, GL_DEPTH_BUFFER_BIT, GL_NEAREST); glBlitFramebuffer(0, 0, sourceframebuffer->renderWidth, sourceframebuffer->renderHeight, 0, 0, targetframebuffer->renderWidth, targetframebuffer->renderHeight, GL_DEPTH_BUFFER_BIT, GL_NEAREST);
// If we set targetframebuffer->depthUpdated here, our optimization above would be pointless. // If we set targetframebuffer->depthUpdated here, our optimization above would be pointless.
glstate.scissorTest.restore();
} }
#endif #endif
} }
@ -1007,6 +1010,10 @@ void FramebufferManager::BindFramebufferDepth(VirtualFramebuffer *sourceframebuf
} }
void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) { void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) {
if (framebuffer == NULL) {
framebuffer = currentRenderVfb_;
}
if (!framebuffer->fbo || !useBufferedRendering_) { if (!framebuffer->fbo || !useBufferedRendering_) {
glBindTexture(GL_TEXTURE_2D, 0); glBindTexture(GL_TEXTURE_2D, 0);
gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE; gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE;
@ -1041,6 +1048,7 @@ void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) {
fbo_bind_as_render_target(renderCopy); fbo_bind_as_render_target(renderCopy);
glViewport(0, 0, framebuffer->renderWidth, framebuffer->renderHeight); glViewport(0, 0, framebuffer->renderWidth, framebuffer->renderHeight);
glDisable(GL_SCISSOR_TEST);
fbo_bind_for_read(framebuffer->fbo); fbo_bind_for_read(framebuffer->fbo);
#if defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY)) // We only support this extension on Android, it's not even available on PC. #if defined(USING_GLES2) && (defined(ANDROID) || defined(BLACKBERRY)) // We only support this extension on Android, it's not even available on PC.
@ -1052,6 +1060,8 @@ void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer) {
fbo_bind_as_render_target(currentRenderVfb_->fbo); fbo_bind_as_render_target(currentRenderVfb_->fbo);
fbo_bind_color_as_texture(renderCopy, 0); fbo_bind_color_as_texture(renderCopy, 0);
glstate.viewport.restore();
glstate.scissorTest.restore();
#endif #endif
} else { } else {
fbo_bind_color_as_texture(framebuffer->fbo, 0); fbo_bind_color_as_texture(framebuffer->fbo, 0);
@ -1238,7 +1248,7 @@ void FramebufferManager::ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool s
glEnable(GL_DITHER); glEnable(GL_DITHER);
} else { } else {
nvfb->usageFlags |= FB_USAGE_RENDERTARGET; nvfb->usageFlags |= FB_USAGE_RENDERTARGET;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY; textureCache_->ForgetLastTexture();
nvfb->last_frame_render = gpuStats.numFlips; nvfb->last_frame_render = gpuStats.numFlips;
nvfb->dirtyAfterDisplay = true; nvfb->dirtyAfterDisplay = true;
@ -1317,7 +1327,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int
} }
fbo_bind_as_render_target(dst->fbo); fbo_bind_as_render_target(dst->fbo);
glDisable(GL_SCISSOR_TEST);
#ifndef USING_GLES2 #ifndef USING_GLES2
if (gl_extensions.FBO_ARB) { if (gl_extensions.FBO_ARB) {
@ -1373,7 +1383,7 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int
// Make sure our 2D drawing program is ready. Compiles only if not already compiled. // Make sure our 2D drawing program is ready. Compiles only if not already compiled.
CompileDraw2DProgram(); CompileDraw2DProgram();
glstate.viewport.set(0, 0, dst->width, dst->height); glViewport(0, 0, dst->width, dst->height);
DisableState(); DisableState();
// The first four coordinates are relative to the 6th and 7th arguments of DrawActiveTexture. // The first four coordinates are relative to the 6th and 7th arguments of DrawActiveTexture.
@ -1382,8 +1392,11 @@ void FramebufferManager::BlitFramebuffer_(VirtualFramebuffer *dst, int dstX, int
float srcH = src->height; float srcH = src->height;
DrawActiveTexture(0, dstX, dstY, w, h, dst->width, dst->height, false, srcX / srcW, srcY / srcH, (srcX + w) / srcW, (srcY + h) / srcH, draw2dprogram_); DrawActiveTexture(0, dstX, dstY, w, h, dst->width, dst->height, false, srcX / srcW, srcY / srcH, (srcX + w) / srcW, (srcY + h) / srcH, draw2dprogram_);
glBindTexture(GL_TEXTURE_2D, 0); glBindTexture(GL_TEXTURE_2D, 0);
textureCache_->ForgetLastTexture();
} }
glstate.scissorTest.restore();
glstate.viewport.restore();
fbo_unbind(); fbo_unbind();
} }
@ -1911,7 +1924,7 @@ bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool
fbo_unbind(); fbo_unbind();
} }
glstate.viewport.restore(); glstate.viewport.restore();
gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; textureCache_->ForgetLastTexture();
// This is a memcpy, let's still copy just in case. // This is a memcpy, let's still copy just in case.
return false; return false;
} }
@ -2045,7 +2058,7 @@ void FramebufferManager::NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride,
fbo_unbind(); fbo_unbind();
} }
glstate.viewport.restore(); glstate.viewport.restore();
gstate_c.textureChanged = TEXCHANGE_PARAMSONLY; textureCache_->ForgetLastTexture();
} }
} }
} }

View file

@ -162,8 +162,8 @@ static const CommandTableEntry commandTable[] = {
{GE_CMD_STENCILTESTENABLE, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_STENCILTESTENABLE, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_ALPHABLENDENABLE, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_ALPHABLENDENABLE, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_BLENDMODE, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_BLENDMODE, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_BlendFixA},
{GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_BlendFixB},
{GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE},
@ -1084,6 +1084,14 @@ void GLES_GPU::Execute_ColorRef(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF); shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
} }
// Handler for GE_CMD_BLENDFIXEDA (registered in commandTable with FLAG_EXECUTEONCHANGE
// and also called from ExecuteOpInternal).  Flags DIRTY_BLENDFIX on the shader manager;
// LinkedShader::UpdateUniforms uploads both u_blendFixA and u_blendFixB when that bit is set.
// Neither op nor diff is used here.
void GLES_GPU::Execute_BlendFixA(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_BLENDFIX);
}
// Handler for GE_CMD_BLENDFIXEDB (registered in commandTable with FLAG_EXECUTEONCHANGE
// and also called from ExecuteOpInternal).  Shares the single DIRTY_BLENDFIX bit with
// the FIXA handler, so a change to either constant flags the same uniform update.
// Neither op nor diff is used here.
void GLES_GPU::Execute_BlendFixB(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_BLENDFIX);
}
void GLES_GPU::Execute_WorldMtxNum(u32 op, u32 diff) { void GLES_GPU::Execute_WorldMtxNum(u32 op, u32 diff) {
// This is almost always followed by GE_CMD_WORLDMATRIXDATA. // This is almost always followed by GE_CMD_WORLDMATRIXDATA.
const u32_le *src = (const u32_le *)Memory::GetPointer(currentList->pc + 4); const u32_le *src = (const u32_le *)Memory::GetPointer(currentList->pc + 4);
@ -1611,8 +1619,14 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////
case GE_CMD_ALPHABLENDENABLE: case GE_CMD_ALPHABLENDENABLE:
case GE_CMD_BLENDMODE: case GE_CMD_BLENDMODE:
break;
case GE_CMD_BLENDFIXEDA: case GE_CMD_BLENDFIXEDA:
Execute_BlendFixA(op, diff);
break;
case GE_CMD_BLENDFIXEDB: case GE_CMD_BLENDFIXEDB:
Execute_BlendFixB(op, diff);
break; break;
case GE_CMD_ALPHATESTENABLE: case GE_CMD_ALPHATESTENABLE:

View file

@ -124,6 +124,8 @@ public:
void Execute_AlphaTest(u32 op, u32 diff); void Execute_AlphaTest(u32 op, u32 diff);
void Execute_StencilTest(u32 op, u32 diff); void Execute_StencilTest(u32 op, u32 diff);
void Execute_ColorRef(u32 op, u32 diff); void Execute_ColorRef(u32 op, u32 diff);
void Execute_BlendFixA(u32 op, u32 diff);
void Execute_BlendFixB(u32 op, u32 diff);
void Execute_WorldMtxNum(u32 op, u32 diff); void Execute_WorldMtxNum(u32 op, u32 diff);
void Execute_WorldMtxData(u32 op, u32 diff); void Execute_WorldMtxData(u32 op, u32 diff);
void Execute_ViewMtxNum(u32 op, u32 diff); void Execute_ViewMtxNum(u32 op, u32 diff);

View file

@ -155,6 +155,10 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTrans
u_colormask = glGetUniformLocation(program, "u_colormask"); u_colormask = glGetUniformLocation(program, "u_colormask");
u_stencilReplaceValue = glGetUniformLocation(program, "u_stencilReplaceValue"); u_stencilReplaceValue = glGetUniformLocation(program, "u_stencilReplaceValue");
u_fbotex = glGetUniformLocation(program, "fbotex");
u_blendFixA = glGetUniformLocation(program, "u_blendFixA");
u_blendFixB = glGetUniformLocation(program, "u_blendFixB");
// Transform // Transform
u_view = glGetUniformLocation(program, "u_view"); u_view = glGetUniformLocation(program, "u_view");
u_world = glGetUniformLocation(program, "u_world"); u_world = glGetUniformLocation(program, "u_world");
@ -225,6 +229,7 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTrans
if (u_view != -1) availableUniforms |= DIRTY_VIEWMATRIX; if (u_view != -1) availableUniforms |= DIRTY_VIEWMATRIX;
if (u_texmtx != -1) availableUniforms |= DIRTY_TEXMATRIX; if (u_texmtx != -1) availableUniforms |= DIRTY_TEXMATRIX;
if (u_stencilReplaceValue != -1) availableUniforms |= DIRTY_STENCILREPLACEVALUE; if (u_stencilReplaceValue != -1) availableUniforms |= DIRTY_STENCILREPLACEVALUE;
if (u_blendFixA != -1 || u_blendFixB != -1) availableUniforms |= DIRTY_BLENDFIX;
// Looping up to numBones lets us avoid checking u_bone[i] // Looping up to numBones lets us avoid checking u_bone[i]
for (int i = 0; i < numBones; i++) { for (int i = 0; i < numBones; i++) {
@ -247,6 +252,7 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTrans
// Default uniform values // Default uniform values
glUniform1i(u_tex, 0); glUniform1i(u_tex, 0);
glUniform1i(u_fbotex, 1);
// The rest, use the "dirty" mechanism. // The rest, use the "dirty" mechanism.
dirtyUniforms = DIRTY_ALL; dirtyUniforms = DIRTY_ALL;
use(vertType, previous); use(vertType, previous);
@ -520,6 +526,11 @@ void LinkedShader::UpdateUniforms(u32 vertType) {
} }
#endif #endif
if (dirty & DIRTY_BLENDFIX) {
SetColorUniform3(u_blendFixA, gstate.getFixA());
SetColorUniform3(u_blendFixB, gstate.getFixB());
}
// Lighting // Lighting
if (dirty & DIRTY_AMBIENT) { if (dirty & DIRTY_AMBIENT) {
SetColorUniform3Alpha(u_ambient, gstate.ambientcolor, gstate.getAmbientA()); SetColorUniform3Alpha(u_ambient, gstate.ambientcolor, gstate.getAmbientA());

View file

@ -73,6 +73,11 @@ public:
#endif #endif
int numBones; int numBones;
// Shader blending.
int u_fbotex;
int u_blendFixA;
int u_blendFixB;
// Fragment processing inputs // Fragment processing inputs
int u_alphacolorref; int u_alphacolorref;
int u_colormask; int u_colormask;
@ -123,7 +128,7 @@ enum
DIRTY_AMBIENT = (1 << 15), DIRTY_AMBIENT = (1 << 15),
DIRTY_MATAMBIENTALPHA = (1 << 16), DIRTY_MATAMBIENTALPHA = (1 << 16),
// 1 << 17 is free! DIRTY_BLENDFIX = (1 << 17), // (either one.)
DIRTY_UVSCALEOFFSET = (1 << 18), // this will be dirtied ALL THE TIME... maybe we'll need to do "last value with this shader compares" DIRTY_UVSCALEOFFSET = (1 << 18), // this will be dirtied ALL THE TIME... maybe we'll need to do "last value with this shader compares"

View file

@ -171,11 +171,36 @@ void TransformDrawEngine::ApplyDrawState(int prim) {
gstate_c.textureChanged = TEXCHANGE_UNCHANGED; gstate_c.textureChanged = TEXCHANGE_UNCHANGED;
} }
// TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a // Set blend - unless we need to do it in the shader.
// single fullscreen pass that converts alpha to stencil (or 2 passes, to set both the 0 and 1 values) very easily.
// Set blend
bool wantBlend = !gstate.isModeClear() && gstate.isAlphaBlendEnabled(); bool wantBlend = !gstate.isModeClear() && gstate.isAlphaBlendEnabled();
if (wantBlend && ShouldUseShaderBlending()) {
if (!gl_extensions.NV_shader_framebuffer_fetch) {
static const int MAX_REASONABLE_BLITS_PER_FRAME = 24;
static int lastFrameBlit = -1;
static int blitsThisFrame = 0;
if (lastFrameBlit != gpuStats.numFlips) {
if (blitsThisFrame > MAX_REASONABLE_BLITS_PER_FRAME) {
WARN_LOG_REPORT_ONCE(blendingBlit, G3D, "Lots of blits needed for obscure blending: %d per frame, blend %d/%d/%d", blitsThisFrame, gstate.getBlendFuncA(), gstate.getBlendFuncB(), gstate.getBlendEq());
}
blitsThisFrame = 0;
lastFrameBlit = gpuStats.numFlips;
}
++blitsThisFrame;
glActiveTexture(GL_TEXTURE1);
framebufferManager_->BindFramebufferColor(NULL);
glActiveTexture(GL_TEXTURE0);
fboTexBound_ = true;
}
// None of the below logic is interesting, we're gonna do it entirely in the shader.
wantBlend = false;
} else if (fboTexBound_) {
glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE0);
}
glstate.blend.set(wantBlend); glstate.blend.set(wantBlend);
if (wantBlend) { if (wantBlend) {
// This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop. // This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop.
@ -323,12 +348,7 @@ void TransformDrawEngine::ApplyDrawState(int prim) {
} }
if (((blendFuncEq >= GE_BLENDMODE_MIN) && gl_extensions.EXT_blend_minmax) || gl_extensions.GLES3) { if (((blendFuncEq >= GE_BLENDMODE_MIN) && gl_extensions.EXT_blend_minmax) || gl_extensions.GLES3) {
if (blendFuncEq == GE_BLENDMODE_ABSDIFF && gl_extensions.NV_shader_framebuffer_fetch) {
// Handle GE_BLENDMODE_ABSDIFF in fragment shader and turn off regular alpha blending here.
glstate.blend.set(false);
} else {
glstate.blendEquation.set(eqLookup[blendFuncEq]); glstate.blendEquation.set(eqLookup[blendFuncEq]);
}
} else { } else {
glstate.blendEquation.set(eqLookupNoMinMax[blendFuncEq]); glstate.blendEquation.set(eqLookupNoMinMax[blendFuncEq]);
} }

View file

@ -65,6 +65,11 @@ public:
return cache.size(); return cache.size();
} }
// Resets lastBoundTexture to -1 and ORs TEXCHANGE_PARAMSONLY into gstate_c.textureChanged.
// FramebufferManager calls this after switching render targets or doing its own
// blits / texture binds.
// NOTE(review): presumably -1 is a "nothing bound" sentinel that makes the cache rebind
// on the next draw - confirm against the code that reads lastBoundTexture.
void ForgetLastTexture() {
lastBoundTexture = -1;
gstate_c.textureChanged |= TEXCHANGE_PARAMSONLY;
}
// Only used by Qt UI? // Only used by Qt UI?
bool DecodeTexture(u8 *output, GPUgstate state); bool DecodeTexture(u8 *output, GPUgstate state);

View file

@ -126,7 +126,8 @@ TransformDrawEngine::TransformDrawEngine()
numDrawCalls(0), numDrawCalls(0),
vertexCountInDrawCalls(0), vertexCountInDrawCalls(0),
decodeCounter_(0), decodeCounter_(0),
uvScale(0) { uvScale(0),
fboTexBound_(false) {
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL; decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
// Allocate nicely aligned memory. Maybe graphics drivers will // Allocate nicely aligned memory. Maybe graphics drivers will
// appreciate it. // appreciate it.

View file

@ -243,4 +243,6 @@ private:
u32 dcid_; u32 dcid_;
UVScale *uvScale; UVScale *uvScale;
bool fboTexBound_;
}; };