GPU: Reduce the number of clip distances needed.

We only need to write a single clip distance to clip clamped depth, since
we never clamp in the case where clipping would be needed on both sides
(minz and maxz) at once.
This commit is contained in:
Unknown W. Brackets 2022-10-05 21:17:17 -07:00
parent 14bf9d1923
commit bc3d3cf9fb
3 changed files with 13 additions and 24 deletions

View file

@ -276,7 +276,6 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu
} else { } else {
const char *clipSuffix0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]"; const char *clipSuffix0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]";
const char *clipSuffix1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]"; const char *clipSuffix1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]";
const char *clipSuffix2 = compat.shaderLanguage == HLSL_D3D11 ? ".z" : "[2]";
p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster? p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster?
p.C(" vec4 outPos = gl_in[i].gl_Position;\n"); p.C(" vec4 outPos = gl_in[i].gl_Position;\n");
@ -285,8 +284,7 @@ bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLangu
if (clipClampedDepth) { if (clipClampedDepth) {
// Copy the clip distance from the vertex shader. // Copy the clip distance from the vertex shader.
p.F(" gl_ClipDistance%s = gl_in[i].gl_ClipDistance%s;\n", clipSuffix0, clipSuffix0); p.F(" gl_ClipDistance%s = gl_in[i].gl_ClipDistance%s;\n", clipSuffix0, clipSuffix0);
p.F(" gl_ClipDistance%s = gl_in[i].gl_ClipDistance%s;\n", clipSuffix1, clipSuffix1); p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clipSuffix1);
p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clipSuffix2);
} else { } else {
// We shouldn't need to worry about rectangles-as-triangles here, since we don't use geometry shaders for that. // We shouldn't need to worry about rectangles-as-triangles here, since we don't use geometry shaders for that.
p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clipSuffix0); p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clipSuffix0);

View file

@ -231,9 +231,8 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
bool vertexRangeCulling = id.Bit(VS_BIT_VERTEX_RANGE_CULLING) && !isModeThrough; bool vertexRangeCulling = id.Bit(VS_BIT_VERTEX_RANGE_CULLING) && !isModeThrough;
bool clipClampedDepth = !isModeThrough && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE); bool clipClampedDepth = !isModeThrough && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
const char *vertexRangeClipSuffix = "[0]"; const char *clipClampedDepthSuffix = "[0]";
if (vertexRangeCulling && clipClampedDepth) const char *vertexRangeClipSuffix = clipClampedDepth ? "[1]" : "[0]";
vertexRangeClipSuffix = "[2]";
if (compat.shaderLanguage == GLSL_VULKAN) { if (compat.shaderLanguage == GLSL_VULKAN) {
WRITE(p, "\n"); WRITE(p, "\n");
@ -419,12 +418,12 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " vec4 gl_Position : SV_Position;\n"); WRITE(p, " vec4 gl_Position : SV_Position;\n");
bool clipRange = vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE); bool clipRange = vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE);
if (clipClampedDepth && clipRange) { if (clipClampedDepth && clipRange) {
WRITE(p, " float3 gl_ClipDistance : SV_ClipDistance;\n");
vertexRangeClipSuffix = ".z";
} else if (clipClampedDepth) {
WRITE(p, " float2 gl_ClipDistance : SV_ClipDistance;\n"); WRITE(p, " float2 gl_ClipDistance : SV_ClipDistance;\n");
} else if (clipRange) { clipClampedDepthSuffix = ".x";
vertexRangeClipSuffix = ".y";
} else if (clipClampedDepth || clipRange) {
WRITE(p, " float gl_ClipDistance : SV_ClipDistance;\n"); WRITE(p, " float gl_ClipDistance : SV_ClipDistance;\n");
clipClampedDepthSuffix = "";
vertexRangeClipSuffix = ""; vertexRangeClipSuffix = "";
} }
if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CULL_DISTANCE)) { if (vertexRangeCulling && gstate_c.Supports(GPU_SUPPORTS_CULL_DISTANCE)) {
@ -1267,28 +1266,21 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
} }
if (clipClampedDepth) { if (clipClampedDepth) {
const char *clip0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]";
const char *clip1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]";
// This should clip against minz, but only when it's above zero. // This should clip against minz, but only when it's above zero.
if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) { if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
// On OpenGL/GLES, these values account for the -1 -> 1 range. // On OpenGL/GLES, these values account for the -1 -> 1 range.
WRITE(p, " if (u_depthRange.y - u_depthRange.x >= 1.0) {\n"); WRITE(p, " if (u_depthRange.y - u_depthRange.x >= 1.0) {\n");
WRITE(p, " %sgl_ClipDistance%s = outPos.w + outPos.z;\n", compat.vsOutPrefix, clip0); WRITE(p, " %sgl_ClipDistance%s = outPos.w + outPos.z;\n", compat.vsOutPrefix, clipClampedDepthSuffix);
} else { } else {
// Everywhere else, it's 0 -> 1, simpler. // Everywhere else, it's 0 -> 1, simpler.
WRITE(p, " if (u_depthRange.y >= 1.0) {\n"); WRITE(p, " if (u_depthRange.y >= 1.0) {\n");
WRITE(p, " %sgl_ClipDistance%s = outPos.z;\n", compat.vsOutPrefix, clip0); WRITE(p, " %sgl_ClipDistance%s = outPos.z;\n", compat.vsOutPrefix, clipClampedDepthSuffix);
} }
WRITE(p, " } else {\n");
WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clip0);
WRITE(p, " }\n");
// This is similar, but for maxz when it's below 65535.0. -1/0 don't matter here. // This is similar, but for maxz when it's below 65535.0. -1/0 don't matter here.
WRITE(p, " if (u_depthRange.x + u_depthRange.y <= 65534.0) {\n"); WRITE(p, " } else if (u_depthRange.x + u_depthRange.y <= 65534.0) {\n");
WRITE(p, " %sgl_ClipDistance%s = outPos.w - outPos.z;\n", compat.vsOutPrefix, clip1); WRITE(p, " %sgl_ClipDistance%s = outPos.w - outPos.z;\n", compat.vsOutPrefix, clipClampedDepthSuffix);
WRITE(p, " } else {\n"); WRITE(p, " } else {\n");
WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clip1); WRITE(p, " %sgl_ClipDistance%s = 0.0;\n", compat.vsOutPrefix, clipClampedDepthSuffix);
WRITE(p, " }\n"); WRITE(p, " }\n");
} }

View file

@ -199,9 +199,8 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
flags.supportDualSource = (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) != 0; flags.supportDualSource = (gstate_c.featureFlags & GPU_SUPPORTS_DUALSOURCE_BLEND) != 0;
if (!VSID.Bit(VS_BIT_IS_THROUGH) && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP)) { if (!VSID.Bit(VS_BIT_IS_THROUGH) && gstate_c.Supports(GPU_SUPPORTS_DEPTH_CLAMP)) {
flags.useClipDistance0 = true; flags.useClipDistance0 = true;
flags.useClipDistance1 = true;
if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE))
flags.useClipDistance2 = true; flags.useClipDistance1 = true;
} else if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) { } else if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Supports(GPU_SUPPORTS_CLIP_DISTANCE)) {
flags.useClipDistance0 = true; flags.useClipDistance0 = true;
} }