From 9c0489dd5e312d63e7f0add617c18af47631579c Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 23 May 2018 11:39:11 +0200 Subject: [PATCH 01/65] WIP --- core/rend/gles/gldraw.cpp | 113 +++++++++++++++++++++++++++++--------- core/rend/gles/gles.cpp | 34 +++++++++++- core/rend/gles/gles.h | 4 ++ 3 files changed, 124 insertions(+), 27 deletions(-) diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 7565371b5..7b94ba2e9 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -73,6 +73,8 @@ extern int screen_height; PipelineShader* CurrentShader; u32 gcflip; +GLuint fbo; +GLuint stencilTexId; s32 SetTileClip(u32 val, bool set) { @@ -162,6 +164,20 @@ __forceinline if (CurrentShader->program == -1) CompilePipelineShader(CurrentShader); glcache.UseProgram(CurrentShader->program); + + if (Type == ListType_Opaque) + { + // FIXME Must be done for drawing pass only and only for opaque (and pt?) + glActiveTexture(GL_TEXTURE1); + glcache.BindTexture(GL_TEXTURE_2D, stencilTexId); + GLint uniform = glGetUniformLocation(CurrentShader->program, "shadow_stencil"); + if (uniform != -1) + { + glUniform1i(uniform, 1); glCheck(); + } + glActiveTexture(GL_TEXTURE0); + } + SetTileClip(gp->tileclip,true); //This bit control which pixels are affected @@ -921,8 +937,8 @@ void DrawModVols(int first, int count) SetupModvolVBO(); - glcache.Enable(GL_BLEND); - glcache.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); +// glcache.Enable(GL_BLEND); +// glcache.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); glcache.UseProgram(gl.modvol_shader.program); glUniform1f(gl.modvol_shader.sp_ShaderColor,0.5f); @@ -1021,29 +1037,26 @@ void DrawModVols(int first, int count) } } //disable culling - SetCull(0); - //enable color writes - glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE); - - //black out any stencil with '1' - glcache.Enable(GL_BLEND); - glcache.BlendFunc(GL_SRC_ALPHA,GL_ONE_MINUS_SRC_ALPHA); - - glcache.Enable(GL_STENCIL_TEST); - 
glcache.StencilFunc(GL_EQUAL,0x81,0x81); //only pixels that are Modvol enabled, and in area 1 - - //clear the stencil result bit - glcache.StencilMask(0x3); //write to lsb - glcache.StencilOp(GL_ZERO,GL_ZERO,GL_ZERO); - - //don't do depth testing - glcache.Disable(GL_DEPTH_TEST); +// SetCull(0); +// //enable color writes +// glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE); +// +// //black out any stencil with '1' +// glcache.Enable(GL_BLEND); +// glcache.BlendFunc(GL_SRC_ALPHA,GL_ONE_MINUS_SRC_ALPHA); +// +// glcache.Enable(GL_STENCIL_TEST); +// glcache.StencilFunc(GL_EQUAL,0x81,0x81); //only pixels that are Modvol enabled, and in area 1 +// +// //clear the stencil result bit +// glcache.StencilMask(0x3); //write to lsb +// glcache.StencilOp(GL_ZERO,GL_ZERO,GL_ZERO); +// +// //don't do depth testing +// glcache.Disable(GL_DEPTH_TEST); SetupMainVBO(); - glDrawArrays(GL_TRIANGLE_STRIP,0,4); - - //Draw and blend - //glDrawArrays(GL_TRIANGLES,pvrrc.modtrig.used(),2); +// glDrawArrays(GL_TRIANGLE_STRIP,0,4); } @@ -1053,6 +1066,42 @@ void DrawModVols(int first, int count) void DrawStrips() { + if (fbo == 0) + { + glGenFramebuffers(1, &fbo); + glBindFramebuffer(GL_FRAMEBUFFER, fbo); // Bind framebuffer 0 to use the system fb + + stencilTexId = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, stencilTexId); glCheck(); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, screen_width, screen_height, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL); glCheck(); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, stencilTexId, 0); glCheck(); + +// GLuint colortexid = glcache.GenTexture(); +// glcache.BindTexture(GL_TEXTURE_2D, colortexid); +// +// glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, screen_width, screen_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); glCheck(); +// glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colortexid, 0); glCheck(); + + GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); + + 
verify(uStatus == GL_FRAMEBUFFER_COMPLETE); + } + else + { + glBindFramebuffer(GL_FRAMEBUFFER, fbo); + if (stencilTexId == 0) + { + stencilTexId = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, stencilTexId); glCheck(); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, screen_width, screen_height, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL); glCheck(); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, stencilTexId, 0); glCheck(); + } + glcache.Disable(GL_SCISSOR_TEST); + glcache.DepthMask(GL_TRUE); + glStencilMask(0xFF); glCheck(); + glClear(GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glCheck(); + } + SetupMainVBO(); //Draw the strips ! @@ -1067,12 +1116,26 @@ void DrawStrips() glcache.Enable(GL_DEPTH_TEST); glcache.DepthMask(GL_TRUE); - //Opaque - DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count); + // Do a first pass on the depth+stencil buffer + glBindFramebuffer(GL_FRAMEBUFFER, fbo); + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + + DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count); // Modifier volumes DrawModVols(previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); + glBindFramebuffer(GL_FRAMEBUFFER, 0); glCheck(); + glcache.BindTexture(GL_TEXTURE_2D, stencilTexId); glCheck(); + glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); glCheck(); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glCheck(); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glCheck(); + + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + + //Opaque + DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count); + //Alpha tested DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count); diff --git 
a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 7e1f3ea82..6d278f148 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -193,7 +193,10 @@ uniform lowp vec4 pp_ClipTest; \n\ uniform lowp vec3 sp_FOG_COL_RAM,sp_FOG_COL_VERT; \n\ uniform highp vec2 sp_LOG_FOG_COEFS; \n\ uniform highp float sp_FOG_DENSITY; \n\ +uniform highp float shade_scale_factor; \n\ +uniform highp vec2 screen_size; \n\ uniform sampler2D tex,fog_table; \n\ +uniform usampler2D shadow_stencil; \n\ /* Vertex input*/ \n\ " vary " lowp vec4 vtx_base; \n\ " vary " lowp vec4 vtx_offs; \n\ @@ -267,6 +270,10 @@ void main() \n\ #endif\n\ } \n\ #endif\n\ + //uvec4 stencil = texture(shadow_stencil, vec2(gl_FragCoord.x / 1280, gl_FragCoord.y / 960)); \n\ + uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / screen_size); \n\ + if (stencil.r == uint(0x81)) \n\ + color.rgb *= shade_scale_factor; \n\ #if pp_FogCtrl==0 \n\ { \n\ color.rgb=mix(color.rgb,sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ @@ -661,6 +668,12 @@ struct ShaderUniforms_t if (s->sp_LOG_FOG_COEFS!=-1) glUniform2fv(s->sp_LOG_FOG_COEFS,1, fog_coefs); + + if (s->screen_size != -1) + glUniform2f(s->screen_size, (float)screen_width, (float)screen_height); + + if (s->shade_scale_factor != -1) + glUniform1f(s->shade_scale_factor, FPU_SHAD_SCALE.scale_factor / 256.f); } } ShaderUniforms; @@ -807,7 +820,8 @@ bool CompilePipelineShader( PipelineShader* s) s->sp_FOG_COL_RAM=-1; s->sp_LOG_FOG_COEFS=-1; } - + s->screen_size = glGetUniformLocation(s->program, "screen_size"); + s->shade_scale_factor = glGetUniformLocation(s->program, "shade_scale_factor"); ShaderUniforms.Set(s); @@ -1411,6 +1425,12 @@ bool ProcessFrame(TA_context* ctx) bool RenderFrame() { + static int old_screen_width, old_screen_height; + if (screen_width != old_screen_width || screen_height != old_screen_height) { + rend_resize(screen_width, screen_height); + old_screen_width = screen_width; + old_screen_height = screen_height; + } DoCleanup(); bool 
is_rtt=pvrrc.isRTT; @@ -1824,7 +1844,17 @@ void rend_set_fb_scale(float x,float y) struct glesrend : Renderer { bool Init() { return gles_init(); } - void Resize(int w, int h) { screen_width=w; screen_height=h; } + void Resize(int w, int h) + { + // FIXME Not called :( + screen_width=w; + screen_height=h; + if (stencilTexId != 0) + { + glcache.DeleteTextures(1, &stencilTexId); + stencilTexId = 0; + } + } void Term() { } bool Process(TA_context* ctx) { return ProcessFrame(ctx); } diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 36688a5bb..3d39d5aab 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -55,6 +55,8 @@ struct PipelineShader GLuint scale,depth_scale; GLuint pp_ClipTest,cp_AlphaTestValue; GLuint sp_FOG_COL_RAM,sp_FOG_COL_VERT,sp_FOG_DENSITY,sp_LOG_FOG_COEFS; + GLuint shade_scale_factor; + GLuint screen_size; // u32 cp_AlphaTest; s32 pp_ClipTestMode; @@ -126,3 +128,5 @@ int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, bool CompilePipelineShader(PipelineShader* s); #define TEXTURE_LOAD_ERROR 0 GLuint loadPNG(const string& subpath, int &width, int &height); + +extern GLuint stencilTexId; From a858eb6a117af86801ce0d757f968071518ef322 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 23 May 2018 15:16:26 +0200 Subject: [PATCH 02/65] No need to redraw modvols --- core/rend/gles/gldraw.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index ce4f0760e..6e4e8cde8 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -1139,9 +1139,6 @@ void DrawStrips() //Alpha tested DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count); - // Modifier volumes - DrawModVols(previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); - //Alpha blended { if (pvrrc.isAutoSort) From 1c24ae2c31b6b7c9419b2caac57705b4126d0f39 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 26 May 2018 10:37:36 +0200 
Subject: [PATCH 03/65] WIP experimental Average Colors and Depth Peeling renderers --- core/hw/pvr/ta.h | 6 +- core/rend/gles/glcache.h | 25 +- core/rend/gles/gldraw.cpp | 184 +++++++----- core/rend/gles/gles.cpp | 158 ++++++----- core/rend/gles/gles.h | 73 ++++- core/rend/gles/render_tr.cpp | 527 +++++++++++++++++++++++++++++++++++ 6 files changed, 822 insertions(+), 151 deletions(-) create mode 100644 core/rend/gles/render_tr.cpp diff --git a/core/hw/pvr/ta.h b/core/hw/pvr/ta.h index 29b7c6d39..bbbd8e17b 100644 --- a/core/hw/pvr/ta.h +++ b/core/hw/pvr/ta.h @@ -22,11 +22,11 @@ void ta_vtx_data(u32* data, u32 size); bool ta_parse_vdrc(TA_context* ctx); -#define STRIPS_AS_PPARAMS 1 -#define TRIG_SORT 1 +#define STRIPS_AS_PPARAMS 0 +#define TRIG_SORT 0 #if TRIG_SORT #undef STRIPS_AS_PPARAMS #define STRIPS_AS_PPARAMS 1 -#endif \ No newline at end of file +#endif diff --git a/core/rend/gles/glcache.h b/core/rend/gles/glcache.h index b72bd61fa..3d1abcad5 100644 --- a/core/rend/gles/glcache.h +++ b/core/rend/gles/glcache.h @@ -9,7 +9,7 @@ public: GLCache() { Reset(); } void BindTexture(GLenum target, GLuint texture) { - if (target == GL_TEXTURE_2D && texture != _texture) { + if ((target == GL_TEXTURE_2D && texture != _texture && !_disable_cache) || _disable_cache) { glBindTexture(target, texture); _texture = texture; } @@ -18,7 +18,7 @@ public: } void BlendFunc(GLenum sfactor, GLenum dfactor) { - if (sfactor != _src_blend_factor || dfactor != _dst_blend_factor) { + if (sfactor != _src_blend_factor || dfactor != _dst_blend_factor || _disable_cache) { _src_blend_factor = sfactor; _dst_blend_factor = dfactor; glBlendFunc(sfactor, dfactor); @@ -26,7 +26,7 @@ public: } void ClearColor(GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha) { - if (red != _clear_r || green != _clear_g || blue != _clear_b || alpha != _clear_a) { + if (red != _clear_r || green != _clear_g || blue != _clear_b || alpha != _clear_a || _disable_cache) { _clear_r = red; _clear_g = green; 
_clear_b = blue; @@ -36,7 +36,7 @@ public: } void CullFace(GLenum mode) { - if (mode != _cull_face) { + if (mode != _cull_face || _disable_cache) { _cull_face = mode; glCullFace(mode); } @@ -52,14 +52,14 @@ public: } void DepthFunc(GLenum func) { - if (func != _depth_func) { + if (func != _depth_func || _disable_cache) { _depth_func = func; glDepthFunc(func); } } void DepthMask(GLboolean flag) { - if (flag != _depth_mask) { + if (flag != _depth_mask || _disable_cache) { _depth_mask = flag; glDepthMask(flag); } @@ -74,14 +74,14 @@ public: } void UseProgram(GLuint program) { - if (program != _program) { + if (program != _program || _disable_cache) { _program = program; glUseProgram(program); } } void StencilFunc(GLenum func, GLint ref, GLuint mask) { - if (_stencil_func != func || _stencil_ref != ref || _stencil_fmask != mask) { + if (_stencil_func != func || _stencil_ref != ref || _stencil_fmask != mask || _disable_cache) { _stencil_func = func; _stencil_ref = ref; _stencil_fmask = mask; @@ -90,7 +90,7 @@ public: } void StencilOp(GLenum sfail, GLenum dpfail, GLenum dppass) { - if (_stencil_sfail != sfail ||_stencil_dpfail != dpfail || _stencil_dppass != dppass) { + if (_stencil_sfail != sfail ||_stencil_dpfail != dpfail || _stencil_dppass != dppass || _disable_cache) { _stencil_sfail = sfail; _stencil_dpfail = dpfail; _stencil_dppass = dppass; @@ -99,14 +99,14 @@ public: } void StencilMask(GLuint mask) { - if (_stencil_mask != mask) { + if (_stencil_mask != mask || _disable_cache) { _stencil_mask = mask; glStencilMask(mask); } } void TexParameteri(GLenum target, GLenum pname, GLint param) { - if (target == GL_TEXTURE_2D) + if (target == GL_TEXTURE_2D && !_disable_cache) { TextureParameters &cur_params = _texture_params[_texture]; switch (pname) { @@ -201,7 +201,7 @@ private: break; } if (pCap != NULL) { - if (*pCap == value) + if (*pCap == value && !_disable_cache) return; *pCap = value; } @@ -237,6 +237,7 @@ private: GLuint _texture_ids[TEXTURE_ID_CACHE_SIZE]; 
GLuint _texture_cache_size; std::map _texture_params; + bool _disable_cache = true; }; extern GLCache glcache; diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 6e4e8cde8..55f22dae0 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -21,15 +21,25 @@ const static u32 CullMode[]= GL_FRONT, //2 Cull if Negative Cull if ( |det| < 0 ) or ( |det| < fpu_cull_val ) GL_BACK, //3 Cull if Positive Cull if ( |det| > 0 ) or ( |det| < fpu_cull_val ) }; +#define INVERT_DEPTH_FUNC const static u32 Zfunction[]= { GL_NEVER, //GL_NEVER, //0 Never +#ifndef INVERT_DEPTH_FUNC GL_LESS, //GL_LESS/*EQUAL*/, //1 Less GL_EQUAL, //GL_EQUAL, //2 Equal GL_LEQUAL, //GL_LEQUAL, //3 Less Or Equal GL_GREATER, //GL_GREATER/*EQUAL*/, //4 Greater GL_NOTEQUAL, //GL_NOTEQUAL, //5 Not Equal GL_GEQUAL, //GL_GEQUAL, //6 Greater Or Equal +#else + GL_GREATER, //GL_LESS/*EQUAL*/, //1 Less + GL_EQUAL, //GL_EQUAL, //2 Equal + GL_GEQUAL, //GL_LEQUAL, //3 Less Or Equal + GL_LESS, //GL_GREATER/*EQUAL*/, //4 Greater + GL_NOTEQUAL, //GL_NOTEQUAL, //5 Not Equal + GL_LEQUAL, //GL_GEQUAL, //6 Greater Or Equal +#endif GL_ALWAYS, //GL_ALWAYS, //7 Always }; @@ -68,9 +78,6 @@ const static u32 SrcBlendGL[] = GL_ONE_MINUS_DST_ALPHA }; -extern int screen_width; -extern int screen_height; - PipelineShader* CurrentShader; u32 gcflip; GLuint fbo; @@ -148,9 +155,9 @@ static void SetTextureRepeatMode(GLuint dir, u32 clamp, u32 mirror) } template - void SetGPState(const PolyParam* gp,u32 cflip=0) + void SetGPState(const PolyParam* gp, bool weighted_average = false, u32 front_peeling = 0, u32 cflip=0) { - CurrentShader = &gl.pogram_table[ + CurrentShader = gl.getShader( GetProgramID(Type == ListType_Punch_Through ? 
1 : 0, SetTileClip(gp->tileclip, false) + 1, gp->pcw.Texture, @@ -158,24 +165,25 @@ template gp->tsp.IgnoreTexA, gp->tsp.ShadInstr, gp->pcw.Offset, - gp->tsp.FogCtrl)]; + gp->tsp.FogCtrl, + weighted_average, + front_peeling)); - if (CurrentShader->program == -1) + if (CurrentShader->program == -1) { + CurrentShader->cp_AlphaTest = Type == ListType_Punch_Through ? 1 : 0; + CurrentShader->pp_ClipTestMode = SetTileClip(gp->tileclip, false); + CurrentShader->pp_Texture = gp->pcw.Texture; + CurrentShader->pp_UseAlpha = gp->tsp.UseAlpha; + CurrentShader->pp_IgnoreTexA = gp->tsp.IgnoreTexA; + CurrentShader->pp_ShadInstr = gp->tsp.ShadInstr; + CurrentShader->pp_Offset = gp->pcw.Offset; + CurrentShader->pp_FogCtrl = gp->tsp.FogCtrl; + CurrentShader->pp_WeightedAverage = weighted_average; + CurrentShader->pp_FrontPeeling = front_peeling; CompilePipelineShader(CurrentShader); - glcache.UseProgram(CurrentShader->program); - - if (Type == ListType_Opaque || Type == ListType_Punch_Through) - { - // FIXME Must be done for drawing pass only and only for opaque and pt - glActiveTexture(GL_TEXTURE1); - glcache.BindTexture(GL_TEXTURE_2D, stencilTexId); - GLint uniform = glGetUniformLocation(CurrentShader->program, "shadow_stencil"); - if (uniform != -1) - { - glUniform1i(uniform, 1); glCheck(); - } - glActiveTexture(GL_TEXTURE0); } + glcache.UseProgram(CurrentShader->program); + ShaderUniforms.Set(CurrentShader); SetTileClip(gp->tileclip,true); @@ -187,28 +195,31 @@ template glcache.BindTexture(GL_TEXTURE_2D, gp->texid == -1 ? 
0 : gp->texid); - SetTextureRepeatMode(GL_TEXTURE_WRAP_S, gp->tsp.ClampU, gp->tsp.FlipU); - SetTextureRepeatMode(GL_TEXTURE_WRAP_T, gp->tsp.ClampV, gp->tsp.FlipV); + if (gp->texid > 0) + { + SetTextureRepeatMode(GL_TEXTURE_WRAP_S, gp->tsp.ClampU, gp->tsp.FlipU); + SetTextureRepeatMode(GL_TEXTURE_WRAP_T, gp->tsp.ClampV, gp->tsp.FlipV); - //set texture filter mode - if (gp->tsp.FilterMode == 0) - { - //disable filtering, mipmaps - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - } - else - { - //bilinear filtering - //PowerVR supports also trilinear via two passes, but we ignore that for now - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (gp->tcw.MipMapped && settings.rend.UseMipmaps) ? GL_LINEAR_MIPMAP_NEAREST : GL_LINEAR); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + //set texture filter mode + if (gp->tsp.FilterMode == 0) + { + //disable filtering, mipmaps + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + } + else + { + //bilinear filtering + //PowerVR supports also trilinear via two passes, but we ignore that for now + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (gp->tcw.MipMapped && settings.rend.UseMipmaps) ? 
GL_LINEAR_MIPMAP_NEAREST : GL_LINEAR); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + } } if (Type==ListType_Translucent) { - glcache.Enable(GL_BLEND); - glcache.BlendFunc(SrcBlendGL[gp->tsp.SrcInstr],DstBlendGL[gp->tsp.DstInstr]); +// glcache.Enable(GL_BLEND); +// glcache.BlendFunc(SrcBlendGL[gp->tsp.SrcInstr],DstBlendGL[gp->tsp.DstInstr]); } else glcache.Disable(GL_BLEND); @@ -221,10 +232,14 @@ template //set Z mode, only if required if (Type == ListType_Punch_Through || (Type == ListType_Translucent && SortingEnabled)) { - if (gp->isp.DepthMode == 7) // Fixes VR2 menu but not sure about this one + if (gp->isp.DepthMode == 7) { // Fixes VR2 menu but not sure about this one glcache.DepthFunc(GL_ALWAYS); + } else - glcache.DepthFunc(GL_GEQUAL); + { + glcache.DepthFunc(Zfunction[6]); // Greater or equal +// glcache.DepthFunc(GL_LESS); + } } else { @@ -232,15 +247,17 @@ template } #if TRIG_SORT - if (SortingEnabled) + if (SortingEnabled && !front_peeling) glcache.DepthMask(GL_FALSE); else #endif - glcache.DepthMask(!gp->isp.ZWriteDis); + if (!weighted_average) + glcache.DepthMask(!gp->isp.ZWriteDis); } template -void DrawList(const List& gply, int first, int count) +void DrawList(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0, + int srcBlendModeFilter = -1, int dstBlendModeFilter = -1) { PolyParam* params = &gply.head()[first]; @@ -260,7 +277,16 @@ void DrawList(const List& gply, int first, int count) { if (params->count>2) //this actually happens for some games. No idea why .. 
{ - SetGPState(params); + if (Type == ListType_Translucent) { + if ((params->tsp.SrcInstr == 0 && params->tsp.DstInstr == 1) // Nothing to do + || (srcBlendModeFilter != -1 && params->tsp.SrcInstr != srcBlendModeFilter) // src filter doesn't match + || (dstBlendModeFilter != -1 && params->tsp.DstInstr != dstBlendModeFilter)) { // dst filter doesn't match + params++; + continue; + } + } + + SetGPState(params, weighted_average, front_peeling); glDrawElements(GL_TRIANGLE_STRIP, params->count, GL_UNSIGNED_SHORT, (GLvoid*)(2*params->first)); glCheck(); } @@ -268,6 +294,20 @@ void DrawList(const List& gply, int first, int count) } } +void DrawListTranslucentAutoSorted(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0, + int srcBlendModeFilter = -1, int dstBlendModeFilter = -1) +{ + DrawList(gply, first, count, weighted_average, front_peeling, srcBlendModeFilter, dstBlendModeFilter); +} +void DrawListOpaque(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0) +{ + DrawList(gply, first, count, weighted_average, front_peeling); +} +void DrawListPunchThrough(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0) +{ + DrawList(gply, first, count, weighted_average, front_peeling); +} + bool operator<(const PolyParam &left, const PolyParam &right) { /* put any condition you want to sort on here */ @@ -780,7 +820,7 @@ void DrawSorted(bool multipass) glcache.UseProgram(gl.modvol_shader.program); glUniform1f(gl.modvol_shader.sp_ShaderColor, 1.f); - glcache.DepthFunc(GL_GEQUAL); + glcache.DepthFunc(Zfunction[6]); // Greater or equal glcache.DepthMask(GL_TRUE); for (u32 p = 0; p < count; p++) @@ -943,7 +983,7 @@ void DrawModVols(int first, int count) glUniform1f(gl.modvol_shader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f); glcache.DepthMask(GL_FALSE); - glcache.DepthFunc(GL_GREATER); + glcache.DepthFunc(Zfunction[4]); if(0) { @@ -1061,8 +1101,15 @@ 
void DrawModVols(int first, int count) //restore states glcache.Enable(GL_DEPTH_TEST); + glcache.DepthMask(GL_TRUE); } +void InitDualPeeling(); +void RenderAverageColors(); +void RenderWeightedBlended(); +void RenderFrontToBackPeeling(int first, int count); +void DualPeelingReshape(int w, int h); + void DrawStrips() { if (fbo == 0) @@ -1100,6 +1147,8 @@ void DrawStrips() glStencilMask(0xFF); glCheck(); glClear(GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glCheck(); } + InitDualPeeling(); + DualPeelingReshape(screen_width, screen_height); SetupMainVBO(); //Draw the strips ! @@ -1126,13 +1175,18 @@ void DrawStrips() DrawModVols(previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); glBindFramebuffer(GL_FRAMEBUFFER, 0); glCheck(); - glcache.BindTexture(GL_TEXTURE_2D, stencilTexId); glCheck(); - glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); glCheck(); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glCheck(); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glCheck(); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + // Bind stencil buffer for the fragment shader (shadowing) + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, stencilTexId); + glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glActiveTexture(GL_TEXTURE0); + glCheck(); + //Opaque DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count); @@ -1141,19 +1195,25 @@ void DrawStrips() //Alpha blended { - if (pvrrc.isAutoSort) - GenSorted(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); - -#if TRIG_SORT - if (pvrrc.isAutoSort) - DrawSorted(render_pass < pvrrc.render_passes.used() - 1); - else - DrawList(pvrrc.global_param_tr, previous_pass.tr_count, 
current_pass.tr_count - previous_pass.tr_count); -#else - if (pvrrc.isAutoSort) - SortPParams(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); - DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); -#endif +// if (hack_on) +// RenderAverageColors(); +// else + RenderFrontToBackPeeling(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); + //RenderWeightedBlended(); +// if (pvrrc.isAutoSort) +// GenSorted(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); +// +//#if TRIG_SORT +// if (pvrrc.isAutoSort) +// DrawSorted(render_pass < pvrrc.render_passes.used() - 1); +// else +// DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); +//#else +// if (pvrrc.isAutoSort) +// SortPParams(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); +// DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); +//#endif + SetupMainVBO(); } previous_pass = current_pass; } diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 6d278f148..39e79cfea 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -88,12 +88,14 @@ uniform highp vec4 depth_scale; \n\ " vary " lowp vec4 vtx_base; \n\ " vary " lowp vec4 vtx_offs; \n\ " vary " mediump vec2 vtx_uv; \n\ +" vary " mediump float vtx_z; \n\ void main() \n\ { \n\ vtx_base=in_base; \n\ vtx_offs=in_offs; \n\ vtx_uv=in_uv; \n\ vec4 vpos=in_pos; \n\ + vtx_z = vpos.z; \n\ vpos.w=1.0/vpos.z; \n" #ifndef GLES "\ @@ -174,10 +176,8 @@ lowp float fog_mode2(highp float invW) \n\ const char* PixelPipelineShader = #ifndef GLES "#version 140 \n" - "out vec4 FragColor; \n" #endif "\ -\ #define cp_AlphaTest %d \n\ #define pp_ClipTestMode %d \n\ #define pp_UseAlpha %d \n\ @@ -186,6 +186,21 @@ const char* PixelPipelineShader = #define pp_ShadInstr %d \n\ #define pp_Offset %d \n\ #define pp_FogCtrl %d 
\n\ +#define pp_WeightedAverage %d \n\ +#define pp_FrontPeeling %d \n\ +#if pp_WeightedAverage == 1 \n\ +#extension GL_ARB_draw_buffers : require \n\ +#endif \n\ +#if pp_FrontPeeling == 2 \n\ +uniform sampler2DRect DepthTex; \n\ +#endif \n" +#ifndef GLES + "\ + #if pp_WeightedAverage == 0 \n\ + out vec4 FragColor; \n\ + #endif \n" +#endif +"\ /* Shader program params*/ \n\ /* gles has no alpha test stage, so its emulated on the shader */ \n\ uniform lowp float cp_AlphaTestValue; \n\ @@ -201,13 +216,27 @@ uniform usampler2D shadow_stencil; \n\ " vary " lowp vec4 vtx_base; \n\ " vary " lowp vec4 vtx_offs; \n\ " vary " mediump vec2 vtx_uv; \n\ +" vary " mediump float vtx_z; \n\ lowp float fog_mode2(highp float w) \n\ { \n\ highp float fog_idx = clamp(w * sp_FOG_DENSITY, 0.0, 127.99); \n\ return clamp(sp_LOG_FOG_COEFS.y * log2(fog_idx) + sp_LOG_FOG_COEFS.x, 0.001, 1.0); //the clamp is required due to yet another bug !\n\ } \n\ void main() \n\ -{ \n\ +{ \n" +#ifndef GLES + "\ + highp float w = 100000.0 * gl_FragCoord.w; \n\ + gl_FragDepth = 1 - log2(1.0 + w) / 34; \n" +#endif + "\ + #if pp_FrontPeeling == 2 \n\ + // Bit-exact comparison between FP32 z-buffer and fragment depth \n\ + highp float frontDepth = texture2DRect(DepthTex, gl_FragCoord.xy).r; \n\ + if (gl_FragDepth <= frontDepth) { \n\ + discard; \n\ + } \n\ + #endif \n\ // Clip outside the box \n\ #if pp_ClipTestMode==1 \n\ if (gl_FragCoord.x < pp_ClipTest.x || gl_FragCoord.x > pp_ClipTest.z \n\ @@ -221,11 +250,11 @@ void main() \n\ discard; \n\ #endif \n\ \n\ - lowp vec4 color=vtx_base; \n\ + highp vec4 color=vtx_base; \n\ #if pp_UseAlpha==0 \n\ color.a=1.0; \n\ #endif\n\ - #if pp_FogCtrl==3 \n\ + #if pp_FogCtrl==3 // LUT Mode 2 \n\ color=vec4(sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ #endif\n\ #if pp_Texture==1 \n\ @@ -239,23 +268,23 @@ void main() \n\ #if cp_AlphaTest == 1 \n\ if (cp_AlphaTestValue>texcol.a) discard;\n\ #endif \n\ - #if pp_ShadInstr==0 \n\ + #if pp_ShadInstr==0 // DECAL \n\ { \n\ 
color=texcol; \n\ } \n\ #endif\n\ - #if pp_ShadInstr==1 \n\ + #if pp_ShadInstr==1 // MODULATE \n\ { \n\ color.rgb*=texcol.rgb; \n\ color.a=texcol.a; \n\ } \n\ #endif\n\ - #if pp_ShadInstr==2 \n\ + #if pp_ShadInstr==2 // DECAL ALPHA \n\ { \n\ color.rgb=mix(color.rgb,texcol.rgb,texcol.a); \n\ } \n\ #endif\n\ - #if pp_ShadInstr==3 \n\ + #if pp_ShadInstr==3 // MODULATE ALPHA \n\ { \n\ color*=texcol; \n\ } \n\ @@ -264,7 +293,7 @@ void main() \n\ #if pp_Offset==1 \n\ { \n\ color.rgb+=vtx_offs.rgb; \n\ - if (pp_FogCtrl==1) \n\ + if (pp_FogCtrl==1) // Per vertex \n\ color.rgb=mix(color.rgb,sp_FOG_COL_VERT.rgb,vtx_offs.a); \n\ } \n\ #endif\n\ @@ -274,7 +303,7 @@ void main() \n\ uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / screen_size); \n\ if (stencil.r == uint(0x81)) \n\ color.rgb *= shade_scale_factor; \n\ - #if pp_FogCtrl==0 \n\ + #if pp_FogCtrl==0 // LUT \n\ { \n\ color.rgb=mix(color.rgb,sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ } \n\ @@ -282,13 +311,31 @@ void main() \n\ #if cp_AlphaTest == 1 \n\ color.a=1.0; \n\ #endif \n\ - //color.rgb=vec3(gl_FragCoord.w * sp_FOG_DENSITY / 128.0);\n" -#ifndef GLES - "\ - highp float w = gl_FragCoord.w * 100000.0; \n\ - gl_FragDepth = log2(1.0 + w) / 34; \n" -#endif - FRAGCOL "=color; \n\ + \n\ + //color.rgb=vec3(gl_FragCoord.w * sp_FOG_DENSITY / 128.0); \n\ + //if (gl_FragCoord.w < 0) \n\ + // color.rgb = vec3(1, 0, 0); \n\ + //else if (gl_FragCoord.w > 1) \n\ + // color.rgb = vec3(0, 1, 0); \n\ + //color = vec4(gl_FragCoord.w*6, 0, 0, 0.5); \n\ + \n\ + #if pp_WeightedAverage == 1 \n\ + // Average colors \n\ + gl_FragData[0] = vec4(color.rgb * color.a, color.a); \n\ + gl_FragData[1] = vec4(1.0); \n\ + // Weighted Blended \n\ +// float viewDepth = abs(1.0 / gl_FragCoord.w); \n\ +// float linearDepth = viewDepth * 4.5; // uDepthScale \n\ +// float weight = clamp(0.03 / (1e-5 + pow(linearDepth/10, 3.0)), 1e-2, 3e3); \n\ +// gl_FragData[0] = vec4(color.rgb * color.a, color.a) * weight; \n\ +// gl_FragData[1] = 
vec4(color.a); \n\ + #elif pp_FrontPeeling == 1 \n" + FRAGCOL " = vec4(color.rgb * color.a, 1.0 - color.a); \n\ + #elif pp_FrontPeeling == 2 \n" + FRAGCOL " = vec4(color.rgb * color.a, color.a); \n\ + #else \n" + FRAGCOL "=color; \n\ + #endif \n\ }"; const char* ModifierVolumeShader = @@ -303,8 +350,8 @@ void main() \n\ { \n" #ifndef GLES "\ - highp float w = gl_FragCoord.w * 100000.0; \n\ - gl_FragDepth = log2(1.0 + w) / 34; \n" + highp float w = 100000.0 * gl_FragCoord.w; \n\ + gl_FragDepth = 1 - log2(1.0 + w) / 34; \n" #endif FRAGCOL "=vec4(0.0, 0.0, 0.0, sp_ShaderColor); \n\ }"; @@ -636,47 +683,7 @@ int screen_height; #endif -struct ShaderUniforms_t -{ - float PT_ALPHA; - float scale_coefs[4]; - float depth_coefs[4]; - float fog_den_float; - float ps_FOG_COL_RAM[3]; - float ps_FOG_COL_VERT[3]; - float fog_coefs[2]; - - void Set(PipelineShader* s) - { - if (s->cp_AlphaTestValue!=-1) - glUniform1f(s->cp_AlphaTestValue,PT_ALPHA); - - if (s->scale!=-1) - glUniform4fv( s->scale, 1, scale_coefs); - - if (s->depth_scale!=-1) - glUniform4fv( s->depth_scale, 1, depth_coefs); - - if (s->sp_FOG_DENSITY!=-1) - glUniform1f( s->sp_FOG_DENSITY,fog_den_float); - - if (s->sp_FOG_COL_RAM!=-1) - glUniform3fv( s->sp_FOG_COL_RAM, 1, ps_FOG_COL_RAM); - - if (s->sp_FOG_COL_VERT!=-1) - glUniform3fv( s->sp_FOG_COL_VERT, 1, ps_FOG_COL_VERT); - - if (s->sp_LOG_FOG_COEFS!=-1) - glUniform2fv(s->sp_LOG_FOG_COEFS,1, fog_coefs); - - if (s->screen_size != -1) - glUniform2f(s->screen_size, (float)screen_width, (float)screen_height); - - if (s->shade_scale_factor != -1) - glUniform1f(s->shade_scale_factor, FPU_SHAD_SCALE.scale_factor / 256.f); - } - -} ShaderUniforms; +ShaderUniforms_t ShaderUniforms; GLuint gl_CompileShader(const char* shader,GLuint type) { @@ -762,7 +769,7 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, - 
u32 pp_FogCtrl) + u32 pp_FogCtrl, bool pp_WeightedAverage, u32 pp_FrontPeeling) { u32 rv=0; @@ -774,17 +781,19 @@ int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, rv<<=2; rv|=pp_ShadInstr; rv<<=1; rv|=pp_Offset; rv<<=2; rv|=pp_FogCtrl; + rv <<= 1; rv |= pp_WeightedAverage; + rv <<= 2; rv |= pp_FrontPeeling; return rv; } -bool CompilePipelineShader( PipelineShader* s) +bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipelineShader */) { char pshader[8192]; - sprintf(pshader,PixelPipelineShader, + sprintf(pshader, source, s->cp_AlphaTest,s->pp_ClipTestMode,s->pp_UseAlpha, - s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl); + s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, (int)s->pp_WeightedAverage, s->pp_FrontPeeling); s->program=gl_CompileAndLink(VertexShaderSource,pshader); @@ -823,7 +832,17 @@ bool CompilePipelineShader( PipelineShader* s) s->screen_size = glGetUniformLocation(s->program, "screen_size"); s->shade_scale_factor = glGetUniformLocation(s->program, "shade_scale_factor"); - ShaderUniforms.Set(s); + // Depth peeling: use texture 1 for depth texture + gu = glGetUniformLocation(s->program, "DepthTex"); + if (gu != -1) + glUniform1i(gu, 1); + else + { + // Shadow stencil for OP/PT rendering pass + gu = glGetUniformLocation(s->program, "shadow_stencil"); + if (gu != -1) + glUniform1i(gu, 1); + } return glIsProgram(s->program)==GL_TRUE; } @@ -847,6 +866,7 @@ bool gl_create_resources() glGenBuffers(1, &gl.vbo.idxs); glGenBuffers(1, &gl.vbo.idxs2); + /* memset(gl.pogram_table,0,sizeof(gl.pogram_table)); PipelineShader* dshader=0; @@ -888,6 +908,7 @@ bool gl_create_resources() } } } + */ @@ -1445,7 +1466,7 @@ bool RenderFrame() //TODO: Make this dynamic float vtx_min_fZ=0.f; //pvrrc.fZ_min; float vtx_max_fZ=pvrrc.fZ_max; - +//printf("Zmin %g Zmax %g\n", pvrrc.fZ_min, pvrrc.fZ_max); //sanitise the values, now with NaN detection (for omap) //0x49800000 is 1024*1024. 
Using integer math to avoid issues w/ infs and nans if ((s32&)vtx_max_fZ<0 || (u32&)vtx_max_fZ>0x49800000) @@ -1665,7 +1686,7 @@ bool RenderFrame() glUniform4fv( gl.OSD_SHADER.depth_scale, 1, td); ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f; - +/* for (u32 i=0;i #ifdef GLES #if defined(TARGET_IPHONE) //apple-specific ogles2 headers @@ -61,6 +61,8 @@ struct PipelineShader // u32 cp_AlphaTest; s32 pp_ClipTestMode; u32 pp_Texture, pp_UseAlpha, pp_IgnoreTexA, pp_ShadInstr, pp_Offset, pp_FogCtrl; + bool pp_WeightedAverage; + u32 pp_FrontPeeling; }; @@ -86,7 +88,7 @@ struct gl_ctx } modvol_shader; - PipelineShader pogram_table[768*2]; + std::map shaders; struct { GLuint program,scale,depth_scale; @@ -100,8 +102,15 @@ struct gl_ctx #endif } vbo; - - //GLuint matrix; + PipelineShader *getShader(int programId) { + PipelineShader *shader = shaders[programId]; + if (shader == NULL) { + shader = new PipelineShader(); + shaders[programId] = shader; + shader->program = -1; + } + return shader; + } }; extern gl_ctx gl; @@ -119,14 +128,66 @@ void CollectCleanup(); void DoCleanup(); void SortPParams(int first, int count); +extern int screen_width; +extern int screen_height; + void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt); void ReadRTTBuffer(); int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, - u32 pp_FogCtrl); + u32 pp_FogCtrl, bool pp_WeightedAverage, u32 pp_FrontPeeling); -bool CompilePipelineShader(PipelineShader* s); +struct ShaderUniforms_t +{ + float PT_ALPHA; + float scale_coefs[4]; + float depth_coefs[4]; + float fog_den_float; + float ps_FOG_COL_RAM[3]; + float ps_FOG_COL_VERT[3]; + float fog_coefs[2]; + + void Set(PipelineShader* s) + { + if (s->cp_AlphaTestValue!=-1) + glUniform1f(s->cp_AlphaTestValue,PT_ALPHA); + + if (s->scale!=-1) + glUniform4fv( s->scale, 1, scale_coefs); + + if (s->depth_scale!=-1) + glUniform4fv( s->depth_scale, 1, depth_coefs); + + 
if (s->sp_FOG_DENSITY!=-1) + glUniform1f( s->sp_FOG_DENSITY,fog_den_float); + + if (s->sp_FOG_COL_RAM!=-1) + glUniform3fv( s->sp_FOG_COL_RAM, 1, ps_FOG_COL_RAM); + + if (s->sp_FOG_COL_VERT!=-1) + glUniform3fv( s->sp_FOG_COL_VERT, 1, ps_FOG_COL_VERT); + + if (s->sp_LOG_FOG_COEFS!=-1) + glUniform2fv(s->sp_LOG_FOG_COEFS,1, fog_coefs); + + if (s->screen_size != -1) + glUniform2f(s->screen_size, (float)screen_width, (float)screen_height); + + if (s->shade_scale_factor != -1) + glUniform1f(s->shade_scale_factor, FPU_SHAD_SCALE.scale_factor / 256.f); + } + +}; +extern ShaderUniforms_t ShaderUniforms; + +extern const char *PixelPipelineShader; +bool CompilePipelineShader(PipelineShader* s, const char *source = PixelPipelineShader); #define TEXTURE_LOAD_ERROR 0 GLuint loadPNG(const string& subpath, int &width, int &height); extern GLuint stencilTexId; + +void DrawListTranslucentAutoSorted(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0, int srcBlendModeFilter = -1, int dstBlendModeFilter = -1); +void DrawListOpaque(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0); +void DrawListPunchThrough(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0); +void SetupMainVBO(); diff --git a/core/rend/gles/render_tr.cpp b/core/rend/gles/render_tr.cpp new file mode 100644 index 000000000..a82e60536 --- /dev/null +++ b/core/rend/gles/render_tr.cpp @@ -0,0 +1,527 @@ +#include "glcache.h" + +//#define GL_TEXTURE_RECTANGLE_ARB 0x84F5 +#define GL_RGB16F_ARB 0x881B +#define GL_RGBA16F_ARB 0x881A + +static int g_imageWidth = 0; +static int g_imageHeight = 0; +static GLuint g_quadBuffer = 0; +static GLuint g_quadVertexArray = 0; + +GLenum g_drawBuffers[] = {GL_COLOR_ATTACHMENT0, + GL_COLOR_ATTACHMENT1, + GL_COLOR_ATTACHMENT2, + GL_COLOR_ATTACHMENT3, + GL_COLOR_ATTACHMENT4, + GL_COLOR_ATTACHMENT5, + GL_COLOR_ATTACHMENT6 +}; + +// +// Weighted Average +// +static GLuint 
g_accumulationTexId[2]; +static GLuint g_accumulationFboId; +PipelineShader g_wavg_final_shader; + +void InitAccumulationRenderTargets() +{ + glGenTextures(2, g_accumulationTexId); + + glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_accumulationTexId[0]); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA16F, + g_imageWidth, g_imageHeight, 0, GL_RGBA, GL_FLOAT, NULL); + + glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_accumulationTexId[1]); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RG32F, + g_imageWidth, g_imageHeight, 0, GL_RGBA, GL_FLOAT, NULL); + + glGenFramebuffers(1, &g_accumulationFboId); + glBindFramebuffer(GL_FRAMEBUFFER, g_accumulationFboId); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_RECTANGLE, g_accumulationTexId[0], 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, + GL_TEXTURE_RECTANGLE, g_accumulationTexId[1], 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, stencilTexId, 0); + + glCheck(); +} + +void DeleteAccumulationRenderTargets() +{ + glDeleteFramebuffers(1, &g_accumulationFboId); + glDeleteTextures(2, g_accumulationTexId); +} + +const char *wavg_final_fragment = "\ +#version 140 \n\ +out vec4 FragColor; \n\ +uniform sampler2DRect ColorTex0; \n\ +uniform sampler2DRect ColorTex1; \n\ + \n\ +void main(void) \n\ +{ \n\ 
+ highp vec4 SumColor = texture2DRect(ColorTex0, gl_FragCoord.xy); \n\ + highp float n = texture2DRect(ColorTex1, gl_FragCoord.xy).r; \n\ + \n\ + // Average Color \n\ + highp vec3 AvgColor = SumColor.rgb / SumColor.a; \n\ + if (n == 0.0 || isinf(AvgColor.r) || isinf(AvgColor.g) || isinf(AvgColor.b) || isnan(AvgColor.r) || isnan(AvgColor.g) || isnan(AvgColor.b)) { \n\ + FragColor.rgba = vec4(0, 0, 0, 0); \n\ + return; \n\ + } \n\ + \n\ + highp float AvgAlpha = SumColor.a / n; \n\ + \n\ + highp float T = pow(1.0 - AvgAlpha, n); \n\ + if (isnan(T)) T = 0; \n\ + FragColor.rgb = AvgColor; \n\ + FragColor.a = 1 - T; \n\ + // Weighted Blended \n\ +// vec3 AvgColor = SumColor.rgb / max(SumColor.a, 0.00001); \n\ +// FragColor.rgb = AvgColor; \n\ +// FragColor.a = 1 - n; \n\ + //FragColor.rgb = vec3(AvgAlpha, 0, 0); FragColor.a = 1.0; \n\ + //FragColor.rgb = AvgColor; FragColor.a = 1.0; \n\ +} \n\ +"; + +void DrawQuad() +{ + glBindVertexArray(g_quadVertexArray); + + struct Vertex vertices[] = { + { 0, screen_height, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 1 }, + { 0, 0, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 0 }, + { screen_width, screen_height, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 1 }, + { screen_width, 0, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 0 }, + }; + GLushort indices[] = { 0, 1, 2, 1, 3 }; + + glBindBuffer(GL_ARRAY_BUFFER, g_quadBuffer); glCheck(); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STREAM_DRAW); glCheck(); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glCheck(); + + glEnableVertexAttribArray(VERTEX_POS_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_POS_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,x)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_BASE_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_COL_BASE_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,col)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_OFFS_ARRAY); glCheck(); + 
glVertexAttribPointer(VERTEX_COL_OFFS_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,spc)); glCheck(); + + glEnableVertexAttribArray(VERTEX_UV_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_UV_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,u)); glCheck(); + + glDrawElements(GL_TRIANGLE_STRIP, 5, GL_UNSIGNED_SHORT, indices); glCheck(); +} + +//-------------------------------------------------------------------------- +void RenderAverageColors() +{ + glcache.Enable(GL_DEPTH_TEST); + glcache.DepthMask(false); + + // --------------------------------------------------------------------- + // 1. Accumulate Colors and Depth Complexity + // --------------------------------------------------------------------- + + glBindFramebuffer(GL_FRAMEBUFFER, g_accumulationFboId); + glDrawBuffers(2, g_drawBuffers); + + glcache.ClearColor(0, 0, 0, 0); + glClear(GL_COLOR_BUFFER_BIT); + + glBlendEquation(GL_FUNC_ADD); + glcache.BlendFunc(GL_ONE, GL_ONE); + glcache.Enable(GL_BLEND); + + DrawListTranslucentAutoSorted(pvrrc.global_param_tr, 0, pvrrc.global_param_tr.used(), true); + + glCheck(); + + // --------------------------------------------------------------------- + // 2. 
Approximate Blending + // --------------------------------------------------------------------- + + glBindFramebuffer(GL_FRAMEBUFFER, 0); + + glcache.Enable(GL_BLEND); + glcache.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + glcache.Disable(GL_DEPTH_TEST); + + glcache.UseProgram(g_wavg_final_shader.program); + ShaderUniforms.Set(&g_wavg_final_shader); + glActiveTexture(GL_TEXTURE0); + glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_accumulationTexId[0]); + glUniform1i(glGetUniformLocation(g_wavg_final_shader.program, "ColorTex0"), 0); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_RECTANGLE, g_accumulationTexId[1]); + glUniform1i(glGetUniformLocation(g_wavg_final_shader.program, "ColorTex1"), 1); + glActiveTexture(GL_TEXTURE0); + + DrawQuad(); + + glCheck(); +} + +extern bool hack_on; + +void RenderWeightedBlended() +{ +// if (hack_on) +// glcache.Disable(GL_DEPTH_TEST); +// else + glcache.Enable(GL_DEPTH_TEST); + glcache.DepthMask(false); + + // --------------------------------------------------------------------- + // 1. Accumulate Colors and Depth Complexity + // --------------------------------------------------------------------- + + glBindFramebuffer(GL_FRAMEBUFFER, g_accumulationFboId); + glDrawBuffers(2, g_drawBuffers); + + glcache.ClearColor(0, 0, 0, 1); + glClear(GL_COLOR_BUFFER_BIT); + + glcache.Enable(GL_BLEND); + glBlendFuncSeparate(GL_ONE, GL_ONE, GL_ZERO, GL_ONE_MINUS_SRC_ALPHA); + + DrawListTranslucentAutoSorted(pvrrc.global_param_tr, 0, pvrrc.global_param_tr.used(), true); + + glCheck(); + + // --------------------------------------------------------------------- + // 2. 
Approximate Blending + // --------------------------------------------------------------------- + + glBindFramebuffer(GL_FRAMEBUFFER, 0); + + glcache.Enable(GL_BLEND); + glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA); + + glcache.UseProgram(g_wavg_final_shader.program); + ShaderUniforms.Set(&g_wavg_final_shader); + glActiveTexture(GL_TEXTURE0); + glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_accumulationTexId[0]); + glUniform1i(glGetUniformLocation(g_wavg_final_shader.program, "ColorTex0"), 0); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_RECTANGLE, g_accumulationTexId[1]); + glUniform1i(glGetUniformLocation(g_wavg_final_shader.program, "ColorTex1"), 1); + glActiveTexture(GL_TEXTURE0); + + DrawQuad(); + + glCheck(); +} + +// +// Front depth peeling +// +static float g_opacity = 0.6; +static int g_numPasses = 4; // SoA opening sequence needs at least 12 passes!!! +GLuint g_frontFboId[2]; +GLuint g_frontDepthTexId[2]; +GLuint g_frontColorTexId[2]; +GLuint g_frontColorBlenderTexId; +GLuint g_frontColorBlenderFboId; +GLuint g_frontDepthInitTexId; +GLuint g_samples_query; +PipelineShader g_front_blend_shader; +PipelineShader g_front_final_shader; + +const char *front_blend_fragment_source = "\ +#version 140 \n\ +out vec4 FragColor; \n\ +uniform sampler2DRect TempTex; \n\ + \n\ +void main(void) \n\ +{ \n\ + FragColor = texture2DRect(TempTex, gl_FragCoord.xy); \n\ +} \n\ +"; + + +const char *front_final_fragment_source = "\ +#version 140 \n\ +out vec4 FragColor; \n\ +uniform sampler2DRect ColorTex; \n\ + \n\ +void main(void) \n\ +{ \n\ + vec4 frontColor = texture2DRect(ColorTex, gl_FragCoord.xy); \n\ +// if (frontColor.a >= 0.99) { \n\ +// FragColor = vec4(0, 0, 0, 0); \n\ +// return; \n\ +// } \n\ + FragColor.rgb = frontColor.rgb / (1 - frontColor.a); \n\ + FragColor.a = 1 - frontColor.a; \n\ +} \n\ +"; + +void InitFrontPeelingRenderTargets() +{ + glGenTextures(2, g_frontDepthTexId); + glGenTextures(2, g_frontColorTexId); + glGenFramebuffers(2, 
g_frontFboId); + + for (int i = 0; i < 2; i++) + { + glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_frontDepthTexId[i]); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_DEPTH_COMPONENT, + g_imageWidth, g_imageHeight, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL); + + glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_frontColorTexId[i]); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA, g_imageWidth, g_imageHeight, + 0, GL_RGBA, GL_FLOAT, 0); + + glBindFramebuffer(GL_FRAMEBUFFER, g_frontFboId[i]); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + GL_TEXTURE_RECTANGLE, g_frontDepthTexId[i], 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_RECTANGLE, g_frontColorTexId[i], 0); + } + + g_frontColorBlenderTexId = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_frontColorBlenderTexId); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA, g_imageWidth, g_imageHeight, + 0, GL_RGBA, GL_FLOAT, 0); + + g_frontDepthInitTexId = glcache.GenTexture(); + 
glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_frontDepthInitTexId); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_DEPTH_COMPONENT, + g_imageWidth, g_imageHeight, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL); + + glGenFramebuffers(1, &g_frontColorBlenderFboId); + glBindFramebuffer(GL_FRAMEBUFFER, g_frontColorBlenderFboId); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + GL_TEXTURE_RECTANGLE, g_frontDepthTexId[0], 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_RECTANGLE, g_frontColorBlenderTexId, 0); + + glGenQueries(1, &g_samples_query); + + glCheck(); +} + +void DeleteFrontPeelingRenderTargets() +{ + glDeleteFramebuffers(2, g_frontFboId); + glDeleteFramebuffers(1, &g_frontColorBlenderFboId); + glcache.DeleteTextures(2, g_frontDepthTexId); + glcache.DeleteTextures(2, g_frontColorTexId); + glcache.DeleteTextures(1, &g_frontColorBlenderTexId); + glcache.DeleteTextures(1, &g_frontDepthInitTexId); + glDeleteQueries(1, &g_samples_query); +} + +//-------------------------------------------------------------------------- +void RenderFrontToBackPeeling(int first, int count) +{ + // --------------------------------------------------------------------- + // 1. Initialize Min Depth Buffer + // --------------------------------------------------------------------- + + glBindFramebuffer(GL_FRAMEBUFFER, g_frontColorBlenderFboId); + glDrawBuffer(g_drawBuffers[0]); + + glcache.DepthMask(true); + glcache.Enable(GL_DEPTH_TEST); + + glClearDepthf(1); + glcache.ClearColor(0, 0, 0, 1); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + + glcache.Disable(GL_BLEND); + // TODO Hack to get the depth from OP and PT passes. 
+ glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + + DrawListOpaque(pvrrc.global_param_op, 0, pvrrc.global_param_op.used(), false, 1); + DrawListPunchThrough(pvrrc.global_param_pt, 0, pvrrc.global_param_pt.used(), false, 1); + + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + + glBindTexture(GL_TEXTURE_RECTANGLE, g_frontDepthInitTexId); + glCopyTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_DEPTH_COMPONENT, 0, 0, g_imageWidth, g_imageHeight, 0); + + DrawListTranslucentAutoSorted(pvrrc.global_param_tr, first, count, false, 1, 4, 5); // FIXME initial pass for other blend modes + + glCheck(); + + // --------------------------------------------------------------------- + // 2. Depth Peeling + Blending + // --------------------------------------------------------------------- +extern bool hack_on; +if (hack_on) + g_numPasses = 4; +else + g_numPasses = 20; + + int numLayers = (g_numPasses - 1) * 2; + for (int layer = 1; layer < numLayers; layer++) { + int currId = layer % 2; + int prevId = 1 - currId; + + glBindFramebuffer(GL_FRAMEBUFFER, g_frontFboId[currId]); + glDrawBuffer(g_drawBuffers[0]); + + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + GL_TEXTURE_RECTANGLE, g_frontDepthInitTexId, 0); + glBindTexture(GL_TEXTURE_RECTANGLE, g_frontDepthTexId[currId]); + glCopyTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_DEPTH_COMPONENT, 0, 0, g_imageWidth, g_imageHeight, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, + GL_TEXTURE_RECTANGLE, g_frontDepthTexId[currId], 0); + + glcache.ClearColor(0, 0, 0, 0); + glClear(GL_COLOR_BUFFER_BIT); + + glcache.Enable(GL_DEPTH_TEST); + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_RECTANGLE, g_frontDepthTexId[prevId]); // DepthTex + glActiveTexture(GL_TEXTURE0); + + glBeginQuery(GL_SAMPLES_PASSED, g_samples_query); + // Peeling shader + DrawListTranslucentAutoSorted(pvrrc.global_param_tr, first, count, false, 2, 4, 5); // Only 4,5 blending + //DrawListTranslucentAutoSorted(pvrrc.global_param_tr, first, 
count, false, 2, 1, 1); // Only 1,1 blending + //DrawListTranslucentAutoSorted(pvrrc.global_param_tr, first, count, false, 2, 4, 1); // Only 4,1 blending + glEndQuery(GL_SAMPLES_PASSED); + + glCheck(); + + GLuint sample_count; + glGetQueryObjectuiv(g_samples_query, GL_QUERY_RESULT, &sample_count); + if (sample_count == 0) { + printf("Aborting depth peeling after %d layers\n", layer); + break; + } + + glBindFramebuffer(GL_FRAMEBUFFER, g_frontColorBlenderFboId); + glDrawBuffer(g_drawBuffers[0]); + + glcache.Disable(GL_DEPTH_TEST); + glcache.Enable(GL_BLEND); + + glBlendEquation(GL_FUNC_ADD); + glBlendFuncSeparate(GL_DST_ALPHA, GL_ONE, + GL_ZERO, GL_ONE_MINUS_SRC_ALPHA); + // Let's do it again for 1,1 blending + //glBlendEquationSeparate(GL_FUNC_ADD, GL_FUNC_REVERSE_SUBTRACT); + //glBlendFunc(GL_ONE, GL_ONE); + // Let's do it again for 4,1 blending + //glBlendEquationSeparate(GL_FUNC_ADD, GL_FUNC_REVERSE_SUBTRACT); + //glBlendFunc(GL_SRC_ALPHA, GL_ONE); + + glcache.UseProgram(g_front_blend_shader.program); + ShaderUniforms.Set(&g_front_blend_shader); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_RECTANGLE, g_frontColorTexId[currId]); // TempTex + glActiveTexture(GL_TEXTURE0); + + // Blending shader + DrawQuad(); + + SetupMainVBO(); + + glcache.Disable(GL_BLEND); + + glCheck(); + } + + // --------------------------------------------------------------------- + // 3. Final Pass + // --------------------------------------------------------------------- + + glBindFramebuffer(GL_FRAMEBUFFER, 0); + glDrawBuffer(GL_BACK); + glcache.Disable(GL_DEPTH_TEST); + + glcache.Enable(GL_BLEND); + + // FIXME dst=GL_ONE blending don't reduce the dst alpha, so this blending function cannot work. + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); // No cache! 
b/c of glBlendFuncSeparate + // kinda works for GL_ONE blending + //glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); + + glcache.UseProgram(g_front_final_shader.program); + ShaderUniforms.Set(&g_front_final_shader); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_RECTANGLE, g_frontColorBlenderTexId); // ColorTex + glActiveTexture(GL_TEXTURE0); + + // Final blending + DrawQuad(); + + glCheck(); +} + + +void InitDualPeeling() +{ + if (g_accumulationTexId[0] != 0 || g_frontFboId[0] != 0) + return; + + g_imageWidth = screen_width; + g_imageHeight = screen_height; + + glGenVertexArrays(1, &g_quadVertexArray); + glGenBuffers(1, &g_quadBuffer); + + // Allocate render targets first + InitFrontPeelingRenderTargets(); + InitAccumulationRenderTargets(); + glBindFramebuffer(GL_FRAMEBUFFER, 0); + + // Build shaders + + CompilePipelineShader(&g_wavg_final_shader, wavg_final_fragment); + + CompilePipelineShader(&g_front_blend_shader, front_blend_fragment_source); + glUniform1i(glGetUniformLocation(g_front_blend_shader.program, "TempTex"), 1); + + CompilePipelineShader(&g_front_final_shader, front_final_fragment_source); + glUniform1i(glGetUniformLocation(g_front_final_shader.program, "ColorTex"), 1); +} + +void DualPeelingReshape(int w, int h) +{ + if (g_imageWidth != w || g_imageHeight != h) + { + g_imageWidth = w; + g_imageHeight = h; + + DeleteFrontPeelingRenderTargets(); + InitFrontPeelingRenderTargets(); + + DeleteAccumulationRenderTargets(); + InitAccumulationRenderTargets(); + } +} From c005515f2198253a67dc72579d4d34fdac34800a Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 27 May 2018 10:48:52 +0200 Subject: [PATCH 04/65] WIP A-buffers --- core/rend/gles/abuffer.cpp | 256 +++++++++++++++++++++++++++++++++++ core/rend/gles/gldraw.cpp | 191 +++++++++++++++++++------- core/rend/gles/gles.cpp | 79 ++++++++--- core/rend/gles/gles.h | 6 + core/rend/gles/render_tr.cpp | 131 ++++++++++-------- 5 files changed, 532 insertions(+), 131 
deletions(-) create mode 100644 core/rend/gles/abuffer.cpp diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp new file mode 100644 index 000000000..79a7e8908 --- /dev/null +++ b/core/rend/gles/abuffer.cpp @@ -0,0 +1,256 @@ +/* + * abuffer.cpp + * + * Created on: May 26, 2018 + * Author: raph + */ +#include "glcache.h" + +#define ABUFFER_SIZE 32 + +GLuint abufferTexID = 0; +GLuint abufferBlendingTexID = 0; +GLuint abufferCounterTexID = 0; +PipelineShader g_abuffer_final_shader; +PipelineShader g_abuffer_clear_shader; + +static int g_imageWidth = 0; +static int g_imageHeight = 0; + +extern void DrawQuad(); + +static const char *final_shader_source = "\ +#version 140 \n\ +#extension GL_EXT_shader_image_load_store : enable \n\ +#define ABUFFER_SIZE 32 \n\ +out vec4 FragColor; \n\ +uniform layout(size1x32) uimage2D abufferCounterImg; \n\ +uniform layout(size4x32) image2DArray abufferImg; \n\ +uniform layout(size2x32) image2DArray abufferBlendingImg; \n\ +uniform lowp vec2 screen_size; \n\ + \n\ +vec4 colorList[ABUFFER_SIZE]; \n\ +vec4 depthBlendList[ABUFFER_SIZE]; \n\ + \n\ +int resolveClosest(ivec2 coords, int abNumFrag) { \n\ + \n\ + // Search smallest z \n\ + float minZ = 1000000.0f; \n\ + int minIdx; \n\ + for (int i = 0; i < abNumFrag; i++) { \n\ + vec4 val = imageLoad(abufferBlendingImg, ivec3(coords, i)); \n\ + if (val.x < minZ) { \n\ + minZ = val.x; \n\ + minIdx = i; \n\ + } \n\ + } \n\ + \n\ + // Return the index of the closest fragment \n\ + return minIdx; \n\ +} \n\ + \n\ + \n\ +int findOpaque(ivec2 coords, int abNumFrag) { \n\ + \n\ + for (int i = 0; i < abNumFrag; i++) { \n\ + vec4 val = imageLoad(abufferBlendingImg, ivec3(coords, i)); \n\ + if (round(val.y) == 8) { \n\ + return i; \n\ + } \n\ + } \n\ + \n\ + return 0; \n\ +} \n\ + \n\ +void fillFragmentArray(ivec2 coords, int abNumFrag) { \n\ + // Load fragments into a local memory array for sorting \n\ + for (int i = 0; i < abNumFrag; i++) { \n\ + colorList[i] = imageLoad(abufferImg, 
ivec3(coords, i)); \n\ + depthBlendList[i] = imageLoad(abufferBlendingImg, ivec3(coords, i)); \n\ + } \n\ +} \n\ + \n\ +// Bubble sort used to sort fragments \n\ +void bubbleSort(int array_size) { \n\ + for (int i = (array_size - 2); i >= 0; --i) { \n\ + for (int j = 0; j <= i; ++j) { \n\ + if (depthBlendList[j].x > depthBlendList[j+1].x) { \n\ + vec4 temp = depthBlendList[j + 1]; \n\ + depthBlendList[j + 1] = depthBlendList[j]; \n\ + depthBlendList[j] = temp; \n\ + temp = colorList[j + 1]; \n\ + colorList[j + 1] = colorList[j]; \n\ + colorList[j] = temp; \n\ + } \n\ + } \n\ + } \n\ +} \n\ + \n\ +// Blend fragments back-to-front \n\ +vec4 resolveAlphaBlend(ivec2 coords, int abNumFrag){ \n\ + \n\ + // Copy fragments in local array \n\ + fillFragmentArray(coords, abNumFrag); \n\ + \n\ + // Sort fragments in local memory array \n\ + bubbleSort(abNumFrag); \n\ + \n\ + vec4 finalColor = colorList[abNumFrag - 1]; \n\ + for (int i = abNumFrag - 2; i >= 0; i--) { \n\ + vec4 col = colorList[i]; \n\ + \n\ + int srcBlend = int(depthBlendList[i].y) / 8; \n\ + int dstBlend = int(depthBlendList[i].y) % 8; \n\ + if (srcBlend == 1 && dstBlend == 0) \n\ + finalColor = col; \n\ + else if (srcBlend == 4 && dstBlend == 5) \n\ + finalColor = finalColor * (1 - col.a) + col * (col.a); \n\ + else if (srcBlend == 4 && dstBlend == 1) \n\ + finalColor = finalColor + col * (col.a); \n\ + else if (srcBlend == 1 && dstBlend == 1) \n\ + finalColor = finalColor + col; \n\ + } \n\ + finalColor.a = 1; \n\ + return finalColor; \n\ + \n\ +} \n\ + \n\ +void main(void) \n\ +{ \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + int abNumFrag = int(imageLoad(abufferCounterImg, coords).r); \n\ + // Crash without this (WTF ?) 
\n\ + if (abNumFrag < 0) \n\ + abNumFrag = 0; \n\ + if (abNumFrag > ABUFFER_SIZE) \n\ + abNumFrag = ABUFFER_SIZE; \n\ + if (abNumFrag > 0) { \n\ + // Compute and output final color for the frame buffer \n\ + //If we only want the closest fragment \n\ + //int minIdx = resolveClosest(coords, abNumFrag); \n\ + //FragColor = vec4(float(abNumFrag) / ABUFFER_SIZE, 0, 0, 1); \n\ + //FragColor = imageLoad(abufferImg, ivec3(coords, 0)); \n\ + FragColor = resolveAlphaBlend(coords, abNumFrag); \n\ + } \n\ + else \n\ + // If no fragment, write nothing \n\ + discard; \n\ + \n\ +} \n\ +"; + +static const char *clear_shader_source = "\ +#version 140 \n\ +#extension GL_EXT_shader_image_load_store : enable \n\ +coherent uniform layout(size1x32) uimage2D abufferCounterImg; \n\ +coherent uniform layout(size4x32) image2DArray abufferImg; \n\ +coherent uniform layout(size2x32) image2DArray abufferBlendingImg; \n\ + \n\ +void main(void) \n\ +{ \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + \n\ + // Reset counter \n\ + imageStore(abufferCounterImg, coords, uvec4(0)); \n\ + \n\ + // FIXME should not be necessary \n\ + // Put black in first layer \n\ + //imageStore(abufferImg, ivec3(coords, 0), vec4(0)); \n\ + // Reset depth \n\ + //imageStore(abufferBlendingImg, ivec3(coords, 0), vec4(0)); \n\ + \n\ + // Discard fragment so nothing is writen to the framebuffer \n\ + discard; \n\ +} \n\ +"; + +void initABuffer() +{ + g_imageWidth = screen_width; + g_imageHeight = screen_height; + + if (abufferTexID == 0) + abufferTexID = glcache.GenTexture(); + glActiveTexture(GL_TEXTURE3); glCheck(); + glBindTexture(GL_TEXTURE_2D_ARRAY, abufferTexID); glCheck(); + glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glCheck(); + glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glCheck(); + glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA32F, g_imageWidth, g_imageHeight, ABUFFER_SIZE, 0, GL_RGBA, GL_FLOAT, 0); glCheck(); + glBindImageTexture(3, abufferTexID, 0, 
true, 0, GL_READ_WRITE, GL_RGBA32F); + glCheck(); + + if (abufferCounterTexID == 0) + abufferCounterTexID = glcache.GenTexture(); + glActiveTexture(GL_TEXTURE4); + glBindTexture(GL_TEXTURE_2D, abufferCounterTexID); + + // Set filter + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + //Texture creation + //Uses GL_R32F instead of GL_R32I that is not working in R257.15 + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, g_imageWidth, g_imageHeight, 0, GL_RED, GL_FLOAT, 0); + glBindImageTexture(4, abufferCounterTexID, 0, false, 0, GL_READ_WRITE, GL_R32UI); + glCheck(); + + if (abufferBlendingTexID == 0) + abufferBlendingTexID = glcache.GenTexture(); + glActiveTexture(GL_TEXTURE5); + glBindTexture(GL_TEXTURE_2D_ARRAY, abufferBlendingTexID); + glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RG32F, g_imageWidth, g_imageHeight, ABUFFER_SIZE, 0, GL_RG, GL_FLOAT, 0); + glBindImageTexture(5, abufferBlendingTexID, 0, true, 0, GL_READ_WRITE, GL_RG32F); + + if (g_abuffer_final_shader.program == 0) + CompilePipelineShader(&g_abuffer_final_shader, final_shader_source); + if (g_abuffer_clear_shader.program == 0) + CompilePipelineShader(&g_abuffer_clear_shader, clear_shader_source); + + glCheck(); +} + +void reshapeABuffer(int w, int h) +{ + if (w != g_imageWidth || h != g_imageHeight) { + glcache.DeleteTextures(1, &abufferTexID); + abufferTexID = 0; + glcache.DeleteTextures(1, &abufferCounterTexID); + abufferCounterTexID = 0; + glcache.DeleteTextures(1, &abufferBlendingTexID); + abufferBlendingTexID = 0; + + initABuffer(); + } +} + +void renderABuffer() { + glActiveTexture(GL_TEXTURE3); + glBindTexture(GL_TEXTURE_2D_ARRAY, abufferTexID); + glCheck(); + glActiveTexture(GL_TEXTURE4); + glBindTexture(GL_TEXTURE_2D, abufferCounterTexID); + glCheck(); + 
glActiveTexture(GL_TEXTURE5); + glBindTexture(GL_TEXTURE_2D_ARRAY, abufferBlendingTexID); + glCheck(); + + glcache.UseProgram(g_abuffer_final_shader.program); + ShaderUniforms.Set(&g_abuffer_final_shader); + + glcache.Disable(GL_BLEND); + glcache.Disable(GL_DEPTH_TEST); + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + DrawQuad(); + + glCheck(); + + glcache.UseProgram(g_abuffer_clear_shader.program); + ShaderUniforms.Set(&g_abuffer_clear_shader); + + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + DrawQuad(); + + glCheck(); +} diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 55f22dae0..ecc8c4bbc 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -80,8 +80,10 @@ const static u32 SrcBlendGL[] = PipelineShader* CurrentShader; u32 gcflip; -GLuint fbo; +GLuint geom_fbo; GLuint stencilTexId; +//GLuint opaqueTexId; +GLuint depthTexId; s32 SetTileClip(u32 val, bool set) { @@ -155,11 +157,41 @@ static void SetTextureRepeatMode(GLuint dir, u32 clamp, u32 mirror) } template - void SetGPState(const PolyParam* gp, bool weighted_average = false, u32 front_peeling = 0, u32 cflip=0) + void SetGPState(const PolyParam* gp, bool weighted_average = false, u32 front_peeling = 0, bool geometry_only = false, u32 cflip=0) { - CurrentShader = gl.getShader( - GetProgramID(Type == ListType_Punch_Through ? 1 : 0, - SetTileClip(gp->tileclip, false) + 1, + s32 clipping = SetTileClip(gp->tileclip, false); + int shaderId; + if (geometry_only) + { + shaderId = GetProgramID(Type == ListType_Punch_Through ? 1 : 0, + clipping + 1, + Type == ListType_Punch_Through ? gp->pcw.Texture : 0, + 0, + gp->tsp.IgnoreTexA, + 0, + 0, + 2, + false, + 1); // FIXME Hack: using front peeling to avoid writing to 3D array + CurrentShader = gl.getShader(shaderId); + if (CurrentShader->program == -1) { + CurrentShader->cp_AlphaTest = Type == ListType_Punch_Through ? 
1 : 0; + CurrentShader->pp_ClipTestMode = clipping; + CurrentShader->pp_Texture = Type == ListType_Punch_Through ? gp->pcw.Texture : 0; + CurrentShader->pp_UseAlpha = 0; + CurrentShader->pp_IgnoreTexA = gp->tsp.IgnoreTexA; + CurrentShader->pp_ShadInstr = 0; + CurrentShader->pp_Offset = 0; + CurrentShader->pp_FogCtrl = 2; + CurrentShader->pp_WeightedAverage = false; + CurrentShader->pp_FrontPeeling = 1; + CompilePipelineShader(CurrentShader); + } + } + else + { + shaderId = GetProgramID(Type == ListType_Punch_Through ? 1 : 0, + clipping + 1, gp->pcw.Texture, gp->tsp.UseAlpha, gp->tsp.IgnoreTexA, @@ -167,22 +199,34 @@ template gp->pcw.Offset, gp->tsp.FogCtrl, weighted_average, - front_peeling)); - - if (CurrentShader->program == -1) { - CurrentShader->cp_AlphaTest = Type == ListType_Punch_Through ? 1 : 0; - CurrentShader->pp_ClipTestMode = SetTileClip(gp->tileclip, false); - CurrentShader->pp_Texture = gp->pcw.Texture; - CurrentShader->pp_UseAlpha = gp->tsp.UseAlpha; - CurrentShader->pp_IgnoreTexA = gp->tsp.IgnoreTexA; - CurrentShader->pp_ShadInstr = gp->tsp.ShadInstr; - CurrentShader->pp_Offset = gp->pcw.Offset; - CurrentShader->pp_FogCtrl = gp->tsp.FogCtrl; - CurrentShader->pp_WeightedAverage = weighted_average; - CurrentShader->pp_FrontPeeling = front_peeling; - CompilePipelineShader(CurrentShader); + front_peeling); + CurrentShader = gl.getShader(shaderId); + if (CurrentShader->program == -1) { + CurrentShader->cp_AlphaTest = Type == ListType_Punch_Through ? 
1 : 0; + CurrentShader->pp_ClipTestMode = clipping; + CurrentShader->pp_Texture = gp->pcw.Texture; + CurrentShader->pp_UseAlpha = gp->tsp.UseAlpha; + CurrentShader->pp_IgnoreTexA = gp->tsp.IgnoreTexA; + CurrentShader->pp_ShadInstr = gp->tsp.ShadInstr; + CurrentShader->pp_Offset = gp->pcw.Offset; + CurrentShader->pp_FogCtrl = gp->tsp.FogCtrl; + CurrentShader->pp_WeightedAverage = weighted_average; + CurrentShader->pp_FrontPeeling = front_peeling; + CompilePipelineShader(CurrentShader); + } } + glcache.UseProgram(CurrentShader->program); + if (Type == ListType_Opaque || Type == ListType_Punch_Through) // TODO Can PT have a non-zero and non-one alpha? + { + ShaderUniforms.blend_mode[0] = 1; + ShaderUniforms.blend_mode[1] = 0; + } + else + { + ShaderUniforms.blend_mode[0] = gp->tsp.SrcInstr; + ShaderUniforms.blend_mode[1] = gp->tsp.DstInstr; + } ShaderUniforms.Set(CurrentShader); SetTileClip(gp->tileclip,true); @@ -246,18 +290,18 @@ template glcache.DepthFunc(Zfunction[gp->isp.DepthMode]); } -#if TRIG_SORT - if (SortingEnabled && !front_peeling) +//#if TRIG_SORT + if (Type == ListType_Translucent) glcache.DepthMask(GL_FALSE); else -#endif +//#endif if (!weighted_average) glcache.DepthMask(!gp->isp.ZWriteDis); } template void DrawList(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0, - int srcBlendModeFilter = -1, int dstBlendModeFilter = -1) + int srcBlendModeFilter = -1, int dstBlendModeFilter = -1, bool geometry_only = false) { PolyParam* params = &gply.head()[first]; @@ -286,7 +330,7 @@ void DrawList(const List& gply, int first, int count, bool weighted_a } } - SetGPState(params, weighted_average, front_peeling); + SetGPState(params, weighted_average, front_peeling, geometry_only); glDrawElements(GL_TRIANGLE_STRIP, params->count, GL_UNSIGNED_SHORT, (GLvoid*)(2*params->first)); glCheck(); } @@ -299,15 +343,23 @@ void DrawListTranslucentAutoSorted(const List& gply, int first, int c { DrawList(gply, first, count, 
weighted_average, front_peeling, srcBlendModeFilter, dstBlendModeFilter); } + void DrawListOpaque(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0) { DrawList(gply, first, count, weighted_average, front_peeling); } + void DrawListPunchThrough(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0) { DrawList(gply, first, count, weighted_average, front_peeling); } +template +void DrawListGeometry(const List& gply, int first, int count) +{ + DrawList(gply, first, count, false, 0, -1, -1, true); +} + bool operator<(const PolyParam &left, const PolyParam &right) { /* put any condition you want to sort on here */ @@ -1109,24 +1161,52 @@ void RenderAverageColors(); void RenderWeightedBlended(); void RenderFrontToBackPeeling(int first, int count); void DualPeelingReshape(int w, int h); +void renderABuffer(); + +void CreateGeometryTexture() +{ + glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); + + stencilTexId = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, stencilTexId); glCheck(); + glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); // OpenGL >= 4.3 + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + // Using glTexStorage2D instead of glTexImage2D to satisfy requirement GL_TEXTURE_IMMUTABLE_FORMAT=true, needed for glTextureView below + glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH32F_STENCIL8, screen_width, screen_height); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, stencilTexId, 0); glCheck(); + glCheck(); + +// opaqueTexId = glcache.GenTexture(); +// glcache.BindTexture(GL_TEXTURE_2D, opaqueTexId); +// glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); +// glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); +// glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, screen_width, 
screen_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); +// glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, opaqueTexId, 0); +// glCheck(); + depthTexId = glcache.GenTexture(); + glTextureView(depthTexId, GL_TEXTURE_2D, stencilTexId, GL_DEPTH32F_STENCIL8, 0, 1, 0, 1); + glCheck(); + glcache.BindTexture(GL_TEXTURE_2D, depthTexId); + glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glCheck(); +} void DrawStrips() { - if (fbo == 0) + if (geom_fbo == 0) { - glGenFramebuffers(1, &fbo); - glBindFramebuffer(GL_FRAMEBUFFER, fbo); // Bind framebuffer 0 to use the system fb + glGenFramebuffers(1, &geom_fbo); + CreateGeometryTexture(); - stencilTexId = glcache.GenTexture(); - glcache.BindTexture(GL_TEXTURE_2D, stencilTexId); glCheck(); - glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, screen_width, screen_height, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL); glCheck(); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, stencilTexId, 0); glCheck(); - -// GLuint colortexid = glcache.GenTexture(); -// glcache.BindTexture(GL_TEXTURE_2D, colortexid); -// -// glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, screen_width, screen_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); glCheck(); -// glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colortexid, 0); glCheck(); + // Color buffer. 
Not normally needed + //GLuint colortexid = glcache.GenTexture(); + //glcache.BindTexture(GL_TEXTURE_2D, colortexid); + // + //glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, screen_width, screen_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); glCheck(); + //glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colortexid, 0); glCheck(); GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); @@ -1134,13 +1214,10 @@ void DrawStrips() } else { - glBindFramebuffer(GL_FRAMEBUFFER, fbo); + glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); if (stencilTexId == 0) { - stencilTexId = glcache.GenTexture(); - glcache.BindTexture(GL_TEXTURE_2D, stencilTexId); glCheck(); - glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, screen_width, screen_height, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL); glCheck(); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, stencilTexId, 0); glCheck(); + CreateGeometryTexture(); } glcache.Disable(GL_SCISSOR_TEST); glcache.DepthMask(GL_TRUE); @@ -1165,40 +1242,50 @@ void DrawStrips() glcache.DepthMask(GL_TRUE); // Do a first pass on the depth+stencil buffer - glBindFramebuffer(GL_FRAMEBUFFER, fbo); + glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count); - DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count); + DrawListGeometry(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count); + DrawListGeometry(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count); // Modifier volumes DrawModVols(previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); glBindFramebuffer(GL_FRAMEBUFFER, 0); glCheck(); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); +// glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); // Bind 
stencil buffer for the fragment shader (shadowing) - glActiveTexture(GL_TEXTURE1); + glActiveTexture(GL_TEXTURE2); glBindTexture(GL_TEXTURE_2D, stencilTexId); - glTexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glActiveTexture(GL_TEXTURE0); glCheck(); + // Bind depth texture for manual depth testing in fragment shader + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, depthTexId); + glActiveTexture(GL_TEXTURE0); + //Opaque DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count); //Alpha tested DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count); + glActiveTexture(GL_TEXTURE2); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0); + //Alpha blended { + DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); + glCheck(); + +glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + renderABuffer(); // if (hack_on) // RenderAverageColors(); // else - RenderFrontToBackPeeling(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); + // RenderFrontToBackPeeling(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); //RenderWeightedBlended(); // if (pvrrc.isAutoSort) // GenSorted(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 39e79cfea..fce03dcf8 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -191,8 +191,8 @@ const char* PixelPipelineShader = #if pp_WeightedAverage == 1 \n\ #extension GL_ARB_draw_buffers : require \n\ #endif \n\ -#if pp_FrontPeeling == 2 \n\ -uniform sampler2DRect DepthTex; \n\ +#if pp_FrontPeeling != 1 // FIXME \n\ +uniform sampler2D DepthTex; \n\ #endif \n" #ifndef GLES 
"\ @@ -209,9 +209,17 @@ uniform lowp vec3 sp_FOG_COL_RAM,sp_FOG_COL_VERT; \n\ uniform highp vec2 sp_LOG_FOG_COEFS; \n\ uniform highp float sp_FOG_DENSITY; \n\ uniform highp float shade_scale_factor; \n\ -uniform highp vec2 screen_size; \n\ +uniform lowp vec2 screen_size; \n\ uniform sampler2D tex,fog_table; \n\ -uniform usampler2D shadow_stencil; \n\ +#if pp_WeightedAverage == 0 && pp_FrontPeeling == 0 \n\ + uniform usampler2D shadow_stencil; \n\ +#endif \n\ +#extension GL_EXT_shader_image_load_store : enable \n\ +#define ABUFFER_SIZE 32 \n\ +uniform uvec2 blend_mode; \n\ +coherent uniform layout(size1x32) uimage2D abufferCounterImg; \n\ +coherent uniform layout(size4x32) image2DArray abufferImg; \n\ +coherent uniform layout(size2x32) image2DArray abufferBlendingImg; \n\ /* Vertex input*/ \n\ " vary " lowp vec4 vtx_base; \n\ " vary " lowp vec4 vtx_offs; \n\ @@ -230,9 +238,16 @@ void main() \n\ gl_FragDepth = 1 - log2(1.0 + w) / 34; \n" #endif "\ + #if pp_FrontPeeling != 1 // FIXME \n\ + // Manual depth testing \n\ + highp float frontDepth = texture(DepthTex, gl_FragCoord.xy / screen_size).r; \n\ + // FIXME this causes dots to appear. Loss of precision? 
\n\ + //if (gl_FragDepth > frontDepth - 1e-8) // FIXME the TA depth test is ignored \n\ + // discard; \n\ + #endif \n\ #if pp_FrontPeeling == 2 \n\ // Bit-exact comparison between FP32 z-buffer and fragment depth \n\ - highp float frontDepth = texture2DRect(DepthTex, gl_FragCoord.xy).r; \n\ + highp float frontDepth = texture(DepthTex, gl_FragCoord.xy / screen_size).r; \n\ if (gl_FragDepth <= frontDepth) { \n\ discard; \n\ } \n\ @@ -299,10 +314,12 @@ void main() \n\ #endif\n\ } \n\ #endif\n\ - //uvec4 stencil = texture(shadow_stencil, vec2(gl_FragCoord.x / 1280, gl_FragCoord.y / 960)); \n\ - uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / screen_size); \n\ - if (stencil.r == uint(0x81)) \n\ - color.rgb *= shade_scale_factor; \n\ + #if pp_WeightedAverage == 0 && pp_FrontPeeling == 0 \n\ + //uvec4 stencil = texture(shadow_stencil, vec2(gl_FragCoord.x / 1280, gl_FragCoord.y / 960)); \n\ + uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / screen_size); \n\ + if (stencil.r == uint(0x81)) \n\ + color.rgb *= shade_scale_factor; \n\ + #endif\n\ #if pp_FogCtrl==0 // LUT \n\ { \n\ color.rgb=mix(color.rgb,sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ @@ -334,7 +351,16 @@ void main() \n\ #elif pp_FrontPeeling == 2 \n" FRAGCOL " = vec4(color.rgb * color.a, color.a); \n\ #else \n" - FRAGCOL "=color; \n\ +// FRAGCOL "=color; \n +" ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + int abidx = int(imageAtomicAdd(abufferCounterImg, coords, uint(1))); \n\ + if (abidx >= ABUFFER_SIZE) \n\ + discard; \n\ + vec4 blend_val = vec4(gl_FragDepth, float(blend_mode.x) * 8 + float(blend_mode.y), 0, 0); \n\ + ivec3 coords3 = ivec3(coords, abidx); \n\ + imageStore(abufferImg, coords3, color); \n\ + imageStore(abufferBlendingImg, coords3, blend_val); \n\ + \n\ #endif \n\ }"; @@ -835,14 +861,25 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe // Depth peeling: use texture 1 for depth texture gu = glGetUniformLocation(s->program, "DepthTex"); if 
(gu != -1) - glUniform1i(gu, 1); - else - { - // Shadow stencil for OP/PT rendering pass - gu = glGetUniformLocation(s->program, "shadow_stencil"); - if (gu != -1) - glUniform1i(gu, 1); - } + glUniform1i(gu, 1); // GL_TEXTURE1 + + // Shadow stencil for OP/PT rendering pass + gu = glGetUniformLocation(s->program, "shadow_stencil"); + if (gu != -1) + glUniform1i(gu, 2); // GL_TEXTURE2 + + // A-buffers + gu = glGetUniformLocation(s->program, "abufferImg"); + if (gu != -1) + glUniform1i(gu, 3); // GL_TEXTURE3 + gu = glGetUniformLocation(s->program, "abufferCounterImg"); + if (gu != -1) + glUniform1i(gu, 4); // GL_TEXTURE4 + gu = glGetUniformLocation(s->program, "abufferBlendingImg"); + if (gu != -1) + glUniform1i(gu, 5); // GL_TEXTURE5 + + s->blend_mode = glGetUniformLocation(s->program, "blend_mode"); return glIsProgram(s->program)==GL_TRUE; } @@ -958,7 +995,7 @@ GLuint gl_CompileShader(const char* shader,GLuint type); bool gl_create_resources(); //setup - +extern void initABuffer(); bool gles_init() { @@ -986,6 +1023,7 @@ bool gles_init() #ifdef GLES glHint(GL_GENERATE_MIPMAP_HINT, GL_FASTEST); #endif + initABuffer(); return true; } @@ -1863,6 +1901,8 @@ void rend_set_fb_scale(float x,float y) fb_scale_y=y; } +void reshapeABuffer(int w, int h); + struct glesrend : Renderer { bool Init() { return gles_init(); } @@ -1876,6 +1916,7 @@ struct glesrend : Renderer glcache.DeleteTextures(1, &stencilTexId); stencilTexId = 0; } + reshapeABuffer(w, h); } void Term() { } diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 48fdccaeb..f23dc2cea 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -57,6 +57,7 @@ struct PipelineShader GLuint sp_FOG_COL_RAM,sp_FOG_COL_VERT,sp_FOG_DENSITY,sp_LOG_FOG_COEFS; GLuint shade_scale_factor; GLuint screen_size; + GLuint blend_mode; // u32 cp_AlphaTest; s32 pp_ClipTestMode; @@ -146,6 +147,7 @@ struct ShaderUniforms_t float ps_FOG_COL_RAM[3]; float ps_FOG_COL_VERT[3]; float fog_coefs[2]; + GLuint blend_mode[2]; void 
Set(PipelineShader* s) { @@ -175,6 +177,9 @@ struct ShaderUniforms_t if (s->shade_scale_factor != -1) glUniform1f(s->shade_scale_factor, FPU_SHAD_SCALE.scale_factor / 256.f); + + if (s->blend_mode != -1) + glUniform2uiv(s->blend_mode, 1, blend_mode); } }; @@ -186,6 +191,7 @@ bool CompilePipelineShader(PipelineShader* s, const char *source = PixelPipeline GLuint loadPNG(const string& subpath, int &width, int &height); extern GLuint stencilTexId; +extern GLuint depthTexId; void DrawListTranslucentAutoSorted(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0, int srcBlendModeFilter = -1, int dstBlendModeFilter = -1); void DrawListOpaque(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0); diff --git a/core/rend/gles/render_tr.cpp b/core/rend/gles/render_tr.cpp index a82e60536..66cba0a06 100644 --- a/core/rend/gles/render_tr.cpp +++ b/core/rend/gles/render_tr.cpp @@ -1,9 +1,5 @@ #include "glcache.h" -//#define GL_TEXTURE_RECTANGLE_ARB 0x84F5 -#define GL_RGB16F_ARB 0x881B -#define GL_RGBA16F_ARB 0x881A - static int g_imageWidth = 0; static int g_imageHeight = 0; static GLuint g_quadBuffer = 0; @@ -236,7 +232,7 @@ GLuint g_frontDepthTexId[2]; GLuint g_frontColorTexId[2]; GLuint g_frontColorBlenderTexId; GLuint g_frontColorBlenderFboId; -GLuint g_frontDepthInitTexId; +//GLuint g_frontDepthInitTexId; GLuint g_samples_query; PipelineShader g_front_blend_shader; PipelineShader g_front_final_shader; @@ -244,11 +240,12 @@ PipelineShader g_front_final_shader; const char *front_blend_fragment_source = "\ #version 140 \n\ out vec4 FragColor; \n\ -uniform sampler2DRect TempTex; \n\ +uniform sampler2D TempTex; \n\ +uniform lowp vec2 screen_size; \n\ \n\ void main(void) \n\ { \n\ - FragColor = texture2DRect(TempTex, gl_FragCoord.xy); \n\ + FragColor = texture(TempTex, gl_FragCoord.xy / screen_size); \n\ } \n\ "; @@ -256,11 +253,12 @@ void main(void) \n\ const char *front_final_fragment_source = "\ 
#version 140 \n\ out vec4 FragColor; \n\ -uniform sampler2DRect ColorTex; \n\ +uniform sampler2D ColorTex; \n\ +uniform lowp vec2 screen_size; \n\ \n\ void main(void) \n\ { \n\ - vec4 frontColor = texture2DRect(ColorTex, gl_FragCoord.xy); \n\ + vec4 frontColor = texture(ColorTex, gl_FragCoord.xy / screen_size); \n\ // if (frontColor.a >= 0.99) { \n\ // FragColor = vec4(0, 0, 0, 0); \n\ // return; \n\ @@ -278,53 +276,53 @@ void InitFrontPeelingRenderTargets() for (int i = 0; i < 2; i++) { - glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_frontDepthTexId[i]); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_DEPTH_COMPONENT, - g_imageWidth, g_imageHeight, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL); + glcache.BindTexture(GL_TEXTURE_2D, g_frontDepthTexId[i]); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH32F_STENCIL8, g_imageWidth, g_imageHeight, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, NULL); - glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_frontColorTexId[i]); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, 
GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA, g_imageWidth, g_imageHeight, + glcache.BindTexture(GL_TEXTURE_2D, g_frontColorTexId[i]); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, g_imageWidth, g_imageHeight, 0, GL_RGBA, GL_FLOAT, 0); glBindFramebuffer(GL_FRAMEBUFFER, g_frontFboId[i]); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, - GL_TEXTURE_RECTANGLE, g_frontDepthTexId[i], 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, g_frontDepthTexId[i], 0); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_RECTANGLE, g_frontColorTexId[i], 0); + GL_TEXTURE_2D, g_frontColorTexId[i], 0); } g_frontColorBlenderTexId = glcache.GenTexture(); - glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_frontColorBlenderTexId); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA, g_imageWidth, g_imageHeight, + glcache.BindTexture(GL_TEXTURE_2D, g_frontColorBlenderTexId); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, g_imageWidth, g_imageHeight, 0, GL_RGBA, GL_FLOAT, 
0); - g_frontDepthInitTexId = glcache.GenTexture(); - glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_frontDepthInitTexId); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_DEPTH_COMPONENT, - g_imageWidth, g_imageHeight, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL); +// g_frontDepthInitTexId = glcache.GenTexture(); +// glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_frontDepthInitTexId); +// glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); +// glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); +// glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST); +// glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST); +// glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_DEPTH_COMPONENT, +// g_imageWidth, g_imageHeight, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL); glGenFramebuffers(1, &g_frontColorBlenderFboId); glBindFramebuffer(GL_FRAMEBUFFER, g_frontColorBlenderFboId); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, - GL_TEXTURE_RECTANGLE, g_frontDepthTexId[0], 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, g_frontDepthTexId[0], 0); glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_RECTANGLE, g_frontColorBlenderTexId, 0); + GL_TEXTURE_2D, g_frontColorBlenderTexId, 0); glGenQueries(1, &g_samples_query); @@ -338,10 +336,11 @@ void DeleteFrontPeelingRenderTargets() glcache.DeleteTextures(2, g_frontDepthTexId); glcache.DeleteTextures(2, g_frontColorTexId); glcache.DeleteTextures(1, &g_frontColorBlenderTexId); - glcache.DeleteTextures(1, &g_frontDepthInitTexId); +// glcache.DeleteTextures(1, 
&g_frontDepthInitTexId); glDeleteQueries(1, &g_samples_query); } +extern GLuint geom_fbo; //-------------------------------------------------------------------------- void RenderFrontToBackPeeling(int first, int count) { @@ -349,27 +348,37 @@ void RenderFrontToBackPeeling(int first, int count) // 1. Initialize Min Depth Buffer // --------------------------------------------------------------------- + glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); + glBindTexture(GL_TEXTURE_2D, g_frontDepthTexId[0]); + // FIXME: + // GL_DEPTH_STENCIL is super slow + // GL_DEPTH32F_STENCIL8 is fast but doesn't seem to work (depth values are wrong?!?!?) + // GL_DEPTH24_STENCIL8 seems to work but is super slow + // GL_DEPTH_COMPONENT32F fails + // GL_FLOAT_32_UNSIGNED_INT_24_8_REV fails + glCopyTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH32F_STENCIL8, 0, 0, g_imageWidth, g_imageHeight, 0); + glBindTexture(GL_TEXTURE_2D, 0); + glCheck(); + glBindFramebuffer(GL_FRAMEBUFFER, g_frontColorBlenderFboId); glDrawBuffer(g_drawBuffers[0]); glcache.DepthMask(true); glcache.Enable(GL_DEPTH_TEST); - glClearDepthf(1); glcache.ClearColor(0, 0, 0, 1); - glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + glClear(GL_COLOR_BUFFER_BIT /* | GL_DEPTH_BUFFER_BIT */); glcache.Disable(GL_BLEND); + + // Hack on + glClear(GL_DEPTH_BUFFER_BIT); // TODO Hack to get the depth from OP and PT passes. 
glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - DrawListOpaque(pvrrc.global_param_op, 0, pvrrc.global_param_op.used(), false, 1); DrawListPunchThrough(pvrrc.global_param_pt, 0, pvrrc.global_param_pt.used(), false, 1); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - - glBindTexture(GL_TEXTURE_RECTANGLE, g_frontDepthInitTexId); - glCopyTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_DEPTH_COMPONENT, 0, 0, g_imageWidth, g_imageHeight, 0); + // Hack off DrawListTranslucentAutoSorted(pvrrc.global_param_tr, first, count, false, 1, 4, 5); // FIXME initial pass for other blend modes @@ -389,15 +398,16 @@ else int currId = layer % 2; int prevId = 1 - currId; + glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); + glBindTexture(GL_TEXTURE_2D, g_frontDepthTexId[currId]); + glCopyTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH32F_STENCIL8, 0, 0, g_imageWidth, g_imageHeight, 0); + glBindTexture(GL_TEXTURE_2D, 0); + glCheck(); + glBindFramebuffer(GL_FRAMEBUFFER, g_frontFboId[currId]); glDrawBuffer(g_drawBuffers[0]); - - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, - GL_TEXTURE_RECTANGLE, g_frontDepthInitTexId, 0); - glBindTexture(GL_TEXTURE_RECTANGLE, g_frontDepthTexId[currId]); - glCopyTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_DEPTH_COMPONENT, 0, 0, g_imageWidth, g_imageHeight, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, - GL_TEXTURE_RECTANGLE, g_frontDepthTexId[currId], 0); +// glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, g_frontDepthTexId[currId], 0); + glCheck(); glcache.ClearColor(0, 0, 0, 0); glClear(GL_COLOR_BUFFER_BIT); @@ -405,8 +415,9 @@ else glcache.Enable(GL_DEPTH_TEST); glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_RECTANGLE, g_frontDepthTexId[prevId]); // DepthTex + glBindTexture(GL_TEXTURE_2D, g_frontDepthTexId[prevId]); // DepthTex glActiveTexture(GL_TEXTURE0); + glCheck(); glBeginQuery(GL_SAMPLES_PASSED, g_samples_query); // Peeling shader @@ -443,7 +454,7 @@ else 
glcache.UseProgram(g_front_blend_shader.program); ShaderUniforms.Set(&g_front_blend_shader); glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_RECTANGLE, g_frontColorTexId[currId]); // TempTex + glBindTexture(GL_TEXTURE_2D, g_frontColorTexId[currId]); // TempTex glActiveTexture(GL_TEXTURE0); // Blending shader @@ -474,7 +485,7 @@ else glcache.UseProgram(g_front_final_shader.program); ShaderUniforms.Set(&g_front_final_shader); glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_RECTANGLE, g_frontColorBlenderTexId); // ColorTex + glBindTexture(GL_TEXTURE_2D, g_frontColorBlenderTexId); // ColorTex glActiveTexture(GL_TEXTURE0); // Final blending From d23ce4b24aa058b47cdefc024c8f037fd43b6f88 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 27 May 2018 21:34:18 +0200 Subject: [PATCH 05/65] Removed previous methods. Better a-buffer impl. --- core/rend/gles/abuffer.cpp | 288 ++++++++++-- core/rend/gles/gldraw.cpp | 818 +++++------------------------------ core/rend/gles/gles.cpp | 160 +++---- core/rend/gles/gles.h | 18 +- core/rend/gles/render_tr.cpp | 538 ----------------------- core/rend/soft/softrend.cpp | 49 +++ 6 files changed, 464 insertions(+), 1407 deletions(-) delete mode 100644 core/rend/gles/render_tr.cpp diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index 79a7e8908..64bb4a2f0 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -6,38 +6,38 @@ */ #include "glcache.h" -#define ABUFFER_SIZE 32 - GLuint abufferTexID = 0; GLuint abufferBlendingTexID = 0; GLuint abufferCounterTexID = 0; PipelineShader g_abuffer_final_shader; PipelineShader g_abuffer_clear_shader; +PipelineShader g_abuffer_pass2_shader; +static GLuint g_quadBuffer = 0; +static GLuint g_quadVertexArray = 0; static int g_imageWidth = 0; static int g_imageHeight = 0; -extern void DrawQuad(); - static const char *final_shader_source = "\ #version 140 \n\ #extension GL_EXT_shader_image_load_store : enable \n\ -#define ABUFFER_SIZE 32 \n\ +#define 
ABUFFER_SIZE %d \n\ out vec4 FragColor; \n\ uniform layout(size1x32) uimage2D abufferCounterImg; \n\ uniform layout(size4x32) image2DArray abufferImg; \n\ uniform layout(size2x32) image2DArray abufferBlendingImg; \n\ uniform lowp vec2 screen_size; \n\ +uniform int sortFragments; \n\ \n\ vec4 colorList[ABUFFER_SIZE]; \n\ vec4 depthBlendList[ABUFFER_SIZE]; \n\ \n\ -int resolveClosest(ivec2 coords, int abNumFrag) { \n\ +int resolveClosest(ivec2 coords, int num_frag) { \n\ \n\ // Search smallest z \n\ float minZ = 1000000.0f; \n\ int minIdx; \n\ - for (int i = 0; i < abNumFrag; i++) { \n\ + for (int i = 0; i < num_frag; i++) { \n\ vec4 val = imageLoad(abufferBlendingImg, ivec3(coords, i)); \n\ if (val.x < minZ) { \n\ minZ = val.x; \n\ @@ -50,9 +50,9 @@ int resolveClosest(ivec2 coords, int abNumFrag) { \n\ } \n\ \n\ \n\ -int findOpaque(ivec2 coords, int abNumFrag) { \n\ +int findOpaque(ivec2 coords, int num_frag) { \n\ \n\ - for (int i = 0; i < abNumFrag; i++) { \n\ + for (int i = 0; i < num_frag; i++) { \n\ vec4 val = imageLoad(abufferBlendingImg, ivec3(coords, i)); \n\ if (round(val.y) == 8) { \n\ return i; \n\ @@ -62,9 +62,9 @@ int findOpaque(ivec2 coords, int abNumFrag) { \n\ return 0; \n\ } \n\ \n\ -void fillFragmentArray(ivec2 coords, int abNumFrag) { \n\ +void fillFragmentArray(ivec2 coords, int num_frag) { \n\ // Load fragments into a local memory array for sorting \n\ - for (int i = 0; i < abNumFrag; i++) { \n\ + for (int i = 0; i < num_frag; i++) { \n\ colorList[i] = imageLoad(abufferImg, ivec3(coords, i)); \n\ depthBlendList[i] = imageLoad(abufferBlendingImg, ivec3(coords, i)); \n\ } \n\ @@ -72,43 +72,132 @@ void fillFragmentArray(ivec2 coords, int abNumFrag) { \n\ \n\ // Bubble sort used to sort fragments \n\ void bubbleSort(int array_size) { \n\ - for (int i = (array_size - 2); i >= 0; --i) { \n\ - for (int j = 0; j <= i; ++j) { \n\ - if (depthBlendList[j].x > depthBlendList[j+1].x) { \n\ - vec4 temp = depthBlendList[j + 1]; \n\ - depthBlendList[j + 1] = 
depthBlendList[j]; \n\ - depthBlendList[j] = temp; \n\ - temp = colorList[j + 1]; \n\ - colorList[j + 1] = colorList[j]; \n\ - colorList[j] = temp; \n\ - } \n\ - } \n\ - } \n\ + for (int i = array_size - 2; i >= 0; i--) { \n\ + for (int j = 0; j <= i; j++) { \n\ +// depth only if (depthBlendList[j].x < depthBlendList[j + 1].x) { \n\ + if (depthBlendList[j].x < depthBlendList[j + 1].x || (depthBlendList[j].x == depthBlendList[j + 1].x && depthBlendList[j].z > depthBlendList[j + 1].z)) { \n\ +// if (depthBlendList[j].z > depthBlendList[j + 1].z) { \n\ + vec4 depthBlend = depthBlendList[j + 1]; \n\ + depthBlendList[j + 1] = depthBlendList[j]; \n\ + depthBlendList[j] = depthBlend; \n\ + vec4 color = colorList[j + 1]; \n\ + colorList[j + 1] = colorList[j]; \n\ + colorList[j] = color; \n\ + } \n\ + } \n\ + } \n\ +} \n\ + \n\ + \n\ +// Insertion sort used to sort fragments \n\ +void insertionSort(int array_size) { \n\ + for (int i = 1; i < array_size; i++) { \n\ + vec4 aDepth = depthBlendList[i]; \n\ + vec4 aColor = colorList[i]; \n\ + int j = i - 1; \n\ +// for (; j >= 0 && depthBlendList[j].z < aDepth.z; j--) { \n\ + for (; j >= 0 && (depthBlendList[j].x < aDepth.x || (depthBlendList[j].x == aDepth.x && depthBlendList[j].z > aDepth.z)); j--) { \n\ + depthBlendList[j + 1] = depthBlendList[j]; \n\ + colorList[j + 1] = colorList[j]; \n\ + } \n\ + depthBlendList[j + 1] = aDepth; \n\ + colorList[j + 1] = aColor; \n\ + } \n\ +} \n\ + \n\ +vec4 returnNthLayer(ivec2 coords, int num_frag, int layer) { \n\ + \n\ + // Copy fragments in local array \n\ + fillFragmentArray(coords, num_frag); \n\ + \n\ + // Sort fragments in local memory array \n\ + if (sortFragments != 0) \n\ + bubbleSort(num_frag); \n\ + \n\ + return vec4(colorList[min(layer, num_frag - 1)].rgb, 1); \n\ } \n\ \n\ // Blend fragments back-to-front \n\ -vec4 resolveAlphaBlend(ivec2 coords, int abNumFrag){ \n\ +vec4 resolveAlphaBlend(ivec2 coords, int num_frag){ \n\ \n\ // Copy fragments in local array \n\ - 
fillFragmentArray(coords, abNumFrag); \n\ + fillFragmentArray(coords, num_frag); \n\ \n\ // Sort fragments in local memory array \n\ - bubbleSort(abNumFrag); \n\ + if (sortFragments != 0) \n\ + bubbleSort(num_frag); \n\ + else if (num_frag > 1) \n\ + { \n\ + // FIXME This is wrong \n\ + vec4 depthBlend = depthBlendList[0]; \n\ + depthBlendList[0] = depthBlendList[num_frag - 1]; \n\ + depthBlendList[num_frag - 1] = depthBlend; \n\ + vec4 color = colorList[0]; \n\ + colorList[0] = colorList[num_frag - 1]; \n\ + colorList[num_frag - 1] = color; \n\ + } \n\ \n\ - vec4 finalColor = colorList[abNumFrag - 1]; \n\ - for (int i = abNumFrag - 2; i >= 0; i--) { \n\ - vec4 col = colorList[i]; \n\ + vec4 finalColor = colorList[0]; \n\ + for (int i = 1; i < num_frag; i++) { \n\ + vec4 srcColor = colorList[i]; \n\ + float srcAlpha = srcColor.a; \n\ + float dstAlpha = finalColor.a; \n\ \n\ int srcBlend = int(depthBlendList[i].y) / 8; \n\ + switch (srcBlend) \n\ + { \n\ + case 0: // zero \n\ + srcColor = vec4(0); \n\ + break; \n\ + case 1: // one \n\ + break; \n\ + case 2: // other color \n\ + srcColor *= finalColor; \n\ + break; \n\ + case 3: // inverse other color \n\ + srcColor *= vec4(1) - finalColor; \n\ + break; \n\ + case 4: // src alpha \n\ + srcColor *= srcAlpha; \n\ + break; \n\ + case 5: // inverse src alpha \n\ + srcColor *= 1 - srcAlpha; \n\ + break; \n\ + case 6: // dst alpha \n\ + srcColor *= dstAlpha; \n\ + break; \n\ + case 7: // inverse dst alpha \n\ + srcColor *= 1 - dstAlpha; \n\ + break; \n\ + } \n\ int dstBlend = int(depthBlendList[i].y) % 8; \n\ - if (srcBlend == 1 && dstBlend == 0) \n\ - finalColor = col; \n\ - else if (srcBlend == 4 && dstBlend == 5) \n\ - finalColor = finalColor * (1 - col.a) + col * (col.a); \n\ - else if (srcBlend == 4 && dstBlend == 1) \n\ - finalColor = finalColor + col * (col.a); \n\ - else if (srcBlend == 1 && dstBlend == 1) \n\ - finalColor = finalColor + col; \n\ + switch (dstBlend) \n\ + { \n\ + case 0: // zero \n\ + finalColor = 
vec4(0); \n\ + break; \n\ + case 1: // one \n\ + break; \n\ + case 2: // other color \n\ + finalColor *= colorList[i]; \n\ + break; \n\ + case 3: // inverse other color \n\ + finalColor *= vec4(1) - colorList[i]; \n\ + break; \n\ + case 4: // src alpha \n\ + finalColor *= srcAlpha; \n\ + break; \n\ + case 5: // inverse src alpha \n\ + finalColor *= 1 - srcAlpha; \n\ + break; \n\ + case 6: // dst alpha \n\ + finalColor *= dstAlpha; \n\ + break; \n\ + case 7: // inverse dst alpha \n\ + finalColor *= 1 - dstAlpha; \n\ + break; \n\ + } \n\ + finalColor = clamp(finalColor + srcColor, 0, 1); \n\ } \n\ finalColor.a = 1; \n\ return finalColor; \n\ @@ -118,19 +207,22 @@ vec4 resolveAlphaBlend(ivec2 coords, int abNumFrag){ \n\ void main(void) \n\ { \n\ ivec2 coords = ivec2(gl_FragCoord.xy); \n\ - int abNumFrag = int(imageLoad(abufferCounterImg, coords).r); \n\ + int num_frag = int(imageLoad(abufferCounterImg, coords).r); \n\ // Crash without this (WTF ?) \n\ - if (abNumFrag < 0) \n\ - abNumFrag = 0; \n\ - if (abNumFrag > ABUFFER_SIZE) \n\ - abNumFrag = ABUFFER_SIZE; \n\ - if (abNumFrag > 0) { \n\ + if (num_frag < 0) \n\ + num_frag = 0; \n\ + if (num_frag > ABUFFER_SIZE) \n\ + num_frag = ABUFFER_SIZE; \n\ + if (num_frag > 0) { \n\ // Compute and output final color for the frame buffer \n\ //If we only want the closest fragment \n\ - //int minIdx = resolveClosest(coords, abNumFrag); \n\ - //FragColor = vec4(float(abNumFrag) / ABUFFER_SIZE, 0, 0, 1); \n\ + //int minIdx = resolveClosest(coords, num_frag); \n\ + //FragColor = imageLoad(abufferImg, ivec3(coords, minIdx)); \n\ + // Visualize the number of layers in use \n\ + //FragColor = vec4(float(num_frag) / ABUFFER_SIZE, 0, 0, 1); \n\ //FragColor = imageLoad(abufferImg, ivec3(coords, 0)); \n\ - FragColor = resolveAlphaBlend(coords, abNumFrag); \n\ + //FragColor = returnNthLayer(coords, num_frag, 1); \n\ + FragColor = resolveAlphaBlend(coords, num_frag); \n\ } \n\ else \n\ // If no fragment, write nothing \n\ @@ -164,6 +256,35 
@@ void main(void) \n\ } \n\ "; +// Renders the opaque and pt rendered texture into a-buffers +static const char *pass2_shader_source = "\ +#version 140 \n\ +#extension GL_EXT_shader_image_load_store : enable \n\ +coherent uniform layout(size1x32) uimage2D abufferCounterImg; \n\ +coherent uniform layout(size4x32) image2DArray abufferImg; \n\ +coherent uniform layout(size2x32) image2DArray abufferBlendingImg; \n\ +uniform lowp vec2 screen_size; \n\ +uniform sampler2D DepthTex; \n\ +uniform sampler2D tex; \n\ + \n\ +void main(void) \n\ +{ \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + \n\ + int abidx = int(imageAtomicAdd(abufferCounterImg, coords, uint(1))); \n\ + ivec3 coords3 = ivec3(coords, abidx); \n\ + \n\ + highp float depth = texture(DepthTex, gl_FragCoord.xy / screen_size).r; \n\ + vec4 blend_val = vec4(depth, 8, 0, 0); \n\ + imageStore(abufferBlendingImg, coords3, blend_val); \n\ + vec4 color = texture(tex, gl_FragCoord.xy / screen_size); \n\ + imageStore(abufferImg, coords3, color); \n\ + \n\ + // Discard fragment so nothing is writen to the framebuffer \n\ + discard; \n\ +} \n\ +"; + void initABuffer() { g_imageWidth = screen_width; @@ -204,9 +325,18 @@ void initABuffer() glBindImageTexture(5, abufferBlendingTexID, 0, true, 0, GL_READ_WRITE, GL_RG32F); if (g_abuffer_final_shader.program == 0) - CompilePipelineShader(&g_abuffer_final_shader, final_shader_source); + { + char source[8192]; + sprintf(source, final_shader_source, ABUFFER_SIZE); + CompilePipelineShader(&g_abuffer_final_shader, source); + } if (g_abuffer_clear_shader.program == 0) CompilePipelineShader(&g_abuffer_clear_shader, clear_shader_source); + if (g_abuffer_pass2_shader.program == 0) + CompilePipelineShader(&g_abuffer_pass2_shader, pass2_shader_source); + + glGenVertexArrays(1, &g_quadVertexArray); + glGenBuffers(1, &g_quadBuffer); glCheck(); } @@ -225,7 +355,65 @@ void reshapeABuffer(int w, int h) } } -void renderABuffer() { + +void DrawQuad() +{ + 
glBindVertexArray(g_quadVertexArray); + + struct Vertex vertices[] = { + { 0, screen_height, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 1 }, + { 0, 0, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 0 }, + { screen_width, screen_height, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 1 }, + { screen_width, 0, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 0 }, + }; + GLushort indices[] = { 0, 1, 2, 1, 3 }; + + glBindBuffer(GL_ARRAY_BUFFER, g_quadBuffer); glCheck(); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STREAM_DRAW); glCheck(); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glCheck(); + + glEnableVertexAttribArray(VERTEX_POS_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_POS_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,x)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_BASE_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_COL_BASE_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,col)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_OFFS_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_COL_OFFS_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,spc)); glCheck(); + + glEnableVertexAttribArray(VERTEX_UV_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_UV_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,u)); glCheck(); + + glDrawElements(GL_TRIANGLE_STRIP, 5, GL_UNSIGNED_SHORT, indices); glCheck(); +} + +void renderPass2(GLuint textureId, GLuint depthTexId) +{ + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, textureId); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, depthTexId); + glActiveTexture(GL_TEXTURE3); + glBindTexture(GL_TEXTURE_2D_ARRAY, abufferTexID); + glActiveTexture(GL_TEXTURE4); + glBindTexture(GL_TEXTURE_2D, abufferCounterTexID); + glActiveTexture(GL_TEXTURE5); + glBindTexture(GL_TEXTURE_2D_ARRAY, abufferBlendingTexID); + glActiveTexture(GL_TEXTURE0); + + 
glcache.UseProgram(g_abuffer_pass2_shader.program); + ShaderUniforms.Set(&g_abuffer_pass2_shader); + + glcache.Disable(GL_BLEND); + glcache.Disable(GL_DEPTH_TEST); + glcache.Disable(GL_CULL_FACE); +glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + DrawQuad(); +glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); +} + +void renderABuffer(bool sortFragments) +{ glActiveTexture(GL_TEXTURE3); glBindTexture(GL_TEXTURE_2D_ARRAY, abufferTexID); glCheck(); @@ -238,9 +426,12 @@ void renderABuffer() { glcache.UseProgram(g_abuffer_final_shader.program); ShaderUniforms.Set(&g_abuffer_final_shader); + GLint gu = glGetUniformLocation(g_abuffer_final_shader.program, "sortFragments"); + glUniform1i(gu, (int)sortFragments); glcache.Disable(GL_BLEND); glcache.Disable(GL_DEPTH_TEST); + glcache.Disable(GL_CULL_FACE); glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); DrawQuad(); @@ -252,5 +443,6 @@ void renderABuffer() { glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); DrawQuad(); + glActiveTexture(GL_TEXTURE0); glCheck(); } diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index ecc8c4bbc..fc6a27f94 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -1,7 +1,6 @@ #include "glcache.h" #include "rend/rend.h" -#include /* Drawing and related state management @@ -21,6 +20,7 @@ const static u32 CullMode[]= GL_FRONT, //2 Cull if Negative Cull if ( |det| < 0 ) or ( |det| < fpu_cull_val ) GL_BACK, //3 Cull if Positive Cull if ( |det| > 0 ) or ( |det| < fpu_cull_val ) }; + #define INVERT_DEPTH_FUNC const static u32 Zfunction[]= { @@ -82,7 +82,7 @@ PipelineShader* CurrentShader; u32 gcflip; GLuint geom_fbo; GLuint stencilTexId; -//GLuint opaqueTexId; +GLuint opaqueTexId; GLuint depthTexId; s32 SetTileClip(u32 val, bool set) @@ -157,34 +157,32 @@ static void SetTextureRepeatMode(GLuint dir, u32 clamp, u32 mirror) } template - void SetGPState(const PolyParam* gp, bool weighted_average = false, u32 front_peeling = 0, bool geometry_only = false, 
u32 cflip=0) + void SetGPState(const PolyParam* gp, int pass, u32 cflip=0) { s32 clipping = SetTileClip(gp->tileclip, false); int shaderId; - if (geometry_only) + if (pass == 0) { shaderId = GetProgramID(Type == ListType_Punch_Through ? 1 : 0, clipping + 1, Type == ListType_Punch_Through ? gp->pcw.Texture : 0, - 0, + 1, gp->tsp.IgnoreTexA, 0, 0, 2, - false, - 1); // FIXME Hack: using front peeling to avoid writing to 3D array + pass); CurrentShader = gl.getShader(shaderId); if (CurrentShader->program == -1) { CurrentShader->cp_AlphaTest = Type == ListType_Punch_Through ? 1 : 0; CurrentShader->pp_ClipTestMode = clipping; CurrentShader->pp_Texture = Type == ListType_Punch_Through ? gp->pcw.Texture : 0; - CurrentShader->pp_UseAlpha = 0; + CurrentShader->pp_UseAlpha = 1; CurrentShader->pp_IgnoreTexA = gp->tsp.IgnoreTexA; CurrentShader->pp_ShadInstr = 0; CurrentShader->pp_Offset = 0; CurrentShader->pp_FogCtrl = 2; - CurrentShader->pp_WeightedAverage = false; - CurrentShader->pp_FrontPeeling = 1; + CurrentShader->pass = pass; CompilePipelineShader(CurrentShader); } } @@ -198,8 +196,7 @@ template gp->tsp.ShadInstr, gp->pcw.Offset, gp->tsp.FogCtrl, - weighted_average, - front_peeling); + pass); CurrentShader = gl.getShader(shaderId); if (CurrentShader->program == -1) { CurrentShader->cp_AlphaTest = Type == ListType_Punch_Through ? 1 : 0; @@ -210,14 +207,13 @@ template CurrentShader->pp_ShadInstr = gp->tsp.ShadInstr; CurrentShader->pp_Offset = gp->pcw.Offset; CurrentShader->pp_FogCtrl = gp->tsp.FogCtrl; - CurrentShader->pp_WeightedAverage = weighted_average; - CurrentShader->pp_FrontPeeling = front_peeling; + CurrentShader->pass = pass; CompilePipelineShader(CurrentShader); } } glcache.UseProgram(CurrentShader->program); - if (Type == ListType_Opaque || Type == ListType_Punch_Through) // TODO Can PT have a non-zero and non-one alpha? + if (Type == ListType_Opaque || Type == ListType_Punch_Through) // TODO Can PT have a >0 and <1 alpha? 
{ ShaderUniforms.blend_mode[0] = 1; ShaderUniforms.blend_mode[1] = 0; @@ -237,33 +233,36 @@ template glcache.StencilFunc(GL_ALWAYS,stencil,stencil); - glcache.BindTexture(GL_TEXTURE_2D, gp->texid == -1 ? 0 : gp->texid); - - if (gp->texid > 0) + if (CurrentShader->pp_Texture) { - SetTextureRepeatMode(GL_TEXTURE_WRAP_S, gp->tsp.ClampU, gp->tsp.FlipU); - SetTextureRepeatMode(GL_TEXTURE_WRAP_T, gp->tsp.ClampV, gp->tsp.FlipV); + glcache.BindTexture(GL_TEXTURE_2D, gp->texid == -1 ? 0 : gp->texid); - //set texture filter mode - if (gp->tsp.FilterMode == 0) + if (gp->texid > 0) { - //disable filtering, mipmaps - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - } - else - { - //bilinear filtering - //PowerVR supports also trilinear via two passes, but we ignore that for now - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (gp->tcw.MipMapped && settings.rend.UseMipmaps) ? GL_LINEAR_MIPMAP_NEAREST : GL_LINEAR); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + SetTextureRepeatMode(GL_TEXTURE_WRAP_S, gp->tsp.ClampU, gp->tsp.FlipU); + SetTextureRepeatMode(GL_TEXTURE_WRAP_T, gp->tsp.ClampV, gp->tsp.FlipV); + + //set texture filter mode + if (gp->tsp.FilterMode == 0) + { + //disable filtering, mipmaps + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + } + else + { + //bilinear filtering + //PowerVR supports also trilinear via two passes, but we ignore that for now + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (gp->tcw.MipMapped && settings.rend.UseMipmaps) ? 
GL_LINEAR_MIPMAP_NEAREST : GL_LINEAR); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + } } } - if (Type==ListType_Translucent) + if (Type==ListType_Translucent && !SortingEnabled) { -// glcache.Enable(GL_BLEND); -// glcache.BlendFunc(SrcBlendGL[gp->tsp.SrcInstr],DstBlendGL[gp->tsp.DstInstr]); + glcache.Enable(GL_BLEND); + glcache.BlendFunc(SrcBlendGL[gp->tsp.SrcInstr],DstBlendGL[gp->tsp.DstInstr]); } else glcache.Disable(GL_BLEND); @@ -282,7 +281,6 @@ template else { glcache.DepthFunc(Zfunction[6]); // Greater or equal -// glcache.DepthFunc(GL_LESS); } } else @@ -290,18 +288,14 @@ template glcache.DepthFunc(Zfunction[gp->isp.DepthMode]); } -//#if TRIG_SORT - if (Type == ListType_Translucent) - glcache.DepthMask(GL_FALSE); - else -//#endif - if (!weighted_average) - glcache.DepthMask(!gp->isp.ZWriteDis); +// if (Type == ListType_Translucent) +// glcache.DepthMask(GL_FALSE); +// else + glcache.DepthMask(!gp->isp.ZWriteDis); } template -void DrawList(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0, - int srcBlendModeFilter = -1, int dstBlendModeFilter = -1, bool geometry_only = false) +void DrawList(const List& gply, int first, int count, int pass) { PolyParam* params = &gply.head()[first]; @@ -313,24 +307,30 @@ void DrawList(const List& gply, int first, int count, bool weighted_a //set some 'global' modes for all primitives - glcache.Enable(GL_STENCIL_TEST); - glcache.StencilFunc(GL_ALWAYS,0,0); - glcache.StencilOp(GL_KEEP,GL_KEEP,GL_REPLACE); + if (pass == 0) + { + glcache.Enable(GL_STENCIL_TEST); + glcache.StencilFunc(GL_ALWAYS,0,0); + glcache.StencilOp(GL_KEEP,GL_KEEP,GL_REPLACE); + } + else + { + glcache.StencilMask(0); + glcache.Disable(GL_STENCIL_TEST); + } while(count-->0) { if (params->count>2) //this actually happens for some games. No idea why .. 
{ - if (Type == ListType_Translucent) { - if ((params->tsp.SrcInstr == 0 && params->tsp.DstInstr == 1) // Nothing to do - || (srcBlendModeFilter != -1 && params->tsp.SrcInstr != srcBlendModeFilter) // src filter doesn't match - || (dstBlendModeFilter != -1 && params->tsp.DstInstr != dstBlendModeFilter)) { // dst filter doesn't match - params++; - continue; - } + if (Type == ListType_Translucent && params->tsp.SrcInstr == 0 && params->tsp.DstInstr == 1) + { + // No-op + params++; + continue; } - - SetGPState(params, weighted_average, front_peeling, geometry_only); + ShaderUniforms.poly_number = params - gply.head(); + SetGPState(params, pass); glDrawElements(GL_TRIANGLE_STRIP, params->count, GL_UNSIGNED_SHORT, (GLvoid*)(2*params->first)); glCheck(); } @@ -338,564 +338,6 @@ void DrawList(const List& gply, int first, int count, bool weighted_a } } -void DrawListTranslucentAutoSorted(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0, - int srcBlendModeFilter = -1, int dstBlendModeFilter = -1) -{ - DrawList(gply, first, count, weighted_average, front_peeling, srcBlendModeFilter, dstBlendModeFilter); -} - -void DrawListOpaque(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0) -{ - DrawList(gply, first, count, weighted_average, front_peeling); -} - -void DrawListPunchThrough(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0) -{ - DrawList(gply, first, count, weighted_average, front_peeling); -} - -template -void DrawListGeometry(const List& gply, int first, int count) -{ - DrawList(gply, first, count, false, 0, -1, -1, true); -} - -bool operator<(const PolyParam &left, const PolyParam &right) -{ -/* put any condition you want to sort on here */ - return left.zvZcount<2) - { - pp->zvZ=0; - } - else - { - u16* idx=idx_base+pp->first; - - Vertex* vtx=vtx_base+idx[0]; - Vertex* vtx_end=vtx_base + idx[pp->count-1]+1; - - u32 zv=0xFFFFFFFF; - 
while(vtx!=vtx_end) - { - zv=min(zv,(u32&)vtx->z); - vtx++; - } - - pp->zvZ=(f32&)zv; - } - pp++; - } - - std::stable_sort(pvrrc.global_param_tr.head() + first, pvrrc.global_param_tr.head() + first + count); -} - -Vertex* vtx_sort_base; - - -struct IndexTrig -{ - u16 id[3]; - u16 pid; - f32 z; -}; - - -struct SortTrigDrawParam -{ - PolyParam* ppid; - u16 first; - u16 count; -}; - -float min3(float v0,float v1,float v2) -{ - return min(min(v0,v1),v2); -} - -float max3(float v0,float v1,float v2) -{ - return max(max(v0,v1),v2); -} - - -float minZ(Vertex* v,u16* mod) -{ - return min(min(v[mod[0]].z,v[mod[1]].z),v[mod[2]].z); -} - -bool operator<(const IndexTrig &left, const IndexTrig &right) -{ - return left.zx-b->x; - float yd=a->y-b->y; - - return xd*xd+yd*yd; -} - -//was good idea, but not really working .. -bool Intersect(Vertex* a, Vertex* b) -{ - float a1=area_x2(a); - float a2=area_x2(b); - - float d = distance_apprx(a,b); - - return (a1+a1)>d; -} - -//root for quick-union -u16 rid(vector& v, u16 id) -{ - while(id!=v[id]) id=v[id]; - return id; -} - -struct TrigBounds -{ - float xs,xe; - float ys,ye; - float zs,ze; -}; - -//find 3d bounding box for triangle -TrigBounds bound(Vertex* v) -{ - TrigBounds rv = { min(min(v[0].x,v[1].x),v[2].x), max(max(v[0].x,v[1].x),v[2].x), - min(min(v[0].y,v[1].y),v[2].y), max(max(v[0].y,v[1].y),v[2].y), - min(min(v[0].z,v[1].z),v[2].z), max(max(v[0].z,v[1].z),v[2].z), - }; - - return rv; -} - -//bounding box 2d intersection -bool Intersect(TrigBounds& a, TrigBounds& b) -{ - return ( !(a.xeb.xe) && !(a.yeb.ye) /*&& !(a.zeb.ze)*/ ); -} - - -bool operator<(const IndexTrig &left, const IndexTrig &right) -{ - /* - TrigBounds l=bound(vtx_sort_base+left.id); - TrigBounds r=bound(vtx_sort_base+right.id); - - if (!Intersect(l,r)) - { - return true; - } - else - { - return (l.zs + l.ze) < (r.zs + r.ze); - }*/ - - return minZ(&vtx_sort_base[left.id])pcw.full&PCW_DRAW_MASK)==(pp1->pcw.full&PCW_DRAW_MASK) && pp0->isp.full==pp1->isp.full && 
pp0->tcw.full==pp1->tcw.full && pp0->tsp.full==pp1->tsp.full && pp0->tileclip==pp1->tileclip; -} - -static vector pidx_sort; - -void fill_id(u16* d, Vertex* v0, Vertex* v1, Vertex* v2, Vertex* vb) -{ - d[0]=v0-vb; - d[1]=v1-vb; - d[2]=v2-vb; -} - -void GenSorted(int first, int count) -{ - u32 tess_gen=0; - - pidx_sort.clear(); - - if (pvrrc.verts.used() == 0 || count <= 1) - return; - - Vertex* vtx_base=pvrrc.verts.head(); - u16* idx_base=pvrrc.idx.head(); - - PolyParam* pp_base = &pvrrc.global_param_tr.head()[first]; - PolyParam* pp = pp_base; - PolyParam* pp_end = pp + count; - - Vertex* vtx_arr=vtx_base+idx_base[pp->first]; - vtx_sort_base=vtx_base; - - static u32 vtx_cnt; - - int vtx_count=idx_base[pp_end[-1].first+pp_end[-1].count-1]-idx_base[pp->first]; - if (vtx_count>vtx_cnt) - vtx_cnt=vtx_count; - -#if PRINT_SORT_STATS - printf("TVTX: %d || %d\n",vtx_cnt,vtx_count); -#endif - - if (vtx_count<=0) - return; - - //make lists of all triangles, with their pid and vid - static vector lst; - - lst.resize(vtx_count*4); - - - int pfsti=0; - - while(pp!=pp_end) - { - u32 ppid=(pp-pp_base); - - if (pp->count>2) - { - u16* idx=idx_base+pp->first; - - Vertex* vtx=vtx_base+idx[0]; - Vertex* vtx_end=vtx_base + idx[pp->count-1]-1; - u32 flip=0; - while(vtx!=vtx_end) - { - Vertex* v0, * v1, * v2, * v3, * v4, * v5; - - if (flip) - { - v0=&vtx[2]; - v1=&vtx[1]; - v2=&vtx[0]; - } - else - { - v0=&vtx[0]; - v1=&vtx[1]; - v2=&vtx[2]; - } -#if 0 - if (settings.pvr.subdivide_transp) - { - u32 tess_x=(max3(v0->x,v1->x,v2->x)-min3(v0->x,v1->x,v2->x))/32; - u32 tess_y=(max3(v0->y,v1->y,v2->y)-min3(v0->y,v1->y,v2->y))/32; - - if (tess_x==1) tess_x=0; - if (tess_y==1) tess_y=0; - - //bool tess=(maxZ(v0,v1,v2)/minZ(v0,v1,v2))>=1.2; - - if (tess_x + tess_y) - { - v3=pvrrc.verts.Append(3); - v4=v3+1; - v5=v4+1; - - //xyz - for (int i=0;i<3;i++) - { - ((float*)&v3->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f; - 
((float*)&v4->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v1->x)[i]*0.5f; - ((float*)&v5->x)[i]=((float*)&v1->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f; - } - - //*TODO* Make it perspective correct - - //uv - for (int i=0;i<2;i++) - { - ((float*)&v3->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f; - ((float*)&v4->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v1->u)[i]*0.5f; - ((float*)&v5->u)[i]=((float*)&v1->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f; - } - - //color - for (int i=0;i<4;i++) - { - v3->col[i]=v0->col[i]/2+v2->col[i]/2; - v4->col[i]=v0->col[i]/2+v1->col[i]/2; - v5->col[i]=v1->col[i]/2+v2->col[i]/2; - } - - fill_id(lst[pfsti].id,v0,v3,v4,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - fill_id(lst[pfsti].id,v2,v3,v5,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - fill_id(lst[pfsti].id,v3,v4,v5,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - fill_id(lst[pfsti].id,v5,v4,v1,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - tess_gen+=3; - } - else - { - fill_id(lst[pfsti].id,v0,v1,v2,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - } - } - else -#endif - { - fill_id(lst[pfsti].id,v0,v1,v2,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - } - - flip ^= 1; - - vtx++; - } - } - pp++; - } - - u32 aused=pfsti; - - lst.resize(aused); - - //sort them -#if 1 - std::stable_sort(lst.begin(),lst.end()); - - //Merge pids/draw cmds if two different pids are actually equal - if (true) - { - for (u32 k=1;klst[k].pid) - { - //MOVE UP - for (int j=k;j>0 && lst[j].pid!=lst[j-1].pid && !Intersect(lst[j],lst[j-1]);j--) - { - swap(lst[j],lst[j-1]); - } - } - else - { - //move down - for (int j=k+1;j vidx_sort; - - vidx_sort.resize(aused*3); - - int idx=-1; - - for (u32 i=0; 
icount=stdp.first-last->first; - } - - pidx_sort.push_back(stdp); - idx=pid; - } - } - - SortTrigDrawParam* stdp=&pidx_sort[pidx_sort.size()-1]; - stdp->count=aused*3-stdp->first; - -#if PRINT_SORT_STATS - printf("Reassembled into %d from %d\n",pidx_sort.size(),pp_end-pp_base); -#endif - - //Upload to GPU if needed - if (pidx_sort.size()) - { - //Bind and upload sorted index buffer - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gl.vbo.idxs2); glCheck(); - glBufferData(GL_ELEMENT_ARRAY_BUFFER,vidx_sort.size()*2,&vidx_sort[0],GL_STREAM_DRAW); - - if (tess_gen) printf("Generated %.2fK Triangles !\n",tess_gen/1000.0); - } -} - -void DrawSorted(bool multipass) -{ - //if any drawing commands, draw them - if (pidx_sort.size()) - { - u32 count=pidx_sort.size(); - - { - //set some 'global' modes for all primitives - - glcache.Enable(GL_STENCIL_TEST); - glcache.StencilFunc(GL_ALWAYS,0,0); - glcache.StencilOp(GL_KEEP,GL_KEEP,GL_REPLACE); - - for (u32 p=0; p2) //this actually happens for some games. No idea why .. - { - SetGPState(params); - glDrawElements(GL_TRIANGLES, pidx_sort[p].count, GL_UNSIGNED_SHORT, (GLvoid*)(2*pidx_sort[p].first)); glCheck(); - -#if 0 - //Verify restriping -- only valid if no sort - int fs=pidx_sort[p].first; - - for (u32 j=0; j<(params->count-2); j++) - { - for (u32 k=0; k<3; k++) - { - verify(idx_base[params->first+j+k]==vidx_sort[fs++]); - } - } - - verify(fs==(pidx_sort[p].first+pidx_sort[p].count)); -#endif - } - params++; - } - - if (multipass && settings.rend.TranslucentPolygonDepthMask) - { - // Write to the depth buffer now. The next render pass might need it. (Cosmic Smash) - glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - glcache.Disable(GL_BLEND); - - glcache.StencilMask(0); - - // We use the modifier volumes shader because it's fast. We don't need textures, etc. 
- glcache.UseProgram(gl.modvol_shader.program); - glUniform1f(gl.modvol_shader.sp_ShaderColor, 1.f); - - glcache.DepthFunc(Zfunction[6]); // Greater or equal - glcache.DepthMask(GL_TRUE); - - for (u32 p = 0; p < count; p++) - { - PolyParam* params = pidx_sort[p].ppid; - if (pidx_sort[p].count > 2 && !params->isp.ZWriteDis) { - // FIXME no clipping in modvol shader - //SetTileClip(gp->tileclip,true); - - SetCull(params->isp.CullMode ^ gcflip); - - glDrawElements(GL_TRIANGLES, pidx_sort[p].count, GL_UNSIGNED_SHORT, (GLvoid*)(2 * pidx_sort[p].first)); - } - } - glcache.StencilMask(0xFF); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - } - } - // Re-bind the previous index buffer for subsequent render passes - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gl.vbo.idxs); - } -} - //All pixels are in area 0 by default. //If inside an 'in' volume, they are in area 1 //if inside an 'out' volume, they are in area 0 @@ -1028,11 +470,7 @@ void DrawModVols(int first, int count) SetupModvolVBO(); -// glcache.Enable(GL_BLEND); -// glcache.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - glcache.UseProgram(gl.modvol_shader.program); - glUniform1f(gl.modvol_shader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 256.f); glcache.DepthMask(GL_FALSE); glcache.DepthFunc(Zfunction[4]); @@ -1127,28 +565,8 @@ void DrawModVols(int first, int count) } } } - //disable culling -// SetCull(0); -// //enable color writes -// glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE); -// -// //black out any stencil with '1' -// glcache.Enable(GL_BLEND); -// glcache.BlendFunc(GL_SRC_ALPHA,GL_ONE_MINUS_SRC_ALPHA); -// -// glcache.Enable(GL_STENCIL_TEST); -// glcache.StencilFunc(GL_EQUAL,0x81,0x81); //only pixels that are Modvol enabled, and in area 1 -// -// //clear the stencil result bit -// glcache.StencilMask(0x3); //write to lsb -// glcache.StencilOp(GL_ZERO,GL_ZERO,GL_ZERO); -// -// //don't do depth testing -// glcache.Disable(GL_DEPTH_TEST); SetupMainVBO(); -// glDrawArrays(GL_TRIANGLE_STRIP,0,4); - } 
//restore states @@ -1156,12 +574,8 @@ void DrawModVols(int first, int count) glcache.DepthMask(GL_TRUE); } -void InitDualPeeling(); -void RenderAverageColors(); -void RenderWeightedBlended(); -void RenderFrontToBackPeeling(int first, int count); -void DualPeelingReshape(int w, int h); -void renderABuffer(); +void renderABuffer(bool sortFragments); +void renderPass2(GLuint textureId, GLuint depthTexId); void CreateGeometryTexture() { @@ -1177,13 +591,14 @@ void CreateGeometryTexture() glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, stencilTexId, 0); glCheck(); glCheck(); -// opaqueTexId = glcache.GenTexture(); -// glcache.BindTexture(GL_TEXTURE_2D, opaqueTexId); -// glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); -// glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); -// glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, screen_width, screen_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); -// glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, opaqueTexId, 0); -// glCheck(); + opaqueTexId = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, opaqueTexId); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, screen_width, screen_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, opaqueTexId, 0); + glCheck(); + depthTexId = glcache.GenTexture(); glTextureView(depthTexId, GL_TEXTURE_2D, stencilTexId, GL_DEPTH32F_STENCIL8, 0, 1, 0, 1); glCheck(); @@ -1201,13 +616,6 @@ void DrawStrips() glGenFramebuffers(1, &geom_fbo); CreateGeometryTexture(); - // Color buffer. 
Not normally needed - //GLuint colortexid = glcache.GenTexture(); - //glcache.BindTexture(GL_TEXTURE_2D, colortexid); - // - //glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, screen_width, screen_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, 0); glCheck(); - //glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, colortexid, 0); glCheck(); - GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); verify(uStatus == GL_FRAMEBUFFER_COMPLETE); @@ -1219,13 +627,12 @@ void DrawStrips() { CreateGeometryTexture(); } + glcache.ClearColor(0, 0, 0, 0); glcache.Disable(GL_SCISSOR_TEST); glcache.DepthMask(GL_TRUE); - glStencilMask(0xFF); glCheck(); - glClear(GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glCheck(); + glStencilMask(0xFF); + glClear(GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glCheck(); } - InitDualPeeling(); - DualPeelingReshape(screen_width, screen_height); SetupMainVBO(); //Draw the strips ! @@ -1240,68 +647,73 @@ void DrawStrips() //initial state glcache.Enable(GL_DEPTH_TEST); glcache.DepthMask(GL_TRUE); - - // Do a first pass on the depth+stencil buffer + glcache.Disable(GL_BLEND); + // + // PASS 0: Geometry pass to update the depth and stencil + // glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - DrawListGeometry(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count); - DrawListGeometry(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count); + DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 0); + DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count, 0); // Modifier volumes DrawModVols(previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); - glBindFramebuffer(GL_FRAMEBUFFER, 0); glCheck(); - -// glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + // + // PASS 1: Render OP 
and PT to fbo + // + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); // Bind stencil buffer for the fragment shader (shadowing) glActiveTexture(GL_TEXTURE2); glBindTexture(GL_TEXTURE_2D, stencilTexId); + glActiveTexture(GL_TEXTURE0); glCheck(); - // Bind depth texture for manual depth testing in fragment shader - glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_2D, depthTexId); - glActiveTexture(GL_TEXTURE0); - //Opaque - DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count); + DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 1); //Alpha tested - DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count); + DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count, 1); + // + // PASS 2: Render opaque and PT texture to a-buffers along with depth + // + // Unbind stencil glActiveTexture(GL_TEXTURE2); glBindTexture(GL_TEXTURE_2D, 0); glActiveTexture(GL_TEXTURE0); - //Alpha blended - { - DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); - glCheck(); + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + + renderPass2(opaqueTexId, depthTexId); + + // + // PASS 3: Render TR to a-buffers + // + SetupMainVBO(); + + //Alpha blended + DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); + glCheck(); + + // FIXME Unsorted TR cannot use a-buffer because of lost ordering + + // FIXME Blinking pixels in Soulcalibur score table. Could be that some TR have same depth and rely on natural order? + // a-buffers makes the final order unpredictable and varies each frame + // FIXME Depth of translucent poly must be used for next render pass if any + // FIXME Multipass in general... 
-glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - renderABuffer(); -// if (hack_on) -// RenderAverageColors(); -// else - // RenderFrontToBackPeeling(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); - //RenderWeightedBlended(); -// if (pvrrc.isAutoSort) -// GenSorted(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); -// -//#if TRIG_SORT -// if (pvrrc.isAutoSort) -// DrawSorted(render_pass < pvrrc.render_passes.used() - 1); -// else -// DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); -//#else -// if (pvrrc.isAutoSort) -// SortPParams(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); -// DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); -//#endif - SetupMainVBO(); - } previous_pass = current_pass; } + + // + // PASS 4: Render a-buffers to screen + // + glBindFramebuffer(GL_FRAMEBUFFER, 0); glCheck(); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + + renderABuffer(pvrrc.isAutoSort); + SetupMainVBO(); } diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index fce03dcf8..1baf2f221 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -186,17 +186,10 @@ const char* PixelPipelineShader = #define pp_ShadInstr %d \n\ #define pp_Offset %d \n\ #define pp_FogCtrl %d \n\ -#define pp_WeightedAverage %d \n\ -#define pp_FrontPeeling %d \n\ -#if pp_WeightedAverage == 1 \n\ -#extension GL_ARB_draw_buffers : require \n\ -#endif \n\ -#if pp_FrontPeeling != 1 // FIXME \n\ -uniform sampler2D DepthTex; \n\ -#endif \n" +#define PASS %d \n" #ifndef GLES "\ - #if pp_WeightedAverage == 0 \n\ + #if PASS <= 1 \n\ out vec4 FragColor; \n\ #endif \n" #endif @@ -211,15 +204,21 @@ uniform highp float sp_FOG_DENSITY; \n\ uniform highp float shade_scale_factor; \n\ uniform lowp vec2 screen_size; \n\ uniform sampler2D tex,fog_table; \n\ -#if pp_WeightedAverage == 0 && pp_FrontPeeling == 0 
\n\ +uniform int pp_Number; \n\ +#if PASS == 1 \n\ uniform usampler2D shadow_stencil; \n\ #endif \n\ -#extension GL_EXT_shader_image_load_store : enable \n\ -#define ABUFFER_SIZE 32 \n\ -uniform uvec2 blend_mode; \n\ -coherent uniform layout(size1x32) uimage2D abufferCounterImg; \n\ -coherent uniform layout(size4x32) image2DArray abufferImg; \n\ -coherent uniform layout(size2x32) image2DArray abufferBlendingImg; \n\ +#if PASS == 3 \n\ + uniform sampler2D DepthTex; \n\ +#endif \n\ +#if PASS > 1 \n\ + #extension GL_EXT_shader_image_load_store : enable \n\ + #define ABUFFER_SIZE %d \n\ + uniform uvec2 blend_mode; \n\ + coherent uniform layout(size1x32) uimage2D abufferCounterImg; \n\ + coherent uniform layout(size4x32) image2DArray abufferImg; \n\ + coherent uniform layout(size2x32) image2DArray abufferBlendingImg; \n\ +#endif \n\ /* Vertex input*/ \n\ " vary " lowp vec4 vtx_base; \n\ " vary " lowp vec4 vtx_offs; \n\ @@ -238,19 +237,12 @@ void main() \n\ gl_FragDepth = 1 - log2(1.0 + w) / 34; \n" #endif "\ - #if pp_FrontPeeling != 1 // FIXME \n\ + #if PASS == 3 \n\ // Manual depth testing \n\ highp float frontDepth = texture(DepthTex, gl_FragCoord.xy / screen_size).r; \n\ // FIXME this causes dots to appear. Loss of precision? 
\n\ - //if (gl_FragDepth > frontDepth - 1e-8) // FIXME the TA depth test is ignored \n\ - // discard; \n\ - #endif \n\ - #if pp_FrontPeeling == 2 \n\ - // Bit-exact comparison between FP32 z-buffer and fragment depth \n\ - highp float frontDepth = texture(DepthTex, gl_FragCoord.xy / screen_size).r; \n\ - if (gl_FragDepth <= frontDepth) { \n\ + if (gl_FragDepth > frontDepth) \n\ discard; \n\ - } \n\ #endif \n\ // Clip outside the box \n\ #if pp_ClipTestMode==1 \n\ @@ -314,7 +306,7 @@ void main() \n\ #endif\n\ } \n\ #endif\n\ - #if pp_WeightedAverage == 0 && pp_FrontPeeling == 0 \n\ + #if PASS == 1 \n\ //uvec4 stencil = texture(shadow_stencil, vec2(gl_FragCoord.x / 1280, gl_FragCoord.y / 960)); \n\ uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / screen_size); \n\ if (stencil.r == uint(0x81)) \n\ @@ -330,36 +322,25 @@ void main() \n\ #endif \n\ \n\ //color.rgb=vec3(gl_FragCoord.w * sp_FOG_DENSITY / 128.0); \n\ - //if (gl_FragCoord.w < 0) \n\ - // color.rgb = vec3(1, 0, 0); \n\ - //else if (gl_FragCoord.w > 1) \n\ - // color.rgb = vec3(0, 1, 0); \n\ - //color = vec4(gl_FragCoord.w*6, 0, 0, 0.5); \n\ \n\ - #if pp_WeightedAverage == 1 \n\ - // Average colors \n\ - gl_FragData[0] = vec4(color.rgb * color.a, color.a); \n\ - gl_FragData[1] = vec4(1.0); \n\ - // Weighted Blended \n\ -// float viewDepth = abs(1.0 / gl_FragCoord.w); \n\ -// float linearDepth = viewDepth * 4.5; // uDepthScale \n\ -// float weight = clamp(0.03 / (1e-5 + pow(linearDepth/10, 3.0)), 1e-2, 3e3); \n\ -// gl_FragData[0] = vec4(color.rgb * color.a, color.a) * weight; \n\ -// gl_FragData[1] = vec4(color.a); \n\ - #elif pp_FrontPeeling == 1 \n" - FRAGCOL " = vec4(color.rgb * color.a, 1.0 - color.a); \n\ - #elif pp_FrontPeeling == 2 \n" - FRAGCOL " = vec4(color.rgb * color.a, color.a); \n\ - #else \n" -// FRAGCOL "=color; \n -" ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + #if PASS == 1 \n" + FRAGCOL " = color; \n\ + #elif PASS > 1 \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ int abidx = 
int(imageAtomicAdd(abufferCounterImg, coords, uint(1))); \n\ - if (abidx >= ABUFFER_SIZE) \n\ - discard; \n\ - vec4 blend_val = vec4(gl_FragDepth, float(blend_mode.x) * 8 + float(blend_mode.y), 0, 0); \n\ - ivec3 coords3 = ivec3(coords, abidx); \n\ - imageStore(abufferImg, coords3, color); \n\ - imageStore(abufferBlendingImg, coords3, blend_val); \n\ + if (abidx >= ABUFFER_SIZE) { \n\ + // Green pixels when overflow \n\ + vec4 blend_val = vec4(0.001, 8, float(pp_Number), 0); \n\ + ivec3 coords3 = ivec3(coords, 0); \n\ + imageStore(abufferImg, coords3, vec4(0, 1, 0, 1)); \n\ + imageStore(abufferBlendingImg, coords3, blend_val); \n\ + } else { \n\ + vec4 blend_val = vec4(gl_FragDepth, float(blend_mode.x) * 8 + float(blend_mode.y), float(pp_Number), 0); \n\ + ivec3 coords3 = ivec3(coords, abidx); \n\ + imageStore(abufferImg, coords3, color); \n\ + imageStore(abufferBlendingImg, coords3, blend_val); \n\ + } \n\ + discard; \n\ \n\ #endif \n\ }"; @@ -367,10 +348,8 @@ void main() \n\ const char* ModifierVolumeShader = #ifndef GLES "#version 140 \n" - "out vec4 FragColor; \n" #endif " \ -uniform lowp float sp_ShaderColor; \n\ /* Vertex input*/ \n\ void main() \n\ { \n" @@ -379,7 +358,7 @@ void main() \n\ highp float w = 100000.0 * gl_FragCoord.w; \n\ gl_FragDepth = 1 - log2(1.0 + w) / 34; \n" #endif - FRAGCOL "=vec4(0.0, 0.0, 0.0, sp_ShaderColor); \n\ + "\ }"; const char* OSD_Shader = @@ -795,7 +774,7 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, - u32 pp_FogCtrl, bool pp_WeightedAverage, u32 pp_FrontPeeling) + u32 pp_FogCtrl, int pass) { u32 rv=0; @@ -807,8 +786,7 @@ int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, rv<<=2; rv|=pp_ShadInstr; rv<<=1; rv|=pp_Offset; rv<<=2; rv|=pp_FogCtrl; - rv <<= 1; rv |= pp_WeightedAverage; - rv <<= 2; rv |= pp_FrontPeeling; + rv <<= 2; rv |= pass; return 
rv; } @@ -819,7 +797,7 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe sprintf(pshader, source, s->cp_AlphaTest,s->pp_ClipTestMode,s->pp_UseAlpha, - s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, (int)s->pp_WeightedAverage, s->pp_FrontPeeling); + s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pass, ABUFFER_SIZE); s->program=gl_CompileAndLink(VertexShaderSource,pshader); @@ -858,7 +836,7 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe s->screen_size = glGetUniformLocation(s->program, "screen_size"); s->shade_scale_factor = glGetUniformLocation(s->program, "shade_scale_factor"); - // Depth peeling: use texture 1 for depth texture + // Use texture 1 for depth texture gu = glGetUniformLocation(s->program, "DepthTex"); if (gu != -1) glUniform1i(gu, 1); // GL_TEXTURE1 @@ -880,6 +858,7 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe glUniform1i(gu, 5); // GL_TEXTURE5 s->blend_mode = glGetUniformLocation(s->program, "blend_mode"); + s->pp_Number = glGetUniformLocation(s->program, "pp_Number"); return glIsProgram(s->program)==GL_TRUE; } @@ -903,55 +882,8 @@ bool gl_create_resources() glGenBuffers(1, &gl.vbo.idxs); glGenBuffers(1, &gl.vbo.idxs2); - /* - memset(gl.pogram_table,0,sizeof(gl.pogram_table)); - - PipelineShader* dshader=0; - u32 compile=0; -#define forl(name,max) for(u32 name=0;name<=max;name++) - forl(cp_AlphaTest,1) - { - forl(pp_ClipTestMode,2) - { - forl(pp_UseAlpha,1) - { - forl(pp_Texture,1) - { - forl(pp_FogCtrl,3) - { - forl(pp_IgnoreTexA,1) - { - forl(pp_ShadInstr,3) - { - forl(pp_Offset,1) - { - dshader=&gl.pogram_table[GetProgramID(cp_AlphaTest,pp_ClipTestMode,pp_Texture,pp_UseAlpha,pp_IgnoreTexA, - pp_ShadInstr,pp_Offset,pp_FogCtrl)]; - - dshader->cp_AlphaTest = cp_AlphaTest; - dshader->pp_ClipTestMode = pp_ClipTestMode-1; - dshader->pp_Texture = pp_Texture; - dshader->pp_UseAlpha = 
pp_UseAlpha; - dshader->pp_IgnoreTexA = pp_IgnoreTexA; - dshader->pp_ShadInstr = pp_ShadInstr; - dshader->pp_Offset = pp_Offset; - dshader->pp_FogCtrl = pp_FogCtrl; - dshader->program = -1; - } - } - } - } - } - } - } - } - */ - - - gl.modvol_shader.program=gl_CompileAndLink(VertexShaderSource,ModifierVolumeShader); gl.modvol_shader.scale = glGetUniformLocation(gl.modvol_shader.program, "scale"); - gl.modvol_shader.sp_ShaderColor = glGetUniformLocation(gl.modvol_shader.program, "sp_ShaderColor"); gl.modvol_shader.depth_scale = glGetUniformLocation(gl.modvol_shader.program, "depth_scale"); @@ -1916,6 +1848,16 @@ struct glesrend : Renderer glcache.DeleteTextures(1, &stencilTexId); stencilTexId = 0; } + if (depthTexId != 0) + { + glcache.DeleteTextures(1, &depthTexId); + depthTexId = 0; + } + if (opaqueTexId != 0) + { + glcache.DeleteTextures(1, &opaqueTexId); + opaqueTexId = 0; + } reshapeABuffer(w, h); } void Term() { } diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index f23dc2cea..fd7d9f666 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -58,12 +58,12 @@ struct PipelineShader GLuint shade_scale_factor; GLuint screen_size; GLuint blend_mode; + GLuint pp_Number; // u32 cp_AlphaTest; s32 pp_ClipTestMode; u32 pp_Texture, pp_UseAlpha, pp_IgnoreTexA, pp_ShadInstr, pp_Offset, pp_FogCtrl; - bool pp_WeightedAverage; - u32 pp_FrontPeeling; + int pass; }; @@ -85,8 +85,6 @@ struct gl_ctx GLuint program; GLuint scale,depth_scale; - GLuint sp_ShaderColor; - } modvol_shader; std::map shaders; @@ -136,7 +134,7 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt); void ReadRTTBuffer(); int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, - u32 pp_FogCtrl, bool pp_WeightedAverage, u32 pp_FrontPeeling); + u32 pp_FogCtrl, int pass); struct ShaderUniforms_t { @@ -148,6 +146,7 @@ struct ShaderUniforms_t float ps_FOG_COL_VERT[3]; float fog_coefs[2]; GLuint 
blend_mode[2]; + int poly_number; void Set(PipelineShader* s) { @@ -180,6 +179,9 @@ struct ShaderUniforms_t if (s->blend_mode != -1) glUniform2uiv(s->blend_mode, 1, blend_mode); + + if (s->pp_Number != -1) + glUniform1i(s->pp_Number, poly_number); } }; @@ -192,8 +194,6 @@ GLuint loadPNG(const string& subpath, int &width, int &height); extern GLuint stencilTexId; extern GLuint depthTexId; +extern GLuint opaqueTexId; -void DrawListTranslucentAutoSorted(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0, int srcBlendModeFilter = -1, int dstBlendModeFilter = -1); -void DrawListOpaque(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0); -void DrawListPunchThrough(const List& gply, int first, int count, bool weighted_average = false, u32 front_peeling = 0); -void SetupMainVBO(); +#define ABUFFER_SIZE 16 diff --git a/core/rend/gles/render_tr.cpp b/core/rend/gles/render_tr.cpp deleted file mode 100644 index 66cba0a06..000000000 --- a/core/rend/gles/render_tr.cpp +++ /dev/null @@ -1,538 +0,0 @@ -#include "glcache.h" - -static int g_imageWidth = 0; -static int g_imageHeight = 0; -static GLuint g_quadBuffer = 0; -static GLuint g_quadVertexArray = 0; - -GLenum g_drawBuffers[] = {GL_COLOR_ATTACHMENT0, - GL_COLOR_ATTACHMENT1, - GL_COLOR_ATTACHMENT2, - GL_COLOR_ATTACHMENT3, - GL_COLOR_ATTACHMENT4, - GL_COLOR_ATTACHMENT5, - GL_COLOR_ATTACHMENT6 -}; - -// -// Weighted Average -// -static GLuint g_accumulationTexId[2]; -static GLuint g_accumulationFboId; -PipelineShader g_wavg_final_shader; - -void InitAccumulationRenderTargets() -{ - glGenTextures(2, g_accumulationTexId); - - glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_accumulationTexId[0]); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - 
glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RGBA16F, - g_imageWidth, g_imageHeight, 0, GL_RGBA, GL_FLOAT, NULL); - - glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_accumulationTexId[1]); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_RG32F, - g_imageWidth, g_imageHeight, 0, GL_RGBA, GL_FLOAT, NULL); - - glGenFramebuffers(1, &g_accumulationFboId); - glBindFramebuffer(GL_FRAMEBUFFER, g_accumulationFboId); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_RECTANGLE, g_accumulationTexId[0], 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT1, - GL_TEXTURE_RECTANGLE, g_accumulationTexId[1], 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, stencilTexId, 0); - - glCheck(); -} - -void DeleteAccumulationRenderTargets() -{ - glDeleteFramebuffers(1, &g_accumulationFboId); - glDeleteTextures(2, g_accumulationTexId); -} - -const char *wavg_final_fragment = "\ -#version 140 \n\ -out vec4 FragColor; \n\ -uniform sampler2DRect ColorTex0; \n\ -uniform sampler2DRect ColorTex1; \n\ - \n\ -void main(void) \n\ -{ \n\ - highp vec4 SumColor = texture2DRect(ColorTex0, gl_FragCoord.xy); \n\ - highp float n = texture2DRect(ColorTex1, gl_FragCoord.xy).r; \n\ - \n\ - // Average Color \n\ - highp vec3 AvgColor = SumColor.rgb / SumColor.a; \n\ - if (n == 0.0 || isinf(AvgColor.r) || isinf(AvgColor.g) || isinf(AvgColor.b) || isnan(AvgColor.r) || isnan(AvgColor.g) || isnan(AvgColor.b)) { \n\ - FragColor.rgba = vec4(0, 0, 0, 0); \n\ - return; \n\ - } \n\ - \n\ - highp float AvgAlpha = SumColor.a / n; \n\ - \n\ - highp float 
T = pow(1.0 - AvgAlpha, n); \n\ - if (isnan(T)) T = 0; \n\ - FragColor.rgb = AvgColor; \n\ - FragColor.a = 1 - T; \n\ - // Weighted Blended \n\ -// vec3 AvgColor = SumColor.rgb / max(SumColor.a, 0.00001); \n\ -// FragColor.rgb = AvgColor; \n\ -// FragColor.a = 1 - n; \n\ - //FragColor.rgb = vec3(AvgAlpha, 0, 0); FragColor.a = 1.0; \n\ - //FragColor.rgb = AvgColor; FragColor.a = 1.0; \n\ -} \n\ -"; - -void DrawQuad() -{ - glBindVertexArray(g_quadVertexArray); - - struct Vertex vertices[] = { - { 0, screen_height, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 1 }, - { 0, 0, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 0 }, - { screen_width, screen_height, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 1 }, - { screen_width, 0, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 0 }, - }; - GLushort indices[] = { 0, 1, 2, 1, 3 }; - - glBindBuffer(GL_ARRAY_BUFFER, g_quadBuffer); glCheck(); - glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STREAM_DRAW); glCheck(); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glCheck(); - - glEnableVertexAttribArray(VERTEX_POS_ARRAY); glCheck(); - glVertexAttribPointer(VERTEX_POS_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,x)); glCheck(); - - glEnableVertexAttribArray(VERTEX_COL_BASE_ARRAY); glCheck(); - glVertexAttribPointer(VERTEX_COL_BASE_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,col)); glCheck(); - - glEnableVertexAttribArray(VERTEX_COL_OFFS_ARRAY); glCheck(); - glVertexAttribPointer(VERTEX_COL_OFFS_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,spc)); glCheck(); - - glEnableVertexAttribArray(VERTEX_UV_ARRAY); glCheck(); - glVertexAttribPointer(VERTEX_UV_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,u)); glCheck(); - - glDrawElements(GL_TRIANGLE_STRIP, 5, GL_UNSIGNED_SHORT, indices); glCheck(); -} - -//-------------------------------------------------------------------------- -void RenderAverageColors() -{ - 
glcache.Enable(GL_DEPTH_TEST); - glcache.DepthMask(false); - - // --------------------------------------------------------------------- - // 1. Accumulate Colors and Depth Complexity - // --------------------------------------------------------------------- - - glBindFramebuffer(GL_FRAMEBUFFER, g_accumulationFboId); - glDrawBuffers(2, g_drawBuffers); - - glcache.ClearColor(0, 0, 0, 0); - glClear(GL_COLOR_BUFFER_BIT); - - glBlendEquation(GL_FUNC_ADD); - glcache.BlendFunc(GL_ONE, GL_ONE); - glcache.Enable(GL_BLEND); - - DrawListTranslucentAutoSorted(pvrrc.global_param_tr, 0, pvrrc.global_param_tr.used(), true); - - glCheck(); - - // --------------------------------------------------------------------- - // 2. Approximate Blending - // --------------------------------------------------------------------- - - glBindFramebuffer(GL_FRAMEBUFFER, 0); - - glcache.Enable(GL_BLEND); - glcache.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - glcache.Disable(GL_DEPTH_TEST); - - glcache.UseProgram(g_wavg_final_shader.program); - ShaderUniforms.Set(&g_wavg_final_shader); - glActiveTexture(GL_TEXTURE0); - glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_accumulationTexId[0]); - glUniform1i(glGetUniformLocation(g_wavg_final_shader.program, "ColorTex0"), 0); - glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_RECTANGLE, g_accumulationTexId[1]); - glUniform1i(glGetUniformLocation(g_wavg_final_shader.program, "ColorTex1"), 1); - glActiveTexture(GL_TEXTURE0); - - DrawQuad(); - - glCheck(); -} - -extern bool hack_on; - -void RenderWeightedBlended() -{ -// if (hack_on) -// glcache.Disable(GL_DEPTH_TEST); -// else - glcache.Enable(GL_DEPTH_TEST); - glcache.DepthMask(false); - - // --------------------------------------------------------------------- - // 1. 
Accumulate Colors and Depth Complexity - // --------------------------------------------------------------------- - - glBindFramebuffer(GL_FRAMEBUFFER, g_accumulationFboId); - glDrawBuffers(2, g_drawBuffers); - - glcache.ClearColor(0, 0, 0, 1); - glClear(GL_COLOR_BUFFER_BIT); - - glcache.Enable(GL_BLEND); - glBlendFuncSeparate(GL_ONE, GL_ONE, GL_ZERO, GL_ONE_MINUS_SRC_ALPHA); - - DrawListTranslucentAutoSorted(pvrrc.global_param_tr, 0, pvrrc.global_param_tr.used(), true); - - glCheck(); - - // --------------------------------------------------------------------- - // 2. Approximate Blending - // --------------------------------------------------------------------- - - glBindFramebuffer(GL_FRAMEBUFFER, 0); - - glcache.Enable(GL_BLEND); - glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA); - - glcache.UseProgram(g_wavg_final_shader.program); - ShaderUniforms.Set(&g_wavg_final_shader); - glActiveTexture(GL_TEXTURE0); - glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_accumulationTexId[0]); - glUniform1i(glGetUniformLocation(g_wavg_final_shader.program, "ColorTex0"), 0); - glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_RECTANGLE, g_accumulationTexId[1]); - glUniform1i(glGetUniformLocation(g_wavg_final_shader.program, "ColorTex1"), 1); - glActiveTexture(GL_TEXTURE0); - - DrawQuad(); - - glCheck(); -} - -// -// Front depth peeling -// -static float g_opacity = 0.6; -static int g_numPasses = 4; // SoA opening sequence needs at least 12 passes!!! 
-GLuint g_frontFboId[2]; -GLuint g_frontDepthTexId[2]; -GLuint g_frontColorTexId[2]; -GLuint g_frontColorBlenderTexId; -GLuint g_frontColorBlenderFboId; -//GLuint g_frontDepthInitTexId; -GLuint g_samples_query; -PipelineShader g_front_blend_shader; -PipelineShader g_front_final_shader; - -const char *front_blend_fragment_source = "\ -#version 140 \n\ -out vec4 FragColor; \n\ -uniform sampler2D TempTex; \n\ -uniform lowp vec2 screen_size; \n\ - \n\ -void main(void) \n\ -{ \n\ - FragColor = texture(TempTex, gl_FragCoord.xy / screen_size); \n\ -} \n\ -"; - - -const char *front_final_fragment_source = "\ -#version 140 \n\ -out vec4 FragColor; \n\ -uniform sampler2D ColorTex; \n\ -uniform lowp vec2 screen_size; \n\ - \n\ -void main(void) \n\ -{ \n\ - vec4 frontColor = texture(ColorTex, gl_FragCoord.xy / screen_size); \n\ -// if (frontColor.a >= 0.99) { \n\ -// FragColor = vec4(0, 0, 0, 0); \n\ -// return; \n\ -// } \n\ - FragColor.rgb = frontColor.rgb / (1 - frontColor.a); \n\ - FragColor.a = 1 - frontColor.a; \n\ -} \n\ -"; - -void InitFrontPeelingRenderTargets() -{ - glGenTextures(2, g_frontDepthTexId); - glGenTextures(2, g_frontColorTexId); - glGenFramebuffers(2, g_frontFboId); - - for (int i = 0; i < 2; i++) - { - glcache.BindTexture(GL_TEXTURE_2D, g_frontDepthTexId[i]); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT); - glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH32F_STENCIL8, g_imageWidth, g_imageHeight, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, NULL); - - glcache.BindTexture(GL_TEXTURE_2D, g_frontColorTexId[i]); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - 
glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, g_imageWidth, g_imageHeight, - 0, GL_RGBA, GL_FLOAT, 0); - - glBindFramebuffer(GL_FRAMEBUFFER, g_frontFboId[i]); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, g_frontDepthTexId[i], 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, g_frontColorTexId[i], 0); - } - - g_frontColorBlenderTexId = glcache.GenTexture(); - glcache.BindTexture(GL_TEXTURE_2D, g_frontColorBlenderTexId); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, g_imageWidth, g_imageHeight, - 0, GL_RGBA, GL_FLOAT, 0); - -// g_frontDepthInitTexId = glcache.GenTexture(); -// glcache.BindTexture(GL_TEXTURE_RECTANGLE, g_frontDepthInitTexId); -// glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); -// glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); -// glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MIN_FILTER, GL_NEAREST); -// glcache.TexParameteri(GL_TEXTURE_RECTANGLE, GL_TEXTURE_MAG_FILTER, GL_NEAREST); -// glTexImage2D(GL_TEXTURE_RECTANGLE, 0, GL_DEPTH_COMPONENT, -// g_imageWidth, g_imageHeight, 0, GL_DEPTH_COMPONENT, GL_FLOAT, NULL); - - glGenFramebuffers(1, &g_frontColorBlenderFboId); - glBindFramebuffer(GL_FRAMEBUFFER, g_frontColorBlenderFboId); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, g_frontDepthTexId[0], 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, 
GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, g_frontColorBlenderTexId, 0); - - glGenQueries(1, &g_samples_query); - - glCheck(); -} - -void DeleteFrontPeelingRenderTargets() -{ - glDeleteFramebuffers(2, g_frontFboId); - glDeleteFramebuffers(1, &g_frontColorBlenderFboId); - glcache.DeleteTextures(2, g_frontDepthTexId); - glcache.DeleteTextures(2, g_frontColorTexId); - glcache.DeleteTextures(1, &g_frontColorBlenderTexId); -// glcache.DeleteTextures(1, &g_frontDepthInitTexId); - glDeleteQueries(1, &g_samples_query); -} - -extern GLuint geom_fbo; -//-------------------------------------------------------------------------- -void RenderFrontToBackPeeling(int first, int count) -{ - // --------------------------------------------------------------------- - // 1. Initialize Min Depth Buffer - // --------------------------------------------------------------------- - - glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); - glBindTexture(GL_TEXTURE_2D, g_frontDepthTexId[0]); - // FIXME: - // GL_DEPTH_STENCIL is super slow - // GL_DEPTH32F_STENCIL8 is fast but doesn't seem to work (depth values are wrong?!?!?) - // GL_DEPTH24_STENCIL8 seems to work but is super slow - // GL_DEPTH_COMPONENT32F fails - // GL_FLOAT_32_UNSIGNED_INT_24_8_REV fails - glCopyTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH32F_STENCIL8, 0, 0, g_imageWidth, g_imageHeight, 0); - glBindTexture(GL_TEXTURE_2D, 0); - glCheck(); - - glBindFramebuffer(GL_FRAMEBUFFER, g_frontColorBlenderFboId); - glDrawBuffer(g_drawBuffers[0]); - - glcache.DepthMask(true); - glcache.Enable(GL_DEPTH_TEST); - - glcache.ClearColor(0, 0, 0, 1); - glClear(GL_COLOR_BUFFER_BIT /* | GL_DEPTH_BUFFER_BIT */); - - glcache.Disable(GL_BLEND); - - // Hack on - glClear(GL_DEPTH_BUFFER_BIT); - // TODO Hack to get the depth from OP and PT passes. 
- glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - DrawListOpaque(pvrrc.global_param_op, 0, pvrrc.global_param_op.used(), false, 1); - DrawListPunchThrough(pvrrc.global_param_pt, 0, pvrrc.global_param_pt.used(), false, 1); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - // Hack off - - DrawListTranslucentAutoSorted(pvrrc.global_param_tr, first, count, false, 1, 4, 5); // FIXME initial pass for other blend modes - - glCheck(); - - // --------------------------------------------------------------------- - // 2. Depth Peeling + Blending - // --------------------------------------------------------------------- -extern bool hack_on; -if (hack_on) - g_numPasses = 4; -else - g_numPasses = 20; - - int numLayers = (g_numPasses - 1) * 2; - for (int layer = 1; layer < numLayers; layer++) { - int currId = layer % 2; - int prevId = 1 - currId; - - glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); - glBindTexture(GL_TEXTURE_2D, g_frontDepthTexId[currId]); - glCopyTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH32F_STENCIL8, 0, 0, g_imageWidth, g_imageHeight, 0); - glBindTexture(GL_TEXTURE_2D, 0); - glCheck(); - - glBindFramebuffer(GL_FRAMEBUFFER, g_frontFboId[currId]); - glDrawBuffer(g_drawBuffers[0]); -// glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, g_frontDepthTexId[currId], 0); - glCheck(); - - glcache.ClearColor(0, 0, 0, 0); - glClear(GL_COLOR_BUFFER_BIT); - - glcache.Enable(GL_DEPTH_TEST); - - glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_2D, g_frontDepthTexId[prevId]); // DepthTex - glActiveTexture(GL_TEXTURE0); - glCheck(); - - glBeginQuery(GL_SAMPLES_PASSED, g_samples_query); - // Peeling shader - DrawListTranslucentAutoSorted(pvrrc.global_param_tr, first, count, false, 2, 4, 5); // Only 4,5 blending - //DrawListTranslucentAutoSorted(pvrrc.global_param_tr, first, count, false, 2, 1, 1); // Only 1,1 blending - //DrawListTranslucentAutoSorted(pvrrc.global_param_tr, first, count, false, 2, 4, 1); // Only 4,1 blending - 
glEndQuery(GL_SAMPLES_PASSED); - - glCheck(); - - GLuint sample_count; - glGetQueryObjectuiv(g_samples_query, GL_QUERY_RESULT, &sample_count); - if (sample_count == 0) { - printf("Aborting depth peeling after %d layers\n", layer); - break; - } - - glBindFramebuffer(GL_FRAMEBUFFER, g_frontColorBlenderFboId); - glDrawBuffer(g_drawBuffers[0]); - - glcache.Disable(GL_DEPTH_TEST); - glcache.Enable(GL_BLEND); - - glBlendEquation(GL_FUNC_ADD); - glBlendFuncSeparate(GL_DST_ALPHA, GL_ONE, - GL_ZERO, GL_ONE_MINUS_SRC_ALPHA); - // Let's do it again for 1,1 blending - //glBlendEquationSeparate(GL_FUNC_ADD, GL_FUNC_REVERSE_SUBTRACT); - //glBlendFunc(GL_ONE, GL_ONE); - // Let's do it again for 4,1 blending - //glBlendEquationSeparate(GL_FUNC_ADD, GL_FUNC_REVERSE_SUBTRACT); - //glBlendFunc(GL_SRC_ALPHA, GL_ONE); - - glcache.UseProgram(g_front_blend_shader.program); - ShaderUniforms.Set(&g_front_blend_shader); - glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_2D, g_frontColorTexId[currId]); // TempTex - glActiveTexture(GL_TEXTURE0); - - // Blending shader - DrawQuad(); - - SetupMainVBO(); - - glcache.Disable(GL_BLEND); - - glCheck(); - } - - // --------------------------------------------------------------------- - // 3. Final Pass - // --------------------------------------------------------------------- - - glBindFramebuffer(GL_FRAMEBUFFER, 0); - glDrawBuffer(GL_BACK); - glcache.Disable(GL_DEPTH_TEST); - - glcache.Enable(GL_BLEND); - - // FIXME dst=GL_ONE blending don't reduce the dst alpha, so this blending function cannot work. - glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); // No cache! 
b/c of glBlendFuncSeparate - // kinda works for GL_ONE blending - //glBlendFuncSeparate(GL_SRC_ALPHA, GL_ONE, GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - - glcache.UseProgram(g_front_final_shader.program); - ShaderUniforms.Set(&g_front_final_shader); - glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_2D, g_frontColorBlenderTexId); // ColorTex - glActiveTexture(GL_TEXTURE0); - - // Final blending - DrawQuad(); - - glCheck(); -} - - -void InitDualPeeling() -{ - if (g_accumulationTexId[0] != 0 || g_frontFboId[0] != 0) - return; - - g_imageWidth = screen_width; - g_imageHeight = screen_height; - - glGenVertexArrays(1, &g_quadVertexArray); - glGenBuffers(1, &g_quadBuffer); - - // Allocate render targets first - InitFrontPeelingRenderTargets(); - InitAccumulationRenderTargets(); - glBindFramebuffer(GL_FRAMEBUFFER, 0); - - // Build shaders - - CompilePipelineShader(&g_wavg_final_shader, wavg_final_fragment); - - CompilePipelineShader(&g_front_blend_shader, front_blend_fragment_source); - glUniform1i(glGetUniformLocation(g_front_blend_shader.program, "TempTex"), 1); - - CompilePipelineShader(&g_front_final_shader, front_final_fragment_source); - glUniform1i(glGetUniformLocation(g_front_final_shader.program, "ColorTex"), 1); -} - -void DualPeelingReshape(int w, int h) -{ - if (g_imageWidth != w || g_imageHeight != h) - { - g_imageWidth = w; - g_imageHeight = h; - - DeleteFrontPeelingRenderTargets(); - InitFrontPeelingRenderTargets(); - - DeleteAccumulationRenderTargets(); - InitAccumulationRenderTargets(); - } -} diff --git a/core/rend/soft/softrend.cpp b/core/rend/soft/softrend.cpp index c9b21bd10..a04c73f36 100644 --- a/core/rend/soft/softrend.cpp +++ b/core/rend/soft/softrend.cpp @@ -20,6 +20,7 @@ #include #include +#include #include "rend/gles/gles.h" @@ -53,6 +54,54 @@ union m128i { uint32_t m128i_u32[4]; }; + +bool operator<(const PolyParam &left, const PolyParam &right) +{ +/* put any condition you want to sort on here */ + return left.zvZcount<2) + { + 
pp->zvZ=0; + } + else + { + u16* idx=idx_base+pp->first; + + Vertex* vtx=vtx_base+idx[0]; + Vertex* vtx_end=vtx_base + idx[pp->count-1]+1; + + u32 zv=0xFFFFFFFF; + while(vtx!=vtx_end) + { + zv=min(zv,(u32&)vtx->z); + vtx++; + } + + pp->zvZ=(f32&)zv; + } + pp++; + } + + std::stable_sort(pvrrc.global_param_tr.head() + first, pvrrc.global_param_tr.head() + first + count); +} + static __m128 _mm_load_scaled_float(float v, float s) { return _mm_setr_ps(v, v + s, v + s + s, v + s + s + s); From 00fbc3f6f07e6bdd2ada159471bccad7710bdb5f Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 27 May 2018 22:51:12 +0200 Subject: [PATCH 06/65] A-buffers: Handle manual sort TR. Sort on depth then on poly number. --- core/rend/gles/abuffer.cpp | 62 +++++++++++++++++--------------------- core/rend/gles/gldraw.cpp | 2 +- core/rend/gles/gles.cpp | 8 ++--- core/rend/gles/gles.h | 2 +- 4 files changed, 34 insertions(+), 40 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index 64bb4a2f0..b85344698 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -10,6 +10,7 @@ GLuint abufferTexID = 0; GLuint abufferBlendingTexID = 0; GLuint abufferCounterTexID = 0; PipelineShader g_abuffer_final_shader; +PipelineShader g_abuffer_final_nosort_shader; PipelineShader g_abuffer_clear_shader; PipelineShader g_abuffer_pass2_shader; static GLuint g_quadBuffer = 0; @@ -22,12 +23,12 @@ static const char *final_shader_source = "\ #version 140 \n\ #extension GL_EXT_shader_image_load_store : enable \n\ #define ABUFFER_SIZE %d \n\ +#define DEPTH_SORTED %d \n\ out vec4 FragColor; \n\ uniform layout(size1x32) uimage2D abufferCounterImg; \n\ uniform layout(size4x32) image2DArray abufferImg; \n\ -uniform layout(size2x32) image2DArray abufferBlendingImg; \n\ +uniform layout(size4x32) image2DArray abufferBlendingImg; \n\ uniform lowp vec2 screen_size; \n\ -uniform int sortFragments; \n\ \n\ vec4 colorList[ABUFFER_SIZE]; \n\ vec4 depthBlendList[ABUFFER_SIZE]; \n\ 
@@ -74,9 +75,15 @@ void fillFragmentArray(ivec2 coords, int num_frag) { \n\ void bubbleSort(int array_size) { \n\ for (int i = array_size - 2; i >= 0; i--) { \n\ for (int j = 0; j <= i; j++) { \n\ -// depth only if (depthBlendList[j].x < depthBlendList[j + 1].x) { \n\ +#if DEPTH_SORTED == 1 \n\ + // depth only \n\ + //if (depthBlendList[j].x < depthBlendList[j + 1].x) { \n\ + // depth then poly number \n\ if (depthBlendList[j].x < depthBlendList[j + 1].x || (depthBlendList[j].x == depthBlendList[j + 1].x && depthBlendList[j].z > depthBlendList[j + 1].z)) { \n\ -// if (depthBlendList[j].z > depthBlendList[j + 1].z) { \n\ +#else \n\ + // poly number only \n\ + if (depthBlendList[j].z > depthBlendList[j + 1].z) { \n\ +#endif \n\ vec4 depthBlend = depthBlendList[j + 1]; \n\ depthBlendList[j + 1] = depthBlendList[j]; \n\ depthBlendList[j] = depthBlend; \n\ @@ -95,8 +102,11 @@ void insertionSort(int array_size) { \n\ vec4 aDepth = depthBlendList[i]; \n\ vec4 aColor = colorList[i]; \n\ int j = i - 1; \n\ -// for (; j >= 0 && depthBlendList[j].z < aDepth.z; j--) { \n\ +#if DEPTH_SORTED == 1 \n\ for (; j >= 0 && (depthBlendList[j].x < aDepth.x || (depthBlendList[j].x == aDepth.x && depthBlendList[j].z > aDepth.z)); j--) { \n\ +#else \n\ + for (; j >= 0 && depthBlendList[j].z < aDepth.z; j--) { \n\ +#endif \n\ depthBlendList[j + 1] = depthBlendList[j]; \n\ colorList[j + 1] = colorList[j]; \n\ } \n\ @@ -111,8 +121,7 @@ vec4 returnNthLayer(ivec2 coords, int num_frag, int layer) { \n\ fillFragmentArray(coords, num_frag); \n\ \n\ // Sort fragments in local memory array \n\ - if (sortFragments != 0) \n\ - bubbleSort(num_frag); \n\ + bubbleSort(num_frag); \n\ \n\ return vec4(colorList[min(layer, num_frag - 1)].rgb, 1); \n\ } \n\ @@ -124,18 +133,7 @@ vec4 resolveAlphaBlend(ivec2 coords, int num_frag){ \n\ fillFragmentArray(coords, num_frag); \n\ \n\ // Sort fragments in local memory array \n\ - if (sortFragments != 0) \n\ - bubbleSort(num_frag); \n\ - else if (num_frag > 1) \n\ - { 
\n\ - // FIXME This is wrong \n\ - vec4 depthBlend = depthBlendList[0]; \n\ - depthBlendList[0] = depthBlendList[num_frag - 1]; \n\ - depthBlendList[num_frag - 1] = depthBlend; \n\ - vec4 color = colorList[0]; \n\ - colorList[0] = colorList[num_frag - 1]; \n\ - colorList[num_frag - 1] = color; \n\ - } \n\ + bubbleSort(num_frag); \n\ \n\ vec4 finalColor = colorList[0]; \n\ for (int i = 1; i < num_frag; i++) { \n\ @@ -235,8 +233,6 @@ static const char *clear_shader_source = "\ #version 140 \n\ #extension GL_EXT_shader_image_load_store : enable \n\ coherent uniform layout(size1x32) uimage2D abufferCounterImg; \n\ -coherent uniform layout(size4x32) image2DArray abufferImg; \n\ -coherent uniform layout(size2x32) image2DArray abufferBlendingImg; \n\ \n\ void main(void) \n\ { \n\ @@ -244,12 +240,6 @@ void main(void) \n\ \n\ // Reset counter \n\ imageStore(abufferCounterImg, coords, uvec4(0)); \n\ - \n\ - // FIXME should not be necessary \n\ - // Put black in first layer \n\ - //imageStore(abufferImg, ivec3(coords, 0), vec4(0)); \n\ - // Reset depth \n\ - //imageStore(abufferBlendingImg, ivec3(coords, 0), vec4(0)); \n\ \n\ // Discard fragment so nothing is writen to the framebuffer \n\ discard; \n\ @@ -262,7 +252,7 @@ static const char *pass2_shader_source = "\ #extension GL_EXT_shader_image_load_store : enable \n\ coherent uniform layout(size1x32) uimage2D abufferCounterImg; \n\ coherent uniform layout(size4x32) image2DArray abufferImg; \n\ -coherent uniform layout(size2x32) image2DArray abufferBlendingImg; \n\ +coherent uniform layout(size4x32) image2DArray abufferBlendingImg; \n\ uniform lowp vec2 screen_size; \n\ uniform sampler2D DepthTex; \n\ uniform sampler2D tex; \n\ @@ -321,15 +311,21 @@ void initABuffer() glBindTexture(GL_TEXTURE_2D_ARRAY, abufferBlendingTexID); glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RG32F, 
g_imageWidth, g_imageHeight, ABUFFER_SIZE, 0, GL_RG, GL_FLOAT, 0); - glBindImageTexture(5, abufferBlendingTexID, 0, true, 0, GL_READ_WRITE, GL_RG32F); + glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA32F, g_imageWidth, g_imageHeight, ABUFFER_SIZE, 0, GL_RGBA, GL_FLOAT, 0); + glBindImageTexture(5, abufferBlendingTexID, 0, true, 0, GL_READ_WRITE, GL_RGBA32F); if (g_abuffer_final_shader.program == 0) { char source[8192]; - sprintf(source, final_shader_source, ABUFFER_SIZE); + sprintf(source, final_shader_source, ABUFFER_SIZE, 1); CompilePipelineShader(&g_abuffer_final_shader, source); } + if (g_abuffer_final_nosort_shader.program == 0) + { + char source[8192]; + sprintf(source, final_shader_source, ABUFFER_SIZE, 0); + CompilePipelineShader(&g_abuffer_final_nosort_shader, source); + } if (g_abuffer_clear_shader.program == 0) CompilePipelineShader(&g_abuffer_clear_shader, clear_shader_source); if (g_abuffer_pass2_shader.program == 0) @@ -424,10 +420,8 @@ void renderABuffer(bool sortFragments) glBindTexture(GL_TEXTURE_2D_ARRAY, abufferBlendingTexID); glCheck(); - glcache.UseProgram(g_abuffer_final_shader.program); + glcache.UseProgram(sortFragments ? 
g_abuffer_final_shader.program : g_abuffer_final_nosort_shader.program); ShaderUniforms.Set(&g_abuffer_final_shader); - GLint gu = glGetUniformLocation(g_abuffer_final_shader.program, "sortFragments"); - glUniform1i(gu, (int)sortFragments); glcache.Disable(GL_BLEND); glcache.Disable(GL_DEPTH_TEST); diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index fc6a27f94..f697e8522 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -329,7 +329,7 @@ void DrawList(const List& gply, int first, int count, int pass) params++; continue; } - ShaderUniforms.poly_number = params - gply.head(); + ShaderUniforms.poly_number = params - gply.head() + 1; SetGPState(params, pass); glDrawElements(GL_TRIANGLE_STRIP, params->count, GL_UNSIGNED_SHORT, (GLvoid*)(2*params->first)); glCheck(); } diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 1baf2f221..719789011 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -330,10 +330,10 @@ void main() \n\ int abidx = int(imageAtomicAdd(abufferCounterImg, coords, uint(1))); \n\ if (abidx >= ABUFFER_SIZE) { \n\ // Green pixels when overflow \n\ - vec4 blend_val = vec4(0.001, 8, float(pp_Number), 0); \n\ - ivec3 coords3 = ivec3(coords, 0); \n\ - imageStore(abufferImg, coords3, vec4(0, 1, 0, 1)); \n\ - imageStore(abufferBlendingImg, coords3, blend_val); \n\ +// vec4 blend_val = vec4(0.001, 8, float(pp_Number), 0); \n\ +// ivec3 coords3 = ivec3(coords, 0); \n\ +// imageStore(abufferImg, coords3, vec4(0, 1, 0, 1)); \n\ +// imageStore(abufferBlendingImg, coords3, blend_val); \n\ } else { \n\ vec4 blend_val = vec4(gl_FragDepth, float(blend_mode.x) * 8 + float(blend_mode.y), float(pp_Number), 0); \n\ ivec3 coords3 = ivec3(coords, abidx); \n\ diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index fd7d9f666..841f190b4 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -196,4 +196,4 @@ extern GLuint stencilTexId; extern GLuint depthTexId; extern GLuint 
opaqueTexId; -#define ABUFFER_SIZE 16 +#define ABUFFER_SIZE 32 From 0d32618203f36a2e8f2eebc1d632d913c1e6a581 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Mon, 28 May 2018 12:07:52 +0200 Subject: [PATCH 07/65] Cosmetic changes --- core/rend/gles/abuffer.cpp | 36 +++++++++++++++++++++++++++++++++--- core/rend/gles/gles.cpp | 4 ++-- core/rend/gles/gles.h | 2 ++ 3 files changed, 37 insertions(+), 5 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index b85344698..50c1070bf 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -22,7 +22,7 @@ static int g_imageHeight = 0; static const char *final_shader_source = "\ #version 140 \n\ #extension GL_EXT_shader_image_load_store : enable \n\ -#define ABUFFER_SIZE %d \n\ +#define ABUFFER_SIZE " ABUFFER_SIZE_STR " \n\ #define DEPTH_SORTED %d \n\ out vec4 FragColor; \n\ uniform layout(size1x32) uimage2D abufferCounterImg; \n\ @@ -275,6 +275,36 @@ void main(void) \n\ } \n\ "; +static const char *tr_modvol_shader_source = "\ +#version 140 \n\ +#extension GL_EXT_shader_image_load_store : enable \n\ +coherent uniform layout(size1x32) uimage2D abufferCounterImg; \n\ +coherent uniform layout(size4x32) image2DArray abufferBlendingImg; \n\ +uniform lowp vec2 screen_size; \n\ +uniform sampler2D DepthTex; \n\ + \n\ +void main(void) \n\ +{ \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + \n\ + int num_frag = int(imageLoad(abufferCounterImg, coords).r); \n\ + \n\ + highp float w = 100000.0 * gl_FragCoord.w; \n\ + highp float depth = 1 - log2(1.0 + w) / 34; \n\ + for (int i = 0; i < num_frag; i++) \n\ + { \n\ + vec4 pixel_info = imageLoad(abufferBlendingImg, ivec3(coords, i)); \n\ + highp float pixel_depth = info.x; \n\ + if (depth > pixel_depth) \n\ + continue; \n\ + // FIXME Need int or uint pixel format, not vec4 \n\ + imageAtomicXor(abufferBlendingImg, ivec3(coords, i), 1); \n\ + } \n\ + \n\ + discard; \n\ +} \n\ +"; + void initABuffer() { g_imageWidth = screen_width; @@ -317,13 
+347,13 @@ void initABuffer() if (g_abuffer_final_shader.program == 0) { char source[8192]; - sprintf(source, final_shader_source, ABUFFER_SIZE, 1); + sprintf(source, final_shader_source, 1); CompilePipelineShader(&g_abuffer_final_shader, source); } if (g_abuffer_final_nosort_shader.program == 0) { char source[8192]; - sprintf(source, final_shader_source, ABUFFER_SIZE, 0); + sprintf(source, final_shader_source, 0); CompilePipelineShader(&g_abuffer_final_nosort_shader, source); } if (g_abuffer_clear_shader.program == 0) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 719789011..cbeab17fe 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -213,7 +213,7 @@ uniform int pp_Number; \n\ #endif \n\ #if PASS > 1 \n\ #extension GL_EXT_shader_image_load_store : enable \n\ - #define ABUFFER_SIZE %d \n\ + #define ABUFFER_SIZE " ABUFFER_SIZE_STR " \n\ uniform uvec2 blend_mode; \n\ coherent uniform layout(size1x32) uimage2D abufferCounterImg; \n\ coherent uniform layout(size4x32) image2DArray abufferImg; \n\ @@ -797,7 +797,7 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe sprintf(pshader, source, s->cp_AlphaTest,s->pp_ClipTestMode,s->pp_UseAlpha, - s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pass, ABUFFER_SIZE); + s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pass); s->program=gl_CompileAndLink(VertexShaderSource,pshader); diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 841f190b4..c4bc4cb05 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -196,4 +196,6 @@ extern GLuint stencilTexId; extern GLuint depthTexId; extern GLuint opaqueTexId; +// Must match! 
#define ABUFFER_SIZE 32 +#define ABUFFER_SIZE_STR "32" From 81b96f2edef42fb96887ba3747fd8c7706ddbed6 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Mon, 28 May 2018 12:34:14 +0200 Subject: [PATCH 08/65] a-buffers RTT fix --- core/rend/gles/gldraw.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index f697e8522..71ecb8576 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -611,6 +611,10 @@ void CreateGeometryTexture() void DrawStrips() { + + GLint output_fbo; + glGetIntegerv(GL_FRAMEBUFFER_BINDING, &output_fbo); + if (geom_fbo == 0) { glGenFramebuffers(1, &geom_fbo); @@ -671,6 +675,8 @@ void DrawStrips() glActiveTexture(GL_TEXTURE0); glCheck(); +// Multipass: render on generated tex of previous pass? +// FIXME re-rendering on same depth buffer: what if GL_LESS is used? //Opaque DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 1); @@ -698,10 +704,6 @@ void DrawStrips() DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); glCheck(); - // FIXME Unsorted TR cannot use a-buffer because of lost ordering - - // FIXME Blinking pixels in Soulcalibur score table. Could be that some TR have same depth and rely on natural order? - // a-buffers makes the final order unpredictable and varies each frame // FIXME Depth of translucent poly must be used for next render pass if any // FIXME Multipass in general... @@ -711,7 +713,7 @@ void DrawStrips() // // PASS 4: Render a-buffers to screen // - glBindFramebuffer(GL_FRAMEBUFFER, 0); glCheck(); + glBindFramebuffer(GL_FRAMEBUFFER, output_fbo); glCheck(); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); renderABuffer(pvrrc.isAutoSort); From aa996566fee4ad32bf5bc2c410b10b58d93b9b8c Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Mon, 28 May 2018 23:38:26 +0200 Subject: [PATCH 09/65] Parse translucent modifier volumes. 
Fix for overrun. Still need to be checked... --- core/hw/pvr/ta_ctx.h | 7 ++++++- core/hw/pvr/ta_vtx.cpp | 22 ++++++++++++++++++---- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index 703ba0721..4ec370fdb 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -95,6 +95,7 @@ struct RenderPass { u32 mvo_count; u32 pt_count; u32 tr_count; + u32 mvo_tr_count; }; struct rend_context @@ -118,6 +119,7 @@ struct rend_context List idx; List modtrig; List global_param_mvo; + List global_param_mvo_tr; List global_param_op; List global_param_pt; @@ -133,6 +135,7 @@ struct rend_context global_param_tr.Clear(); modtrig.Clear(); global_param_mvo.Clear(); + global_param_mvo_tr.Clear(); render_passes.Clear(); Overrun=false; @@ -188,8 +191,9 @@ struct TA_context rend.global_param_pt.Init(4096,&rend.Overrun); rend.global_param_mvo.Init(4096,&rend.Overrun); rend.global_param_tr.Init(4096,&rend.Overrun); + rend.global_param_mvo_tr.Init(4096,&rend.Overrun); - rend.modtrig.Init(8192,&rend.Overrun); + rend.modtrig.Init(16384,&rend.Overrun); rend.render_passes.Init(sizeof(RenderPass) * 10, &rend.Overrun); // 10 render passes @@ -215,6 +219,7 @@ struct TA_context rend.global_param_tr.Free(); rend.modtrig.Free(); rend.global_param_mvo.Free(); + rend.global_param_mvo_tr.Free(); rend.render_passes.Free(); } }; diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index ce67dd956..8d57993d1 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -774,6 +774,12 @@ public: p.id=vdrc.modtrig.used(); *vdrc.global_param_mvo.Append()=p; } + else if (ListType == ListType_Translucent_Modifier_Volume) + { + ISP_Modvol p; + p.id = vdrc.modtrig.used(); + *vdrc.global_param_mvo_tr.Append()=p; + } } /* @@ -1373,9 +1379,13 @@ public: //Mod Volume Vertex handlers static void StartModVol(TA_ModVolParam* param) { - if (CurrentList!=ListType_Opaque_Modifier_Volume) + ISP_Modvol* p = NULL; + if (CurrentList == 
ListType_Opaque_Modifier_Volume) + p = vdrc.global_param_mvo.Append(); + else if (CurrentList == ListType_Translucent_Modifier_Volume) + p = vdrc.global_param_mvo_tr.Append(); + else return; - ISP_Modvol* p=vdrc.global_param_mvo.Append(); p->full=param->isp.full; p->VolumeLast=param->pcw.Volume; p->id=vdrc.modtrig.used(); @@ -1383,7 +1393,7 @@ public: __forceinline static void AppendModVolVertexA(TA_ModVolA* mvv) { - if (CurrentList!=ListType_Opaque_Modifier_Volume) + if (CurrentList != ListType_Opaque_Modifier_Volume && CurrentList != ListType_Translucent_Modifier_Volume) return; lmr=vdrc.modtrig.Append(); @@ -1403,7 +1413,7 @@ public: __forceinline static void AppendModVolVertexB(TA_ModVolB* mvv) { - if (CurrentList!=ListType_Opaque_Modifier_Volume) + if (CurrentList != ListType_Opaque_Modifier_Volume && CurrentList != ListType_Translucent_Modifier_Volume) return; lmr->y2=mvv->y2; lmr->z2=mvv->z2; @@ -1469,15 +1479,19 @@ bool ta_parse_vdrc(TA_context* ctx) render_pass->mvo_count = vd_rc.global_param_mvo.used(); render_pass->pt_count = vd_rc.global_param_pt.used(); render_pass->tr_count = vd_rc.global_param_tr.used(); + render_pass->mvo_tr_count = vd_rc.global_param_mvo_tr.used(); } rv = true; //whatever } + bool overrun = ctx->rend.Overrun; vd_ctx->rend = vd_rc; vd_ctx = 0; ctx->rend_inuse.Unlock(); + ctx->rend.Overrun = overrun; + return rv; } From c1975702865cfa8064c352ffc71f6d5a114c6e10 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Mon, 28 May 2018 23:39:47 +0200 Subject: [PATCH 10/65] A-buffers: linked list implementation --- core/rend/gles/abuffer.cpp | 430 ++++++++++++++++++++----------------- core/rend/gles/gldraw.cpp | 8 +- core/rend/gles/gles.cpp | 162 ++++++++------ core/rend/gles/gles.h | 42 +++- 4 files changed, 377 insertions(+), 265 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index 50c1070bf..5a1fe0b6f 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -6,69 +6,39 @@ */ #include 
"glcache.h" -GLuint abufferTexID = 0; -GLuint abufferBlendingTexID = 0; -GLuint abufferCounterTexID = 0; +GLuint pixels_buffer; +GLuint pixels_pointers; +GLuint atomic_buffer; PipelineShader g_abuffer_final_shader; PipelineShader g_abuffer_final_nosort_shader; PipelineShader g_abuffer_clear_shader; PipelineShader g_abuffer_pass2_shader; +PipelineShader g_abuffer_tr_modvol_shader; +PipelineShader g_abuffer_tr_modvol_final_shader; static GLuint g_quadBuffer = 0; static GLuint g_quadVertexArray = 0; static int g_imageWidth = 0; static int g_imageHeight = 0; -static const char *final_shader_source = "\ -#version 140 \n\ -#extension GL_EXT_shader_image_load_store : enable \n\ -#define ABUFFER_SIZE " ABUFFER_SIZE_STR " \n\ +#define MAX_PIXELS_PER_FRAGMENT "32" + +static const char *final_shader_source = SHADER_HEADER "\ #define DEPTH_SORTED %d \n\ +#define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT " \n\ out vec4 FragColor; \n\ -uniform layout(size1x32) uimage2D abufferCounterImg; \n\ -uniform layout(size4x32) image2DArray abufferImg; \n\ -uniform layout(size4x32) image2DArray abufferBlendingImg; \n\ -uniform lowp vec2 screen_size; \n\ \n\ -vec4 colorList[ABUFFER_SIZE]; \n\ -vec4 depthBlendList[ABUFFER_SIZE]; \n\ +Pixel pixel_list[MAX_PIXELS_PER_FRAGMENT]; \n\ \n\ -int resolveClosest(ivec2 coords, int num_frag) { \n\ - \n\ - // Search smallest z \n\ - float minZ = 1000000.0f; \n\ - int minIdx; \n\ - for (int i = 0; i < num_frag; i++) { \n\ - vec4 val = imageLoad(abufferBlendingImg, ivec3(coords, i)); \n\ - if (val.x < minZ) { \n\ - minZ = val.x; \n\ - minIdx = i; \n\ - } \n\ - } \n\ - \n\ - // Return the index of the closest fragment \n\ - return minIdx; \n\ -} \n\ - \n\ - \n\ -int findOpaque(ivec2 coords, int num_frag) { \n\ - \n\ - for (int i = 0; i < num_frag; i++) { \n\ - vec4 val = imageLoad(abufferBlendingImg, ivec3(coords, i)); \n\ - if (round(val.y) == 8) { \n\ - return i; \n\ - } \n\ - } \n\ - \n\ - return 0; \n\ -} \n\ - \n\ -void fillFragmentArray(ivec2 
coords, int num_frag) { \n\ +int fillFragmentArray(ivec2 coords) { \n\ // Load fragments into a local memory array for sorting \n\ - for (int i = 0; i < num_frag; i++) { \n\ - colorList[i] = imageLoad(abufferImg, ivec3(coords, i)); \n\ - depthBlendList[i] = imageLoad(abufferBlendingImg, ivec3(coords, i)); \n\ + uint idx = imageLoad(abufferPointerImg, coords).x; \n\ + int i = 0; \n\ + for (; idx != EOL && i < MAX_PIXELS_PER_FRAGMENT; i++) { \n\ + pixel_list[i] = pixels[idx]; \n\ + idx = pixel_list[i].next; \n\ } \n\ + return i; \n\ } \n\ \n\ // Bubble sort used to sort fragments \n\ @@ -76,20 +46,16 @@ void bubbleSort(int array_size) { \n\ for (int i = array_size - 2; i >= 0; i--) { \n\ for (int j = 0; j <= i; j++) { \n\ #if DEPTH_SORTED == 1 \n\ - // depth only \n\ - //if (depthBlendList[j].x < depthBlendList[j + 1].x) { \n\ // depth then poly number \n\ - if (depthBlendList[j].x < depthBlendList[j + 1].x || (depthBlendList[j].x == depthBlendList[j + 1].x && depthBlendList[j].z > depthBlendList[j + 1].z)) { \n\ + if (pixel_list[j].depth < pixel_list[j + 1].depth \n\ + || (pixel_list[j].depth == pixel_list[j + 1].depth && pixel_list[j].seq_num > pixel_list[j + 1].seq_num)) { \n\ #else \n\ // poly number only \n\ - if (depthBlendList[j].z > depthBlendList[j + 1].z) { \n\ + if (pixel_list[j].seq_num > pixel_list[j + 1].seq_num) { \n\ #endif \n\ - vec4 depthBlend = depthBlendList[j + 1]; \n\ - depthBlendList[j + 1] = depthBlendList[j]; \n\ - depthBlendList[j] = depthBlend; \n\ - vec4 color = colorList[j + 1]; \n\ - colorList[j + 1] = colorList[j]; \n\ - colorList[j] = color; \n\ + Pixel p = pixel_list[j + 1]; \n\ + pixel_list[j + 1] = pixel_list[j]; \n\ + pixel_list[j] = p; \n\ } \n\ } \n\ } \n\ @@ -99,49 +65,37 @@ void bubbleSort(int array_size) { \n\ // Insertion sort used to sort fragments \n\ void insertionSort(int array_size) { \n\ for (int i = 1; i < array_size; i++) { \n\ - vec4 aDepth = depthBlendList[i]; \n\ - vec4 aColor = colorList[i]; \n\ + Pixel p = 
pixel_list[i]; \n\ int j = i - 1; \n\ #if DEPTH_SORTED == 1 \n\ - for (; j >= 0 && (depthBlendList[j].x < aDepth.x || (depthBlendList[j].x == aDepth.x && depthBlendList[j].z > aDepth.z)); j--) { \n\ + for (; j >= 0 && (pixel_list[j].depth < p.depth || (pixel_list[j].depth == p.depth && pixel_list[j].seq_num > p.seq_num)); j--) { \n\ #else \n\ - for (; j >= 0 && depthBlendList[j].z < aDepth.z; j--) { \n\ + for (; j >= 0 && pixel_list[j].seq_num > p.seq_num; j--) { \n\ #endif \n\ - depthBlendList[j + 1] = depthBlendList[j]; \n\ - colorList[j + 1] = colorList[j]; \n\ + pixel_list[j + 1] = pixel_list[j]; \n\ } \n\ - depthBlendList[j + 1] = aDepth; \n\ - colorList[j + 1] = aColor; \n\ + pixel_list[j + 1] = p; \n\ } \n\ } \n\ \n\ -vec4 returnNthLayer(ivec2 coords, int num_frag, int layer) { \n\ - \n\ - // Copy fragments in local array \n\ - fillFragmentArray(coords, num_frag); \n\ - \n\ - // Sort fragments in local memory array \n\ - bubbleSort(num_frag); \n\ - \n\ - return vec4(colorList[min(layer, num_frag - 1)].rgb, 1); \n\ -} \n\ - \n\ // Blend fragments back-to-front \n\ -vec4 resolveAlphaBlend(ivec2 coords, int num_frag){ \n\ +vec4 resolveAlphaBlend(ivec2 coords) { \n\ \n\ // Copy fragments in local array \n\ - fillFragmentArray(coords, num_frag); \n\ + int num_frag = fillFragmentArray(coords); \n\ + if (num_frag == 0) \n\ + discard; \n\ \n\ // Sort fragments in local memory array \n\ bubbleSort(num_frag); \n\ \n\ - vec4 finalColor = colorList[0]; \n\ + vec4 finalColor = pixel_list[0].color; \n\ for (int i = 1; i < num_frag; i++) { \n\ - vec4 srcColor = colorList[i]; \n\ + vec4 srcColor = pixel_list[i].color; \n\ float srcAlpha = srcColor.a; \n\ float dstAlpha = finalColor.a; \n\ \n\ - int srcBlend = int(depthBlendList[i].y) / 8; \n\ + int srcBlend = int(pixel_list[i].blend_stencil) / 256 / 8; \n\ switch (srcBlend) \n\ { \n\ case 0: // zero \n\ @@ -168,7 +122,7 @@ vec4 resolveAlphaBlend(ivec2 coords, int num_frag){ \n\ srcColor *= 1 - dstAlpha; \n\ break; \n\ } \n\ 
- int dstBlend = int(depthBlendList[i].y) % 8; \n\ + int dstBlend = (int(pixel_list[i].blend_stencil) / 256) % 8; \n\ switch (dstBlend) \n\ { \n\ case 0: // zero \n\ @@ -177,10 +131,10 @@ vec4 resolveAlphaBlend(ivec2 coords, int num_frag){ \n\ case 1: // one \n\ break; \n\ case 2: // other color \n\ - finalColor *= colorList[i]; \n\ + finalColor *= pixel_list[i].color; \n\ break; \n\ case 3: // inverse other color \n\ - finalColor *= vec4(1) - colorList[i]; \n\ + finalColor *= vec4(1) - pixel_list[i].color; \n\ break; \n\ case 4: // src alpha \n\ finalColor *= srcAlpha; \n\ @@ -197,7 +151,7 @@ vec4 resolveAlphaBlend(ivec2 coords, int num_frag){ \n\ } \n\ finalColor = clamp(finalColor + srcColor, 0, 1); \n\ } \n\ - finalColor.a = 1; \n\ + \n\ return finalColor; \n\ \n\ } \n\ @@ -205,54 +159,29 @@ vec4 resolveAlphaBlend(ivec2 coords, int num_frag){ \n\ void main(void) \n\ { \n\ ivec2 coords = ivec2(gl_FragCoord.xy); \n\ - int num_frag = int(imageLoad(abufferCounterImg, coords).r); \n\ - // Crash without this (WTF ?) 
\n\ - if (num_frag < 0) \n\ - num_frag = 0; \n\ - if (num_frag > ABUFFER_SIZE) \n\ - num_frag = ABUFFER_SIZE; \n\ - if (num_frag > 0) { \n\ - // Compute and output final color for the frame buffer \n\ - //If we only want the closest fragment \n\ - //int minIdx = resolveClosest(coords, num_frag); \n\ - //FragColor = imageLoad(abufferImg, ivec3(coords, minIdx)); \n\ - // Visualize the number of layers in use \n\ - //FragColor = vec4(float(num_frag) / ABUFFER_SIZE, 0, 0, 1); \n\ - //FragColor = imageLoad(abufferImg, ivec3(coords, 0)); \n\ - //FragColor = returnNthLayer(coords, num_frag, 1); \n\ - FragColor = resolveAlphaBlend(coords, num_frag); \n\ - } \n\ - else \n\ - // If no fragment, write nothing \n\ - discard; \n\ - \n\ + // Compute and output final color for the frame buffer \n\ + // Visualize the number of layers in use \n\ + //FragColor = vec4(float(fillFragmentArray(coords)) / MAX_PIXELS_PER_FRAGMENT, 0, 0, 1); \n\ + FragColor = resolveAlphaBlend(coords); \n\ } \n\ "; -static const char *clear_shader_source = "\ -#version 140 \n\ -#extension GL_EXT_shader_image_load_store : enable \n\ -coherent uniform layout(size1x32) uimage2D abufferCounterImg; \n\ +static const char *clear_shader_source = SHADER_HEADER "\ \n\ void main(void) \n\ { \n\ ivec2 coords = ivec2(gl_FragCoord.xy); \n\ \n\ - // Reset counter \n\ - imageStore(abufferCounterImg, coords, uvec4(0)); \n\ + // Reset pointers \n\ + imageStore(abufferPointerImg, coords, uvec4(EOL)); \n\ \n\ - // Discard fragment so nothing is writen to the framebuffer \n\ + // Discard fragment so nothing is written to the framebuffer \n\ discard; \n\ } \n\ "; // Renders the opaque and pt rendered texture into a-buffers -static const char *pass2_shader_source = "\ -#version 140 \n\ -#extension GL_EXT_shader_image_load_store : enable \n\ -coherent uniform layout(size1x32) uimage2D abufferCounterImg; \n\ -coherent uniform layout(size4x32) image2DArray abufferImg; \n\ -coherent uniform layout(size4x32) image2DArray 
abufferBlendingImg; \n\ +static const char *pass2_shader_source = SHADER_HEADER "\ uniform lowp vec2 screen_size; \n\ uniform sampler2D DepthTex; \n\ uniform sampler2D tex; \n\ @@ -261,46 +190,49 @@ void main(void) \n\ { \n\ ivec2 coords = ivec2(gl_FragCoord.xy); \n\ \n\ - int abidx = int(imageAtomicAdd(abufferCounterImg, coords, uint(1))); \n\ - ivec3 coords3 = ivec3(coords, abidx); \n\ + uint idx = atomicCounterIncrement(buffer_index); \n\ + if ((idx + 1u) * 32u - 1u >= ABUFFER_SIZE) { \n\ + discard; \n\ + return; \n\ + } \n\ + Pixel pixel; \n\ + pixel.color = texture(tex, gl_FragCoord.xy / screen_size); \n\ + pixel.depth = texture(DepthTex, gl_FragCoord.xy / screen_size).r; \n\ + pixel.seq_num = 0; \n\ + pixel.blend_stencil = 0x800u; \n\ + pixel.next = imageAtomicExchange(abufferPointerImg, coords, idx); \n\ + pixels[idx] = pixel; \n\ \n\ - highp float depth = texture(DepthTex, gl_FragCoord.xy / screen_size).r; \n\ - vec4 blend_val = vec4(depth, 8, 0, 0); \n\ - imageStore(abufferBlendingImg, coords3, blend_val); \n\ - vec4 color = texture(tex, gl_FragCoord.xy / screen_size); \n\ - imageStore(abufferImg, coords3, color); \n\ - \n\ - // Discard fragment so nothing is writen to the framebuffer \n\ + // Discard fragment so nothing is written to the framebuffer \n\ discard; \n\ } \n\ "; -static const char *tr_modvol_shader_source = "\ -#version 140 \n\ -#extension GL_EXT_shader_image_load_store : enable \n\ -coherent uniform layout(size1x32) uimage2D abufferCounterImg; \n\ -coherent uniform layout(size4x32) image2DArray abufferBlendingImg; \n\ -uniform lowp vec2 screen_size; \n\ -uniform sampler2D DepthTex; \n\ - \n\ +static const char *tr_modvol_shader_source = SHADER_HEADER "\ +#define LAST_PASS %d \n\ void main(void) \n\ { \n\ +#if LAST_PASS == 0 \n\ + setFragDepth(); \n\ +#endif \n\ ivec2 coords = ivec2(gl_FragCoord.xy); \n\ - \n\ - int num_frag = int(imageLoad(abufferCounterImg, coords).r); \n\ - \n\ - highp float w = 100000.0 * gl_FragCoord.w; \n\ - highp float 
depth = 1 - log2(1.0 + w) / 34; \n\ - for (int i = 0; i < num_frag; i++) \n\ - { \n\ - vec4 pixel_info = imageLoad(abufferBlendingImg, ivec3(coords, i)); \n\ - highp float pixel_depth = info.x; \n\ - if (depth > pixel_depth) \n\ - continue; \n\ - // FIXME Need int or uint pixel format, not vec4 \n\ - imageAtomicXor(abufferBlendingImg, ivec3(coords, i), 1); \n\ + if (all(greaterThanEqual(coords, ivec2(0))) && all(lessThan(coords, imageSize(abufferPointerImg)))) { \n\ + \n\ + uint idx = imageLoad(abufferPointerImg, coords).x; \n\ + while (idx != EOL) { \n\ + if (pixels[idx].seq_num > 0) { \n\ +#if LAST_PASS == 0 \n\ + if (gl_FragDepth <= pixels[idx].depth) \n\ + atomicXor(pixels[idx].blend_stencil, 2u); \n\ +#else \n\ + if (mod(pixels[idx].blend_stencil, 256u) != 0u) \n\ + pixels[idx].color.a = 1.0; // FIXME \n\ +#endif \n\ + } \n\ + idx = pixels[idx].next; \n\ + } \n\ } \n\ - \n\ + \n\ discard; \n\ } \n\ "; @@ -310,39 +242,40 @@ void initABuffer() g_imageWidth = screen_width; g_imageHeight = screen_height; - if (abufferTexID == 0) - abufferTexID = glcache.GenTexture(); - glActiveTexture(GL_TEXTURE3); glCheck(); - glBindTexture(GL_TEXTURE_2D_ARRAY, abufferTexID); glCheck(); - glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glCheck(); - glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glCheck(); - glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA32F, g_imageWidth, g_imageHeight, ABUFFER_SIZE, 0, GL_RGBA, GL_FLOAT, 0); glCheck(); - glBindImageTexture(3, abufferTexID, 0, true, 0, GL_READ_WRITE, GL_RGBA32F); - glCheck(); - - if (abufferCounterTexID == 0) - abufferCounterTexID = glcache.GenTexture(); - glActiveTexture(GL_TEXTURE4); - glBindTexture(GL_TEXTURE_2D, abufferCounterTexID); - - // Set filter + if (pixels_pointers == 0) + pixels_pointers = glcache.GenTexture(); + glActiveTexture(GL_TEXTURE3); + glBindTexture(GL_TEXTURE_2D, pixels_pointers); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); 
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - - //Texture creation //Uses GL_R32F instead of GL_R32I that is not working in R257.15 - glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, g_imageWidth, g_imageHeight, 0, GL_RED, GL_FLOAT, 0); - glBindImageTexture(4, abufferCounterTexID, 0, false, 0, GL_READ_WRITE, GL_R32UI); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, g_imageWidth, g_imageHeight, 0, GL_RED, GL_FLOAT, 0); + glBindImageTexture(3, pixels_pointers, 0, false, 0, GL_READ_WRITE, GL_R32UI); glCheck(); - if (abufferBlendingTexID == 0) - abufferBlendingTexID = glcache.GenTexture(); - glActiveTexture(GL_TEXTURE5); - glBindTexture(GL_TEXTURE_2D_ARRAY, abufferBlendingTexID); - glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D_ARRAY, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage3D(GL_TEXTURE_2D_ARRAY, 0, GL_RGBA32F, g_imageWidth, g_imageHeight, ABUFFER_SIZE, 0, GL_RGBA, GL_FLOAT, 0); - glBindImageTexture(5, abufferBlendingTexID, 0, true, 0, GL_READ_WRITE, GL_RGBA32F); + if (pixels_buffer == 0 ) + { + // Create the buffer + glGenBuffers(1, &pixels_buffer); + // Bind it + glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer); + // Declare storage + glBufferData(GL_SHADER_STORAGE_BUFFER, ABUFFER_SIZE, NULL, GL_DYNAMIC_COPY); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer); + glCheck(); + } + + if (atomic_buffer == 0 ) + { + // Create the buffer + glGenBuffers(1, &atomic_buffer); + // Bind it + glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomic_buffer); + // Declare storage + glBufferData(GL_ATOMIC_COUNTER_BUFFER, 4, NULL, GL_DYNAMIC_COPY); + glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomic_buffer); + glCheck(); + } if (g_abuffer_final_shader.program == 0) { @@ -360,22 +293,33 @@ void initABuffer() CompilePipelineShader(&g_abuffer_clear_shader, clear_shader_source); if (g_abuffer_pass2_shader.program == 0) CompilePipelineShader(&g_abuffer_pass2_shader, pass2_shader_source); + if 
(g_abuffer_tr_modvol_shader.program == 0) + { + char source[8192]; + sprintf(source, tr_modvol_shader_source, 0); + CompilePipelineShader(&g_abuffer_tr_modvol_shader, source); + } + if (g_abuffer_tr_modvol_final_shader.program == 0) + { + char source[8192]; + sprintf(source, tr_modvol_shader_source, 1); + CompilePipelineShader(&g_abuffer_tr_modvol_final_shader, source); + } glGenVertexArrays(1, &g_quadVertexArray); glGenBuffers(1, &g_quadBuffer); glCheck(); + } void reshapeABuffer(int w, int h) { if (w != g_imageWidth || h != g_imageHeight) { - glcache.DeleteTextures(1, &abufferTexID); - abufferTexID = 0; - glcache.DeleteTextures(1, &abufferCounterTexID); - abufferCounterTexID = 0; - glcache.DeleteTextures(1, &abufferBlendingTexID); - abufferBlendingTexID = 0; + glcache.DeleteTextures(1, &pixels_pointers); + pixels_pointers = 0; + + // FIXME We might need to resize the pixels_buffer accordingly initABuffer(); } @@ -420,11 +364,7 @@ void renderPass2(GLuint textureId, GLuint depthTexId) glActiveTexture(GL_TEXTURE1); glBindTexture(GL_TEXTURE_2D, depthTexId); glActiveTexture(GL_TEXTURE3); - glBindTexture(GL_TEXTURE_2D_ARRAY, abufferTexID); - glActiveTexture(GL_TEXTURE4); - glBindTexture(GL_TEXTURE_2D, abufferCounterTexID); - glActiveTexture(GL_TEXTURE5); - glBindTexture(GL_TEXTURE_2D_ARRAY, abufferBlendingTexID); + glBindTexture(GL_TEXTURE_2D, pixels_pointers); glActiveTexture(GL_TEXTURE0); glcache.UseProgram(g_abuffer_pass2_shader.program); @@ -433,21 +373,100 @@ void renderPass2(GLuint textureId, GLuint depthTexId) glcache.Disable(GL_BLEND); glcache.Disable(GL_DEPTH_TEST); glcache.Disable(GL_CULL_FACE); -glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + DrawQuad(); -glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); +} + +void DrawTranslucentModVols(int first, int count) +{ + if (count == 0 || pvrrc.modtrig.used() == 0) + return; + printf("Drawing %d translucent modvols %d triangles\n", count, pvrrc.modtrig.used()); + SetupModvolVBO(); + + 
glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE2); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE3); + glBindTexture(GL_TEXTURE_2D, pixels_pointers); + glActiveTexture(GL_TEXTURE0); + + // Why do I need to rebind? + glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer); + glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomic_buffer); + glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomic_buffer); + + glcache.Disable(GL_BLEND); + glcache.Disable(GL_DEPTH_TEST); + glcache.Disable(GL_STENCIL_TEST); + + glcache.UseProgram(g_abuffer_tr_modvol_final_shader.program); + ShaderUniforms.Set(&g_abuffer_tr_modvol_final_shader); + glcache.UseProgram(g_abuffer_tr_modvol_shader.program); + ShaderUniforms.Set(&g_abuffer_tr_modvol_shader); + glCheck(); + + u32 mod_base = 0; //cur start triangle + u32 mod_last = 0; //last merge + + u32 cmv_count = count - 1; + ISP_Modvol* params = &pvrrc.global_param_mvo_tr.head()[first]; + + //ISP_Modvol + for (u32 cmv = 0; cmv < 3 /* FIXME cmv_count */; cmv++) + { + + ISP_Modvol ispc = params[cmv]; + mod_base = ispc.id; + if (mod_last == 0) + // FIXME Will this work if no OP modvols are drawn? + mod_last = mod_base; + + u32 sz = params[cmv + 1].id - mod_base; + if (sz == 0) + continue; + + u32 mv_mode = ispc.DepthMode; + + verify(mod_base > 0 && mod_base + sz <= pvrrc.modtrig.used()); + + if (mv_mode == 0) //normal trigs + { + glcache.UseProgram(g_abuffer_tr_modvol_shader.program); glCheck(); + SetCull(ispc.CullMode); glCheck(); + glDrawArrays(GL_TRIANGLES, mod_base * 3, sz * 3); glCheck(); + } + else if (mv_mode < 3) + { + while(sz) + { + //merge and clear all the prev. 
stencil bits + + //Count Intersections (last poly) + glcache.UseProgram(g_abuffer_tr_modvol_shader.program); glCheck(); + SetCull(ispc.CullMode); glCheck(); + glDrawArrays(GL_TRIANGLES, mod_base * 3, 3); glCheck(); + + //Sum the area + glcache.UseProgram(g_abuffer_tr_modvol_final_shader.program); glCheck(); + + glDrawArrays(GL_TRIANGLES, mod_last * 3, (mod_base - mod_last + 1) * 3); glCheck(); + + //update pointers + mod_last = mod_base + 1; + sz--; + mod_base++; + } + } + } } void renderABuffer(bool sortFragments) { glActiveTexture(GL_TEXTURE3); - glBindTexture(GL_TEXTURE_2D_ARRAY, abufferTexID); - glCheck(); - glActiveTexture(GL_TEXTURE4); - glBindTexture(GL_TEXTURE_2D, abufferCounterTexID); - glCheck(); - glActiveTexture(GL_TEXTURE5); - glBindTexture(GL_TEXTURE_2D_ARRAY, abufferBlendingTexID); + glBindTexture(GL_TEXTURE_2D, pixels_pointers); glCheck(); glcache.UseProgram(sortFragments ? g_abuffer_final_shader.program : g_abuffer_final_nosort_shader.program); @@ -456,7 +475,7 @@ void renderABuffer(bool sortFragments) glcache.Disable(GL_BLEND); glcache.Disable(GL_DEPTH_TEST); glcache.Disable(GL_CULL_FACE); - glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_BUFFER_UPDATE_BARRIER_BIT); DrawQuad(); glCheck(); @@ -464,9 +483,20 @@ void renderABuffer(bool sortFragments) glcache.UseProgram(g_abuffer_clear_shader.program); ShaderUniforms.Set(&g_abuffer_clear_shader); - glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_BUFFER_UPDATE_BARRIER_BIT); + + GLuint size = 0; + glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, 4, &size); + //printf("ABUFFER %d pixels used\n", size); + if ((size + 1) * 32 - 1 >= ABUFFER_SIZE) + printf("ABUFFER OVERRUN %d pixels\n", size); + DrawQuad(); + glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomic_buffer); + GLuint zero = 0; + glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0 , sizeof(GLuint), &zero); + glActiveTexture(GL_TEXTURE0); 
glCheck(); } diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 71ecb8576..f03b0899d 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -223,7 +223,6 @@ template ShaderUniforms.blend_mode[0] = gp->tsp.SrcInstr; ShaderUniforms.blend_mode[1] = gp->tsp.DstInstr; } - ShaderUniforms.Set(CurrentShader); SetTileClip(gp->tileclip,true); @@ -233,6 +232,9 @@ template glcache.StencilFunc(GL_ALWAYS,stencil,stencil); + ShaderUniforms.stencil = stencil; + ShaderUniforms.Set(CurrentShader); + if (CurrentShader->pp_Texture) { glcache.BindTexture(GL_TEXTURE_2D, gp->texid == -1 ? 0 : gp->texid); @@ -576,6 +578,7 @@ void DrawModVols(int first, int count) void renderABuffer(bool sortFragments); void renderPass2(GLuint textureId, GLuint depthTexId); +void DrawTranslucentModVols(int first, int count); void CreateGeometryTexture() { @@ -704,6 +707,9 @@ void DrawStrips() DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); glCheck(); +// glMemoryBarrier(GL_ALL_BARRIER_BITS); +// DrawTranslucentModVols(previous_pass.mvo_tr_count, current_pass.mvo_tr_count - previous_pass.mvo_tr_count); + // FIXME Depth of translucent poly must be used for next render pass if any // FIXME Multipass in general... 
diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index cbeab17fe..5a0a1eca8 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -173,10 +173,7 @@ lowp float fog_mode2(highp float invW) \n\ #endif -const char* PixelPipelineShader = -#ifndef GLES - "#version 140 \n" -#endif +const char* PixelPipelineShader = SHADER_HEADER "\ #define cp_AlphaTest %d \n\ #define pp_ClipTestMode %d \n\ @@ -194,6 +191,15 @@ const char* PixelPipelineShader = #endif \n" #endif "\ +#define ZERO 0u \n\ +#define ONE 1u \n\ +#define OTHER_COLOR 2u \n\ +#define INVERSE_OTHER_COLOR 3u \n\ +#define SRC_ALPHA 4u \n\ +#define INVERSE_SRC_ALPHA 5u \n\ +#define DST_ALPHA 6u \n\ +#define INVERSE_DST_ALPHA 7u \n\ + \n\ /* Shader program params*/ \n\ /* gles has no alpha test stage, so its emulated on the shader */ \n\ uniform lowp float cp_AlphaTestValue; \n\ @@ -205,20 +211,11 @@ uniform highp float shade_scale_factor; \n\ uniform lowp vec2 screen_size; \n\ uniform sampler2D tex,fog_table; \n\ uniform int pp_Number; \n\ -#if PASS == 1 \n\ - uniform usampler2D shadow_stencil; \n\ -#endif \n\ -#if PASS == 3 \n\ - uniform sampler2D DepthTex; \n\ -#endif \n\ -#if PASS > 1 \n\ - #extension GL_EXT_shader_image_load_store : enable \n\ - #define ABUFFER_SIZE " ABUFFER_SIZE_STR " \n\ - uniform uvec2 blend_mode; \n\ - coherent uniform layout(size1x32) uimage2D abufferCounterImg; \n\ - coherent uniform layout(size4x32) image2DArray abufferImg; \n\ - coherent uniform layout(size2x32) image2DArray abufferBlendingImg; \n\ -#endif \n\ +uniform usampler2D shadow_stencil; \n\ +uniform sampler2D DepthTex; \n\ +uniform uvec2 blend_mode; \n\ +uniform uint pp_Stencil; \n\ + \n\ /* Vertex input*/ \n\ " vary " lowp vec4 vtx_base; \n\ " vary " lowp vec4 vtx_offs; \n\ @@ -230,13 +227,9 @@ lowp float fog_mode2(highp float w) \n\ return clamp(sp_LOG_FOG_COEFS.y * log2(fog_idx) + sp_LOG_FOG_COEFS.x, 0.001, 1.0); //the clamp is required due to yet another bug !\n\ } \n\ void main() \n\ -{ \n" 
-#ifndef GLES - "\ - highp float w = 100000.0 * gl_FragCoord.w; \n\ - gl_FragDepth = 1 - log2(1.0 + w) / 34; \n" -#endif - "\ +{ \n\ + setFragDepth(); \n\ + \n\ #if PASS == 3 \n\ // Manual depth testing \n\ highp float frontDepth = texture(DepthTex, gl_FragCoord.xy / screen_size).r; \n\ @@ -309,7 +302,7 @@ void main() \n\ #if PASS == 1 \n\ //uvec4 stencil = texture(shadow_stencil, vec2(gl_FragCoord.x / 1280, gl_FragCoord.y / 960)); \n\ uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / screen_size); \n\ - if (stencil.r == uint(0x81)) \n\ + if (stencil.r == 0x81u) \n\ color.rgb *= shade_scale_factor; \n\ #endif\n\ #if pp_FogCtrl==0 // LUT \n\ @@ -326,39 +319,87 @@ void main() \n\ #if PASS == 1 \n" FRAGCOL " = color; \n\ #elif PASS > 1 \n\ - ivec2 coords = ivec2(gl_FragCoord.xy); \n\ - int abidx = int(imageAtomicAdd(abufferCounterImg, coords, uint(1))); \n\ - if (abidx >= ABUFFER_SIZE) { \n\ - // Green pixels when overflow \n\ -// vec4 blend_val = vec4(0.001, 8, float(pp_Number), 0); \n\ -// ivec3 coords3 = ivec3(coords, 0); \n\ -// imageStore(abufferImg, coords3, vec4(0, 1, 0, 1)); \n\ -// imageStore(abufferBlendingImg, coords3, blend_val); \n\ - } else { \n\ - vec4 blend_val = vec4(gl_FragDepth, float(blend_mode.x) * 8 + float(blend_mode.y), float(pp_Number), 0); \n\ - ivec3 coords3 = ivec3(coords, abidx); \n\ - imageStore(abufferImg, coords3, color); \n\ - imageStore(abufferBlendingImg, coords3, blend_val); \n\ + // Discard as many pixels as possible \n\ + bool ignore = false; \n\ + switch (blend_mode.y) // DST \n\ + { \n\ + case ONE: \n\ + switch (blend_mode.x) \n\ + { \n\ + case ZERO: \n\ + ignore = true; \n\ + break; \n\ + case ONE: \n\ + case OTHER_COLOR: \n\ + case INVERSE_OTHER_COLOR: \n\ + ignore = color.r == 0.0 && color.g == 0.0 && color.b == 0.0 && color.a == 0.0; \n\ + break; \n\ + case SRC_ALPHA: \n\ + ignore = (color.r == 0.0 && color.g == 0.0 && color.b == 0.0) || color.a == 0.0; \n\ + break; \n\ + case INVERSE_SRC_ALPHA: \n\ + ignore = 
(color.r == 0.0 && color.g == 0.0 && color.b == 0.0) || color.a == 1.0; \n\ + break; \n\ + } \n\ + break; \n\ + case OTHER_COLOR: \n\ + if (blend_mode.x == ZERO && color.r == 1.0 && color.g == 1.0 && color.b == 1.0 && color.a == 1.0) \n\ + ignore = true; \n\ + break; \n\ + case INVERSE_OTHER_COLOR: \n\ + if (blend_mode.x <= SRC_ALPHA && color.r == 0.0 && color.g == 0.0 && color.b == 0.0 && color.a == 0.0) \n\ + ignore = true; \n\ + break; \n\ + case SRC_ALPHA: \n\ + if ((blend_mode.x == ZERO || blend_mode.x == INVERSE_SRC_ALPHA) && color.a == 1.0) \n\ + ignore = true; \n\ + break; \n\ + case INVERSE_SRC_ALPHA: \n\ + switch (blend_mode.x) // SRC \n\ + { \n\ + case ZERO: \n\ + case SRC_ALPHA: \n\ + ignore = color.a == 0.0; \n\ + break; \n\ + case ONE: \n\ + case OTHER_COLOR: \n\ + case INVERSE_OTHER_COLOR: \n\ + ignore = color.r == 0.0 && color.g == 0.0 && color.b == 0.0 && color.a == 0.0; \n\ + break; \n\ + } \n\ + break; \n\ + } \n\ + \n\ + \n\ + \n\ + if (!ignore) \n\ + { \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + uint idx = atomicCounterIncrement(buffer_index); \n\ + if ((idx + 1u) * 32u - 1u >= ABUFFER_SIZE) { \n\ + discard; \n\ + return; \n\ + } \n\ + Pixel pixel; \n\ + pixel.color = color; \n\ + pixel.depth = gl_FragDepth; \n\ + pixel.seq_num = pp_Number; \n\ + pixel.blend_stencil = (blend_mode.x * 8u + blend_mode.y) * 256u + pp_Stencil; \n\ + pixel.next = imageAtomicExchange(abufferPointerImg, coords, idx); \n\ + pixels[idx] = pixel; \n\ } \n\ discard; \n\ \n\ #endif \n\ }"; -const char* ModifierVolumeShader = -#ifndef GLES - "#version 140 \n" -#endif +const char* ModifierVolumeShader = SHADER_HEADER " \ /* Vertex input*/ \n\ void main() \n\ -{ \n" -#ifndef GLES - "\ - highp float w = 100000.0 * gl_FragCoord.w; \n\ - gl_FragDepth = 1 - log2(1.0 + w) / 34; \n" -#endif - "\ +{ \n\ + setFragDepth(); \n\ + \n\ }"; const char* OSD_Shader = @@ -847,18 +888,13 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe glUniform1i(gu, 
2); // GL_TEXTURE2 // A-buffers - gu = glGetUniformLocation(s->program, "abufferImg"); - if (gu != -1) - glUniform1i(gu, 3); // GL_TEXTURE3 - gu = glGetUniformLocation(s->program, "abufferCounterImg"); - if (gu != -1) - glUniform1i(gu, 4); // GL_TEXTURE4 - gu = glGetUniformLocation(s->program, "abufferBlendingImg"); - if (gu != -1) - glUniform1i(gu, 5); // GL_TEXTURE5 +// gu = glGetUniformLocation(s->program, "abufferPointerImg"); +// if (gu != -1) +// glUniform1i(gu, 3); // GL_TEXTURE3 s->blend_mode = glGetUniformLocation(s->program, "blend_mode"); s->pp_Number = glGetUniformLocation(s->program, "pp_Number"); + s->pp_Stencil = glGetUniformLocation(s->program, "pp_Stencil"); return glIsProgram(s->program)==GL_TRUE; } @@ -1411,7 +1447,10 @@ bool ProcessFrame(TA_context* ctx) CollectCleanup(); - return true; + if (ctx->rend.Overrun) + printf("TA context overrun\n"); + + return !ctx->rend.Overrun; } bool RenderFrame() @@ -1840,7 +1879,6 @@ struct glesrend : Renderer bool Init() { return gles_init(); } void Resize(int w, int h) { - // FIXME Not called :( screen_width=w; screen_height=h; if (stencilTexId != 0) @@ -1865,7 +1903,7 @@ struct glesrend : Renderer bool Process(TA_context* ctx) { return ProcessFrame(ctx); } bool Render() { return RenderFrame(); } - void Present() { gl_swap(); glViewport(0, 0, screen_width, screen_height); } + void Present() { gl_swap(); } void DrawOSD() { OSD_DRAW(); } diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index c4bc4cb05..5238e421f 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -59,6 +59,7 @@ struct PipelineShader GLuint screen_size; GLuint blend_mode; GLuint pp_Number; + GLuint pp_Stencil; // u32 cp_AlphaTest; s32 pp_ClipTestMode; @@ -135,6 +136,7 @@ void ReadRTTBuffer(); int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, u32 pp_FogCtrl, int pass); +void SetCull(u32 CulliMode); struct ShaderUniforms_t { @@ -147,6 
+149,7 @@ struct ShaderUniforms_t float fog_coefs[2]; GLuint blend_mode[2]; int poly_number; + u32 stencil; void Set(PipelineShader* s) { @@ -182,6 +185,9 @@ struct ShaderUniforms_t if (s->pp_Number != -1) glUniform1i(s->pp_Number, poly_number); + + if (s->pp_Stencil != -1) + glUniform1ui(s->pp_Stencil, stencil); } }; @@ -197,5 +203,37 @@ extern GLuint depthTexId; extern GLuint opaqueTexId; // Must match! -#define ABUFFER_SIZE 32 -#define ABUFFER_SIZE_STR "32" +// in bytes +#define ABUFFER_SIZE 256*1024*1024 +#define ABUFFER_SIZE_STR "(256u * 1024u * 1024u)" + +#define SHADER_HEADER "#version 140 \n\ +#extension GL_EXT_shader_image_load_store : enable \n\ +#extension GL_ARB_shader_storage_buffer_object : enable \n\ +#extension GL_ARB_shader_atomic_counters : enable \n\ +#extension GL_ARB_shader_image_size : enable \n\ +#extension GL_ARB_shading_language_420pack : enable \n\ +\n\ +#define ABUFFER_SIZE " ABUFFER_SIZE_STR " \n\ +coherent uniform layout(size1x32, binding = 3) uimage2D abufferPointerImg; \n\ +struct Pixel { \n\ + mediump vec4 color; \n\ + mediump float depth; \n\ + int seq_num; \n\ + uint blend_stencil; \n\ + uint next; \n\ +}; \n\ +#define EOL 0xFFFFFFFFu \n\ +layout (binding = 0) buffer PixelBuffer { \n\ + Pixel pixels[]; \n\ +}; \n\ +layout(binding = 0, offset = 0) uniform atomic_uint buffer_index; \n\ +\n\ +void setFragDepth(void) \n\ +{ \n\ + highp float w = 100000.0 * gl_FragCoord.w; \n\ + gl_FragDepth = 1 - log2(1.0 + w) / 34; \n\ +} \n\ +" + +void SetupModvolVBO(); From 0bb28b2e641997ee191c1a187f590801bf5691ec Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 29 May 2018 18:33:59 +0200 Subject: [PATCH 11/65] Fully parse two-volume mode polygons. 
--- core/hw/pvr/Renderer_if.cpp | 17 +++++++--- core/hw/pvr/ta_ctx.h | 11 ++++++- core/hw/pvr/ta_vtx.cpp | 64 ++++++++++++++++++++++++++++++++++++- 3 files changed, 86 insertions(+), 6 deletions(-) diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index 7186997c0..a8f9cc241 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -95,7 +95,7 @@ void dump_frame(const char* file, TA_context* ctx, u8* vram, u8* vram_ref = NULL u32 bytes = ctx->tad.End() - ctx->tad.thd_root; - fwrite("TAFRAME3", 1, 8, fw); + fwrite("TAFRAME4", 1, 8, fw); fwrite(&ctx->rend.isRTT, 1, sizeof(ctx->rend.isRTT), fw); fwrite(&ctx->rend.isAutoSort, 1, sizeof(ctx->rend.isAutoSort), fw); @@ -159,10 +159,17 @@ TA_context* read_frame(const char* file, u8* vram_ref = NULL) { fread(id0, 1, 8, fw); - if (memcmp(id0, "TAFRAME3", 8) != 0) { + if (memcmp(id0, "TAFRAME", 7) != 0 || (id0[7] != '3' && id0[7] != '4')) { fclose(fw); return 0; } + int sizeofPolyParam = sizeof(PolyParam); + int sizeofVertex = sizeof(Vertex); + if (id0[8] != '3') + { + sizeofPolyParam -= 12; + sizeofVertex -= 16; + } TA_context* ctx = tactx_Alloc(); @@ -175,8 +182,10 @@ TA_context* read_frame(const char* file, u8* vram_ref = NULL) { fread(&ctx->rend.fb_X_CLIP.full, 1, sizeof(ctx->rend.fb_X_CLIP.full), fw); fread(&ctx->rend.fb_Y_CLIP.full, 1, sizeof(ctx->rend.fb_Y_CLIP.full), fw); - fread(ctx->rend.global_param_op.Append(), 1, sizeof(PolyParam), fw); - fread(ctx->rend.verts.Append(4), 1, 4 * sizeof(Vertex), fw); + fread(ctx->rend.global_param_op.Append(), 1, sizeofPolyParam, fw); + Vertex *vtx = ctx->rend.verts.Append(4); + for (int i = 0; i < 4; i++) + fread(vtx + i, 1, sizeofVertex, fw); fread(&t, 1, sizeof(t), fw); verify(t == VRAM_SIZE); diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index 4ec370fdb..d0e5f0e77 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -17,6 +17,12 @@ struct Vertex u8 spc[4]; float u,v; + + // Two volumes format + u8 col1[4]; + u8 
spc1[4]; + + float u1,v1; }; struct PolyParam @@ -35,6 +41,9 @@ struct PolyParam float zvZ; u32 tileclip; //float zMin,zMax; + TSP tsp1; + TCW tcw1; + u32 texid1; }; struct ModParam @@ -185,7 +194,7 @@ struct TA_context { tad.Reset((u8*)OS_aligned_malloc(32, 2*1024*1024)); - rend.verts.InitBytes(1024*1024,&rend.Overrun); //up to 1 mb of vtx data/frame = ~ 38k vtx/frame + rend.verts.InitBytes(2*1024*1024,&rend.Overrun); //up to 2 mb of vtx data/frame = ~ 48k vtx/frame rend.idx.Init(60*1024,&rend.Overrun); //up to 60K indexes ( idx have stripification overhead ) rend.global_param_op.Init(4096,&rend.Overrun); rend.global_param_pt.Init(4096,&rend.Overrun); diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index 8d57993d1..211fd9169 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -85,6 +85,8 @@ List* CurrentPPlist; //TA state vars DECL_ALIGN(4) static u8 FaceBaseColor[4]; DECL_ALIGN(4) static u8 FaceOffsColor[4]; +DECL_ALIGN(4) static u8 FaceBaseColor1[4]; +DECL_ALIGN(4) static u8 FaceOffsColor1[4]; DECL_ALIGN(4) static u32 SFaceBaseColor; DECL_ALIGN(4) static u32 SFaceOffsColor; @@ -817,6 +819,9 @@ public: if (d_pp->pcw.Texture) { d_pp->texid = renderer->GetTexture(d_pp->tsp,d_pp->tcw); } + d_pp->tsp1.full = -1; + d_pp->tcw1.full = -1; + d_pp->texid1 = -1; } } @@ -869,6 +874,11 @@ public: TA_PolyParam3* pp=(TA_PolyParam3*)vpp; glob_param_bdc(pp); + + CurrentPP->tsp1.full = pp->tsp1.full; + CurrentPP->tcw1.full = pp->tcw1.full; + if (pp->pcw.Texture) + CurrentPP->texid1 = renderer->GetTexture(pp->tsp1, pp->tcw1); } __forceinline static void TACALL AppendPolyParam4A(void* vpp) @@ -876,13 +886,19 @@ public: TA_PolyParam4A* pp=(TA_PolyParam4A*)vpp; glob_param_bdc(pp); + + CurrentPP->tsp1.full = pp->tsp1.full; + CurrentPP->tcw1.full = pp->tcw1.full; + if (pp->pcw.Texture) + CurrentPP->texid1 = renderer->GetTexture(pp->tsp1, pp->tcw1); } __forceinline static void TACALL AppendPolyParam4B(void* vpp) { TA_PolyParam4B* pp=(TA_PolyParam4B*)vpp; - 
poly_float_color(FaceBaseColor,FaceColor0); + poly_float_color(FaceBaseColor, FaceColor0); + poly_float_color(FaceBaseColor1, FaceColor1); } //Poly Strip handling @@ -950,6 +966,14 @@ public: cv->u = f16(vtx->u_name);\ cv->v = f16(vtx->v_name); + #define vert_uv1_32(u_name,v_name) \ + cv->u1 = (vtx->u_name);\ + cv->v1 = (vtx->v_name); + + #define vert_uv1_16(u_name,v_name) \ + cv->u1 = f16(vtx->u_name);\ + cv->v1 = f16(vtx->v_name); + //Color conversions #define vert_packed_color_(to,src) \ { \ @@ -993,6 +1017,20 @@ public: cv->spc[2] = FaceOffsColor[2]*satint/256; \ cv->spc[3] = FaceOffsColor[3]; } + #define vert_face_base_color1(baseint) \ + { u32 satint=float_to_satu8(vtx->baseint); \ + cv->col1[0] = FaceBaseColor1[0]*satint/256; \ + cv->col1[1] = FaceBaseColor1[1]*satint/256; \ + cv->col1[2] = FaceBaseColor1[2]*satint/256; \ + cv->col1[3] = FaceBaseColor1[3]; } + + #define vert_face_offs_color1(offsint) \ + { u32 satint=float_to_satu8(vtx->offsint); \ + cv->spc1[0] = FaceOffsColor1[0]*satint/256; \ + cv->spc1[1] = FaceOffsColor1[1]*satint/256; \ + cv->spc1[2] = FaceOffsColor1[2]*satint/256; \ + cv->spc1[3] = FaceOffsColor1[3]; } + //vert_float_color_(cv->spc,FaceOffsColor[3],FaceOffsColor[0]*satint/256,FaceOffsColor[1]*satint/256,FaceOffsColor[2]*satint/256); } @@ -1118,6 +1156,7 @@ public: vert_cvt_base; vert_packed_color(col,BaseCol0); + vert_packed_color(col1, BaseCol1); } //(Non-Textured, Intensity, with Two Volumes) @@ -1127,6 +1166,7 @@ public: vert_cvt_base; vert_face_base_color(BaseInt0); + vert_face_base_color1(BaseInt1); } //(Textured, Packed Color, with Two Volumes) @@ -1145,6 +1185,10 @@ public: { vert_res_base; + vert_packed_color(col1, BaseCol1); + vert_packed_color(spc1, OffsCol1); + + vert_uv1_32(u1, v1); } //(Textured, Packed Color, 16bit UV, with Two Volumes) @@ -1163,6 +1207,10 @@ public: { vert_res_base; + vert_packed_color(col1, BaseCol1); + vert_packed_color(spc1, OffsCol1); + + vert_uv1_16(u1, v1); } //(Textured, Intensity, with Two 
Volumes) @@ -1181,6 +1229,10 @@ public: { vert_res_base; + vert_face_base_color1(BaseInt1); + vert_face_offs_color1(OffsInt1); + + vert_uv1_32(u1,v1); } //(Textured, Intensity, 16bit UV, with Two Volumes) @@ -1199,6 +1251,10 @@ public: { vert_res_base; + vert_face_base_color1(BaseInt1); + vert_face_offs_color1(OffsInt1); + + vert_uv1_16(u1, v1); } //Sprites @@ -1226,6 +1282,9 @@ public: if (d_pp->pcw.Texture) { d_pp->texid = renderer->GetTexture(d_pp->tsp,d_pp->tcw); } + d_pp->tcw1.full = -1; + d_pp->tsp1.full = -1; + d_pp->texid1 = -1; SFaceBaseColor=spr->BaseCol; SFaceOffsColor=spr->OffsCol; @@ -1616,6 +1675,9 @@ void FillBGP(TA_context* ctx) bgpp->isp.full=vri(strip_base); bgpp->tsp.full=vri(strip_base+4); bgpp->tcw.full=vri(strip_base+8); + bgpp->tcw1.full = -1; + bgpp->tsp1.full = -1; + bgpp->texid1 = -1; bgpp->count=4; bgpp->first=0; bgpp->tileclip=0;//disabled ! HA ~ From 4bca94cf862e05797000a8ad8c81095ea5efa023 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 29 May 2018 18:42:21 +0200 Subject: [PATCH 12/65] Removed pass 2: OP and TR pixels are not added to a-buffers. Discard TR pixels as much as possible in pass 3 based on blending mode and color/alpha values. Removed depth and stencil buffer from RTT FBO as they are no longer needed. 
--- core/rend/gles/abuffer.cpp | 43 +++++++++++++-------------- core/rend/gles/gldraw.cpp | 11 ++++++- core/rend/gles/gles.cpp | 60 +++++++++++++++++++++++++++++++++++--- core/rend/gles/gltex.cpp | 35 ++++------------------ 4 files changed, 92 insertions(+), 57 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index 5a1fe0b6f..e7c5c805f 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -26,6 +26,10 @@ static int g_imageHeight = 0; static const char *final_shader_source = SHADER_HEADER "\ #define DEPTH_SORTED %d \n\ #define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT " \n\ + \n\ +layout(binding = 0) uniform sampler2D tex; \n\ +uniform lowp vec2 screen_size; \n\ + \n\ out vec4 FragColor; \n\ \n\ Pixel pixel_list[MAX_PIXELS_PER_FRAGMENT]; \n\ @@ -83,14 +87,12 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ \n\ // Copy fragments in local array \n\ int num_frag = fillFragmentArray(coords); \n\ - if (num_frag == 0) \n\ - discard; \n\ \n\ // Sort fragments in local memory array \n\ bubbleSort(num_frag); \n\ \n\ - vec4 finalColor = pixel_list[0].color; \n\ - for (int i = 1; i < num_frag; i++) { \n\ + vec4 finalColor = texture(tex, gl_FragCoord.xy / screen_size); \n\ + for (int i = 0; i < num_frag; i++) { \n\ vec4 srcColor = pixel_list[i].color; \n\ float srcAlpha = srcColor.a; \n\ float dstAlpha = finalColor.a; \n\ @@ -191,10 +193,8 @@ void main(void) \n\ ivec2 coords = ivec2(gl_FragCoord.xy); \n\ \n\ uint idx = atomicCounterIncrement(buffer_index); \n\ - if ((idx + 1u) * 32u - 1u >= ABUFFER_SIZE) { \n\ + if ((idx + 1u) * 32u - 1u >= ABUFFER_SIZE) \n\ discard; \n\ - return; \n\ - } \n\ Pixel pixel; \n\ pixel.color = texture(tex, gl_FragCoord.xy / screen_size); \n\ pixel.depth = texture(DepthTex, gl_FragCoord.xy / screen_size).r; \n\ @@ -219,11 +219,13 @@ void main(void) \n\ if (all(greaterThanEqual(coords, ivec2(0))) && all(lessThan(coords, imageSize(abufferPointerImg)))) { \n\ \n\ uint idx = 
imageLoad(abufferPointerImg, coords).x; \n\ + if ((idx + 1u) * 32u - 1u >= ABUFFER_SIZE) \n\ + discard; \n\ while (idx != EOL) { \n\ if (pixels[idx].seq_num > 0) { \n\ #if LAST_PASS == 0 \n\ - if (gl_FragDepth <= pixels[idx].depth) \n\ - atomicXor(pixels[idx].blend_stencil, 2u); \n\ +// if (gl_FragDepth <= pixels[idx].depth) \n\ +// atomicXor(pixels[idx].blend_stencil, 2u); \n\ #else \n\ if (mod(pixels[idx].blend_stencil, 256u) != 0u) \n\ pixels[idx].color.a = 1.0; // FIXME \n\ @@ -306,8 +308,10 @@ void initABuffer() CompilePipelineShader(&g_abuffer_tr_modvol_final_shader, source); } - glGenVertexArrays(1, &g_quadVertexArray); - glGenBuffers(1, &g_quadBuffer); + if (g_quadVertexArray == 0) + glGenVertexArrays(1, &g_quadVertexArray); + if (g_quadBuffer == 0) + glGenBuffers(1, &g_quadBuffer); glCheck(); @@ -391,12 +395,7 @@ void DrawTranslucentModVols(int first, int count) glActiveTexture(GL_TEXTURE3); glBindTexture(GL_TEXTURE_2D, pixels_pointers); glActiveTexture(GL_TEXTURE0); - - // Why do I need to rebind? 
- glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer); - glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomic_buffer); - glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomic_buffer); + glBindTexture(GL_TEXTURE_2D, 0); glcache.Disable(GL_BLEND); glcache.Disable(GL_DEPTH_TEST); @@ -485,11 +484,11 @@ void renderABuffer(bool sortFragments) glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_BUFFER_UPDATE_BARRIER_BIT); - GLuint size = 0; - glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, 4, &size); - //printf("ABUFFER %d pixels used\n", size); - if ((size + 1) * 32 - 1 >= ABUFFER_SIZE) - printf("ABUFFER OVERRUN %d pixels\n", size); +// GLuint size = 0; +// glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, 4, &size); +// printf("ABUFFER %d pixels used\n", size); +// if ((size + 1) * 32 - 1 >= ABUFFER_SIZE) +// printf("ABUFFER OVERRUN %d pixels\n", size); DrawQuad(); diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index f03b0899d..ce9c7e178 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -635,6 +635,7 @@ void DrawStrips() CreateGeometryTexture(); } glcache.ClearColor(0, 0, 0, 0); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glcache.Disable(GL_SCISSOR_TEST); glcache.DepthMask(GL_TRUE); glStencilMask(0xFF); @@ -696,12 +697,17 @@ void DrawStrips() glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - renderPass2(opaqueTexId, depthTexId); +// renderPass2(opaqueTexId, depthTexId); // // PASS 3: Render TR to a-buffers // SetupMainVBO(); + glcache.Disable(GL_DEPTH_TEST); + + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, depthTexId); + glActiveTexture(GL_TEXTURE0); //Alpha blended DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); @@ -722,6 +728,9 @@ void DrawStrips() glBindFramebuffer(GL_FRAMEBUFFER, output_fbo); glCheck(); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + 
glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, opaqueTexId); + renderABuffer(pvrrc.isAutoSort); SetupMainVBO(); } diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 5a0a1eca8..5082f083f 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -965,6 +965,54 @@ bool gl_create_resources(); //setup extern void initABuffer(); +void gl_DebugOutput(GLenum source, + GLenum type, + GLuint id, + GLenum severity, + GLsizei length, + const GLchar *message, + void *userParam) +{ + // ignore non-significant error/warning codes + if(id == 131169 || id == 131185 || id == 131218 || id == 131204) return; + + printf("OpenGL Debug message (%d): %s\n", id, message); + + switch (source) + { + case GL_DEBUG_SOURCE_API: printf("Source: API"); break; + case GL_DEBUG_SOURCE_WINDOW_SYSTEM: printf("Source: Window System"); break; + case GL_DEBUG_SOURCE_SHADER_COMPILER: printf("Source: Shader Compiler"); break; + case GL_DEBUG_SOURCE_THIRD_PARTY: printf("Source: Third Party"); break; + case GL_DEBUG_SOURCE_APPLICATION: printf("Source: Application"); break; + case GL_DEBUG_SOURCE_OTHER: printf("Source: Other"); break; + } + printf(" "); + + switch (type) + { + case GL_DEBUG_TYPE_ERROR: printf("Type: Error"); break; + case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR: printf("Type: Deprecated Behaviour"); break; + case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR: printf("Type: Undefined Behaviour"); break; + case GL_DEBUG_TYPE_PORTABILITY: printf("Type: Portability"); break; + case GL_DEBUG_TYPE_PERFORMANCE: printf("Type: Performance"); break; + case GL_DEBUG_TYPE_MARKER: printf("Type: Marker"); break; + case GL_DEBUG_TYPE_PUSH_GROUP: printf("Type: Push Group"); break; + case GL_DEBUG_TYPE_POP_GROUP: printf("Type: Pop Group"); break; + case GL_DEBUG_TYPE_OTHER: printf("Type: Other"); break; + } + printf(" "); + + switch (severity) + { + case GL_DEBUG_SEVERITY_HIGH: printf("Severity: high"); break; + case GL_DEBUG_SEVERITY_MEDIUM: printf("Severity: medium"); break; + case 
GL_DEBUG_SEVERITY_LOW: printf("Severity: low"); break; + case GL_DEBUG_SEVERITY_NOTIFICATION: printf("Severity: notification"); break; + }; + printf("\n"); +} + bool gles_init() { @@ -983,6 +1031,12 @@ bool gles_init() #endif #endif +// glEnable(GL_DEBUG_OUTPUT); +// glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); +// glDebugMessageCallback(gl_DebugOutput, NULL); +// glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE); + + //clean up the buffer glcache.ClearColor(0.f, 0.f, 0.f, 0.f); glClear(GL_COLOR_BUFFER_BIT); @@ -1769,10 +1823,8 @@ bool RenderFrame() glcache.ClearColor(0,0,0,1.0f); glcache.Disable(GL_SCISSOR_TEST); - - glcache.DepthMask(GL_TRUE); - glStencilMask(0xFF); glCheck(); - glClear(GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glCheck(); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glClear(GL_COLOR_BUFFER_BIT); glCheck(); //move vertex to gpu diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index 2458277a5..fd50f1a26 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -380,7 +380,6 @@ TextureCacheData *getTextureCacheData(TSP tsp, TCW tcw); struct FBT { u32 TexAddr; - GLuint depthb,stencilb; GLuint tex; GLuint fbo; }; @@ -393,8 +392,6 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) if (rv.fbo) glDeleteFramebuffers(1,&rv.fbo); if (rv.tex) glcache.DeleteTextures(1,&rv.tex); - if (rv.depthb) glDeleteRenderbuffers(1,&rv.depthb); - if (rv.stencilb) glDeleteRenderbuffers(1,&rv.stencilb); rv.TexAddr=addy>>3; @@ -409,26 +406,6 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) // Get the currently bound frame buffer object. On most platforms this just gives 0. 
//glGetIntegerv(GL_FRAMEBUFFER_BINDING, &m_i32OriginalFbo); - // Generate and bind a render buffer which will become a depth buffer shared between our two FBOs - glGenRenderbuffers(1, &rv.depthb); - glBindRenderbuffer(GL_RENDERBUFFER, rv.depthb); - - /* - Currently it is unknown to GL that we want our new render buffer to be a depth buffer. - glRenderbufferStorage will fix this and in this case will allocate a depth buffer - m_i32TexSize by m_i32TexSize. - */ - -#ifdef GLES - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24_OES, fbw2, fbh2); -#else - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24, fbw2, fbh2); -#endif - - glGenRenderbuffers(1, &rv.stencilb); - glBindRenderbuffer(GL_RENDERBUFFER, rv.stencilb); - glRenderbufferStorage(GL_RENDERBUFFER, GL_STENCIL_INDEX8, fbw2, fbh2); - // Create a texture for rendering to rv.tex = glcache.GenTexture(); glcache.BindTexture(GL_TEXTURE_2D, rv.tex); @@ -442,9 +419,6 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) // Attach the texture to the FBO glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rv.tex, 0); - // Attach the depth buffer we created earlier to our FBO. 
- glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rv.depthb); - // Check that our FBO creation was successful GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); @@ -589,10 +563,11 @@ void ReadRTTBuffer() { } fb_rtt.tex = 0; - if (fb_rtt.fbo) { glDeleteFramebuffers(1,&fb_rtt.fbo); fb_rtt.fbo = 0; } - if (fb_rtt.depthb) { glDeleteRenderbuffers(1,&fb_rtt.depthb); fb_rtt.depthb = 0; } - if (fb_rtt.stencilb) { glDeleteRenderbuffers(1,&fb_rtt.stencilb); fb_rtt.stencilb = 0; } - + if (fb_rtt.fbo) + { + glDeleteFramebuffers(1,&fb_rtt.fbo); + fb_rtt.fbo = 0; + } } static int TexCacheLookups; From 74a28f08fcb55fce5a6126663a50d3817bbf9386 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 29 May 2018 19:52:53 +0200 Subject: [PATCH 13/65] read_frame fix --- core/hw/pvr/Renderer_if.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index a8f9cc241..daf9fdfbc 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -165,7 +165,7 @@ TA_context* read_frame(const char* file, u8* vram_ref = NULL) { } int sizeofPolyParam = sizeof(PolyParam); int sizeofVertex = sizeof(Vertex); - if (id0[8] != '3') + if (id0[8] == '3') { sizeofPolyParam -= 12; sizeofVertex -= 16; From f3b831195558c292637cc433573deafd13900b72 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 30 May 2018 15:17:24 +0200 Subject: [PATCH 14/65] read_frame fix (again) --- core/hw/pvr/Renderer_if.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index daf9fdfbc..ec9edd643 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -165,7 +165,7 @@ TA_context* read_frame(const char* file, u8* vram_ref = NULL) { } int sizeofPolyParam = sizeof(PolyParam); int sizeofVertex = sizeof(Vertex); - if (id0[8] == '3') + if (id0[7] == '3') { sizeofPolyParam -= 12; sizeofVertex -= 16; From 
901940634f6d7c676c961e6c38740e89a180bfbb Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 30 May 2018 16:29:32 +0200 Subject: [PATCH 15/65] Create a PolyParam for each TR polygon strip so that we have a correct seq_num -> needed for sorting. --- core/hw/pvr/ta.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/hw/pvr/ta.h b/core/hw/pvr/ta.h index bbbd8e17b..2347dbf62 100644 --- a/core/hw/pvr/ta.h +++ b/core/hw/pvr/ta.h @@ -22,7 +22,7 @@ void ta_vtx_data(u32* data, u32 size); bool ta_parse_vdrc(TA_context* ctx); -#define STRIPS_AS_PPARAMS 0 +#define STRIPS_AS_PPARAMS 1 #define TRIG_SORT 0 From ef7cf6c0e5b39a6d7e39f42f45b1227a554762b2 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 30 May 2018 16:30:07 +0200 Subject: [PATCH 16/65] WIP two-volume mode support --- core/rend/gles/abuffer.cpp | 23 +++--- core/rend/gles/gldraw.cpp | 94 +++++++++++++++------- core/rend/gles/gles.cpp | 159 ++++++++++++++++++++++++++----------- core/rend/gles/gles.h | 51 ++++++++++-- 4 files changed, 230 insertions(+), 97 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index e7c5c805f..7399b1d16 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -246,13 +246,13 @@ void initABuffer() if (pixels_pointers == 0) pixels_pointers = glcache.GenTexture(); - glActiveTexture(GL_TEXTURE3); + glActiveTexture(GL_TEXTURE4); glBindTexture(GL_TEXTURE_2D, pixels_pointers); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); //Uses GL_R32F instead of GL_R32I that is not working in R257.15 glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, g_imageWidth, g_imageHeight, 0, GL_RED, GL_FLOAT, 0); - glBindImageTexture(3, pixels_pointers, 0, false, 0, GL_READ_WRITE, GL_R32UI); + glBindImageTexture(4, pixels_pointers, 0, false, 0, GL_READ_WRITE, GL_R32UI); glCheck(); if (pixels_buffer == 0 ) @@ -358,18 +358,19 @@ void DrawQuad() 
glEnableVertexAttribArray(VERTEX_UV_ARRAY); glCheck(); glVertexAttribPointer(VERTEX_UV_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,u)); glCheck(); + glDisableVertexAttribArray(VERTEX_UV1_ARRAY); + glDisableVertexAttribArray(VERTEX_COL_OFFS1_ARRAY); + glDisableVertexAttribArray(VERTEX_COL_BASE1_ARRAY); + glDrawElements(GL_TRIANGLE_STRIP, 5, GL_UNSIGNED_SHORT, indices); glCheck(); } void renderPass2(GLuint textureId, GLuint depthTexId) { + glActiveTexture(GL_TEXTURE2); + glBindTexture(GL_TEXTURE_2D, depthTexId); glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, textureId); - glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_2D, depthTexId); - glActiveTexture(GL_TEXTURE3); - glBindTexture(GL_TEXTURE_2D, pixels_pointers); - glActiveTexture(GL_TEXTURE0); glcache.UseProgram(g_abuffer_pass2_shader.program); ShaderUniforms.Set(&g_abuffer_pass2_shader); @@ -388,12 +389,10 @@ void DrawTranslucentModVols(int first, int count) printf("Drawing %d translucent modvols %d triangles\n", count, pvrrc.modtrig.used()); SetupModvolVBO(); - glActiveTexture(GL_TEXTURE1); - glBindTexture(GL_TEXTURE_2D, 0); glActiveTexture(GL_TEXTURE2); glBindTexture(GL_TEXTURE_2D, 0); glActiveTexture(GL_TEXTURE3); - glBindTexture(GL_TEXTURE_2D, pixels_pointers); + glBindTexture(GL_TEXTURE_2D, 0); glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, 0); @@ -464,10 +463,6 @@ void DrawTranslucentModVols(int first, int count) void renderABuffer(bool sortFragments) { - glActiveTexture(GL_TEXTURE3); - glBindTexture(GL_TEXTURE_2D, pixels_pointers); - glCheck(); - glcache.UseProgram(sortFragments ? 
g_abuffer_final_shader.program : g_abuffer_final_nosort_shader.program); ShaderUniforms.Set(&g_abuffer_final_shader); diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index ce9c7e178..c88529bb4 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -84,6 +84,7 @@ GLuint geom_fbo; GLuint stencilTexId; GLuint opaqueTexId; GLuint depthTexId; +GLuint texSamplers[2]; s32 SetTileClip(u32 val, bool set) { @@ -148,12 +149,12 @@ void SetCull(u32 CulliMode) } } -static void SetTextureRepeatMode(GLuint dir, u32 clamp, u32 mirror) +static void SetTextureRepeatMode(int index, GLuint dir, u32 clamp, u32 mirror) { if (clamp) - glcache.TexParameteri(GL_TEXTURE_2D, dir, GL_CLAMP_TO_EDGE); + glSamplerParameteri(texSamplers[index], dir, GL_CLAMP_TO_EDGE); else - glcache.TexParameteri(GL_TEXTURE_2D, dir, mirror ? GL_MIRRORED_REPEAT : GL_REPEAT); + glSamplerParameteri(texSamplers[index], dir, mirror ? GL_MIRRORED_REPEAT : GL_REPEAT); } template @@ -171,6 +172,7 @@ template 0, 0, 2, + false, // TODO Can PT have two different textures for area 0 and 1 ?? 
pass); CurrentShader = gl.getShader(shaderId); if (CurrentShader->program == -1) { @@ -182,6 +184,7 @@ template CurrentShader->pp_ShadInstr = 0; CurrentShader->pp_Offset = 0; CurrentShader->pp_FogCtrl = 2; + CurrentShader->pp_TwoVolumes = false; CurrentShader->pass = pass; CompilePipelineShader(CurrentShader); } @@ -196,6 +199,7 @@ template gp->tsp.ShadInstr, gp->pcw.Offset, gp->tsp.FogCtrl, + gp->tsp1.full != -1, pass); CurrentShader = gl.getShader(shaderId); if (CurrentShader->program == -1) { @@ -207,21 +211,25 @@ template CurrentShader->pp_ShadInstr = gp->tsp.ShadInstr; CurrentShader->pp_Offset = gp->pcw.Offset; CurrentShader->pp_FogCtrl = gp->tsp.FogCtrl; + CurrentShader->pp_TwoVolumes = gp->tsp1.full != -1; CurrentShader->pass = pass; CompilePipelineShader(CurrentShader); } } glcache.UseProgram(CurrentShader->program); + + ShaderUniforms.tsp0 = gp->tsp; + ShaderUniforms.tsp1 = gp->tsp1; + ShaderUniforms.tcw0 = gp->tcw; + ShaderUniforms.tcw1 = gp->tcw1; + if (Type == ListType_Opaque || Type == ListType_Punch_Through) // TODO Can PT have a >0 and <1 alpha? { - ShaderUniforms.blend_mode[0] = 1; - ShaderUniforms.blend_mode[1] = 0; - } - else - { - ShaderUniforms.blend_mode[0] = gp->tsp.SrcInstr; - ShaderUniforms.blend_mode[1] = gp->tsp.DstInstr; + ShaderUniforms.tsp0.SrcInstr = 1; + ShaderUniforms.tsp0.DstInstr = 0; + ShaderUniforms.tsp1.SrcInstr = 1; + ShaderUniforms.tsp1.DstInstr = 0; } SetTileClip(gp->tileclip,true); @@ -237,28 +245,39 @@ template if (CurrentShader->pp_Texture) { - glcache.BindTexture(GL_TEXTURE_2D, gp->texid == -1 ? 0 : gp->texid); - - if (gp->texid > 0) + for (int i = 0; i < 2; i++) { - SetTextureRepeatMode(GL_TEXTURE_WRAP_S, gp->tsp.ClampU, gp->tsp.FlipU); - SetTextureRepeatMode(GL_TEXTURE_WRAP_T, gp->tsp.ClampV, gp->tsp.FlipV); + glActiveTexture(GL_TEXTURE0 + i); + GLuint texid = i == 0 ? gp->texid : gp->texid1; - //set texture filter mode - if (gp->tsp.FilterMode == 0) + glBindTexture(GL_TEXTURE_2D, texid == -1 ? 
0 : texid); + + if (texid != -1) { - //disable filtering, mipmaps - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - } - else - { - //bilinear filtering - //PowerVR supports also trilinear via two passes, but we ignore that for now - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (gp->tcw.MipMapped && settings.rend.UseMipmaps) ? GL_LINEAR_MIPMAP_NEAREST : GL_LINEAR); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + TSP tsp = i == 0 ? gp->tsp : gp->tsp1; + TCW tcw = i == 0 ? gp->tcw : gp->tcw1; + + glBindSampler(i, texSamplers[i]); + SetTextureRepeatMode(i, GL_TEXTURE_WRAP_S, tsp.ClampU, tsp.FlipU); + SetTextureRepeatMode(i, GL_TEXTURE_WRAP_T, tsp.ClampV, tsp.FlipV); + + //set texture filter mode + if (tsp.FilterMode == 0) + { + //disable filtering, mipmaps + glSamplerParameteri(texSamplers[i], GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glSamplerParameteri(texSamplers[i], GL_TEXTURE_MAG_FILTER, GL_NEAREST); + } + else + { + //bilinear filtering + //PowerVR supports also trilinear via two passes, but we ignore that for now + glSamplerParameteri(texSamplers[i], GL_TEXTURE_MIN_FILTER, (tcw.MipMapped && settings.rend.UseMipmaps) ? 
GL_LINEAR_MIPMAP_NEAREST : GL_LINEAR); + glSamplerParameteri(texSamplers[i], GL_TEXTURE_MAG_FILTER, GL_LINEAR); + } } } + glActiveTexture(GL_TEXTURE0); } if (Type==ListType_Translucent && !SortingEnabled) @@ -447,6 +466,15 @@ void SetupMainVBO() glEnableVertexAttribArray(VERTEX_UV_ARRAY); glCheck(); glVertexAttribPointer(VERTEX_UV_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,u)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_BASE1_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_COL_BASE1_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex, col1)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_OFFS1_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_COL_OFFS1_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex, spc1)); glCheck(); + + glEnableVertexAttribArray(VERTEX_UV1_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_UV1_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, u1)); glCheck(); } void SetupModvolVBO() @@ -464,6 +492,9 @@ void SetupModvolVBO() glDisableVertexAttribArray(VERTEX_UV_ARRAY); glDisableVertexAttribArray(VERTEX_COL_OFFS_ARRAY); glDisableVertexAttribArray(VERTEX_COL_BASE_ARRAY); + glDisableVertexAttribArray(VERTEX_UV1_ARRAY); + glDisableVertexAttribArray(VERTEX_COL_OFFS1_ARRAY); + glDisableVertexAttribArray(VERTEX_COL_BASE1_ARRAY); } void DrawModVols(int first, int count) { @@ -641,6 +672,8 @@ void DrawStrips() glStencilMask(0xFF); glClear(GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glCheck(); } + if (texSamplers[0] == 0) + glGenSamplers(2, texSamplers); SetupMainVBO(); //Draw the strips ! 
@@ -674,7 +707,7 @@ void DrawStrips() glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); // Bind stencil buffer for the fragment shader (shadowing) - glActiveTexture(GL_TEXTURE2); + glActiveTexture(GL_TEXTURE3); glBindTexture(GL_TEXTURE_2D, stencilTexId); glActiveTexture(GL_TEXTURE0); glCheck(); @@ -691,7 +724,7 @@ void DrawStrips() // PASS 2: Render opaque and PT texture to a-buffers along with depth // // Unbind stencil - glActiveTexture(GL_TEXTURE2); + glActiveTexture(GL_TEXTURE3); glBindTexture(GL_TEXTURE_2D, 0); glActiveTexture(GL_TEXTURE0); @@ -705,7 +738,7 @@ void DrawStrips() SetupMainVBO(); glcache.Disable(GL_DEPTH_TEST); - glActiveTexture(GL_TEXTURE1); + glActiveTexture(GL_TEXTURE2); glBindTexture(GL_TEXTURE_2D, depthTexId); glActiveTexture(GL_TEXTURE0); @@ -729,6 +762,7 @@ void DrawStrips() glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glActiveTexture(GL_TEXTURE0); + glBindSampler(0, 0); glBindTexture(GL_TEXTURE_2D, opaqueTexId); renderABuffer(pvrrc.isAutoSort); diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 5082f083f..e99d97fd0 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -84,16 +84,25 @@ uniform highp vec4 depth_scale; \n\ " attr " lowp vec4 in_base; \n\ " attr " lowp vec4 in_offs; \n\ " attr " mediump vec2 in_uv; \n\ +" attr " lowp vec4 in_base1; \n\ +" attr " lowp vec4 in_offs1; \n\ +" attr " mediump vec2 in_uv1; \n\ /* output */ \n\ " vary " lowp vec4 vtx_base; \n\ " vary " lowp vec4 vtx_offs; \n\ " vary " mediump vec2 vtx_uv; \n\ +" vary " lowp vec4 vtx_base1; \n\ +" vary " lowp vec4 vtx_offs1; \n\ +" vary " mediump vec2 vtx_uv1; \n\ " vary " mediump float vtx_z; \n\ void main() \n\ { \n\ vtx_base=in_base; \n\ vtx_offs=in_offs; \n\ vtx_uv=in_uv; \n\ + vtx_base1 = in_base1; \n\ + vtx_offs1 = in_offs1; \n\ + vtx_uv1 = in_uv1; \n\ vec4 vpos=in_pos; \n\ vtx_z = vpos.z; \n\ vpos.w=1.0/vpos.z; \n" @@ -183,6 +192,7 @@ const char* PixelPipelineShader = SHADER_HEADER #define pp_ShadInstr %d \n\ #define 
pp_Offset %d \n\ #define pp_FogCtrl %d \n\ +#define pp_TwoVolumes %d \n\ #define PASS %d \n" #ifndef GLES "\ @@ -200,6 +210,12 @@ const char* PixelPipelineShader = SHADER_HEADER #define DST_ALPHA 6u \n\ #define INVERSE_DST_ALPHA 7u \n\ \n\ +#if pp_TwoVolumes == 1 // FIXME This is not needed for pass 3 (TR) and causes issues? Fix it Felix!\n\ +#define IF(x) if (x) \n\ +#else \n\ +#define IF(x) \n\ +#endif \n\ + \n\ /* Shader program params*/ \n\ /* gles has no alpha test stage, so its emulated on the shader */ \n\ uniform lowp float cp_AlphaTestValue; \n\ @@ -209,17 +225,33 @@ uniform highp vec2 sp_LOG_FOG_COEFS; \n\ uniform highp float sp_FOG_DENSITY; \n\ uniform highp float shade_scale_factor; \n\ uniform lowp vec2 screen_size; \n\ -uniform sampler2D tex,fog_table; \n\ +uniform sampler2D tex0, tex1; \n\ +uniform sampler2D fog_table; \n\ uniform int pp_Number; \n\ uniform usampler2D shadow_stencil; \n\ uniform sampler2D DepthTex; \n\ -uniform uvec2 blend_mode; \n\ uniform uint pp_Stencil; \n\ \n\ +uniform uvec2 blend_mode0; \n\ +#if pp_TwoVolumes == 1 \n\ +uniform bool use_alpha0; \n\ +uniform bool ignore_tex_alpha0; \n\ +uniform int shading_instr0; \n\ +uniform int fog_control0; \n\ +uniform uvec2 blend_mode1; \n\ +uniform bool use_alpha1; \n\ +uniform bool ignore_tex_alpha1; \n\ +uniform int shading_instr1; \n\ +uniform int fog_control1; \n\ +#endif \n\ + \n\ /* Vertex input*/ \n\ " vary " lowp vec4 vtx_base; \n\ " vary " lowp vec4 vtx_offs; \n\ " vary " mediump vec2 vtx_uv; \n\ +" vary " lowp vec4 vtx_base1; \n\ +" vary " lowp vec4 vtx_offs1; \n\ +" vary " mediump vec2 vtx_uv1; \n\ " vary " mediump float vtx_z; \n\ lowp float fog_mode2(highp float w) \n\ { \n\ @@ -250,41 +282,74 @@ void main() \n\ discard; \n\ #endif \n\ \n\ - highp vec4 color=vtx_base; \n\ - #if pp_UseAlpha==0 \n\ - color.a=1.0; \n\ + highp vec4 color = vtx_base; \n\ + lowp vec4 offset = vtx_offs; \n\ + mediump vec2 uv = vtx_uv; \n\ + bool area1 = false; \n\ + uvec2 blend_mode = blend_mode0; 
\n\ + \n\ + #if pp_TwoVolumes == 1 \n\ + bool use_alpha = use_alpha0; \n\ + bool ignore_tex_alpha = ignore_tex_alpha0; \n\ + int shading_instr = shading_instr0; \n\ + int fog_control = fog_control0; \n\ + #if PASS == 1 \n\ + uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / screen_size); \n\ + if (stencil.r == 0x81u) { \n\ + color = vtx_base1; \n\ + offset = vtx_offs1; \n\ + uv = vtx_uv1; \n\ + area1 = true; \n\ + blend_mode = blend_mode1; \n\ + use_alpha = use_alpha1; \n\ + ignore_tex_alpha = ignore_tex_alpha1; \n\ + shading_instr = shading_instr1; \n\ + fog_control = fog_control1; \n\ + } \n\ + #endif\n\ #endif\n\ - #if pp_FogCtrl==3 // LUT Mode 2 \n\ - color=vec4(sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ + \n\ + #if pp_UseAlpha==0 || pp_TwoVolumes == 1 \n\ + IF(!use_alpha) \n\ + color.a=1.0; \n\ + #endif\n\ + #if pp_FogCtrl==3 || pp_TwoVolumes == 1 // LUT Mode 2 \n\ + IF(fog_control == 3) \n\ + color=vec4(sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ #endif\n\ #if pp_Texture==1 \n\ { \n\ - lowp vec4 texcol=" TEXLOOKUP "(tex,vtx_uv); \n\ + lowp vec4 texcol=" TEXLOOKUP "(area1 ? 
tex1 : tex0, uv); \n\ \n\ - #if pp_IgnoreTexA==1 \n\ - texcol.a=1.0; \n\ + #if pp_IgnoreTexA==1 || pp_TwoVolumes == 1 \n\ + IF(ignore_tex_alpha) \n\ + texcol.a=1.0; \n\ #endif\n\ \n\ #if cp_AlphaTest == 1 \n\ if (cp_AlphaTestValue>texcol.a) discard;\n\ #endif \n\ - #if pp_ShadInstr==0 // DECAL \n\ + #if pp_ShadInstr==0 || pp_TwoVolumes == 1 // DECAL \n\ + IF(shading_instr == 0) \n\ { \n\ color=texcol; \n\ } \n\ #endif\n\ - #if pp_ShadInstr==1 // MODULATE \n\ + #if pp_ShadInstr==1 || pp_TwoVolumes == 1 // MODULATE \n\ + IF(shading_instr == 1) \n\ { \n\ color.rgb*=texcol.rgb; \n\ color.a=texcol.a; \n\ } \n\ #endif\n\ - #if pp_ShadInstr==2 // DECAL ALPHA \n\ + #if pp_ShadInstr==2 || pp_TwoVolumes == 1 // DECAL ALPHA \n\ + IF(shading_instr == 2) \n\ { \n\ color.rgb=mix(color.rgb,texcol.rgb,texcol.a); \n\ } \n\ #endif\n\ - #if pp_ShadInstr==3 // MODULATE ALPHA \n\ + #if pp_ShadInstr==3 || pp_TwoVolumes == 1 // MODULATE ALPHA \n\ + IF(shading_instr == 3) \n\ { \n\ color*=texcol; \n\ } \n\ @@ -292,20 +357,22 @@ void main() \n\ \n\ #if pp_Offset==1 \n\ { \n\ - color.rgb+=vtx_offs.rgb; \n\ - if (pp_FogCtrl==1) // Per vertex \n\ - color.rgb=mix(color.rgb,sp_FOG_COL_VERT.rgb,vtx_offs.a); \n\ + color.rgb += offset.rgb; \n\ + #if pp_FogCtrl == 1 || pp_TwoVolumes == 1 // Per vertex \n\ + IF(fog_control == 1) \n\ + color.rgb=mix(color.rgb, sp_FOG_COL_VERT.rgb, offset.a); \n\ + #endif\n\ } \n\ #endif\n\ } \n\ #endif\n\ - #if PASS == 1 \n\ - //uvec4 stencil = texture(shadow_stencil, vec2(gl_FragCoord.x / 1280, gl_FragCoord.y / 960)); \n\ + #if PASS == 1 && pp_TwoVolumes == 0 \n\ uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / screen_size); \n\ if (stencil.r == 0x81u) \n\ color.rgb *= shade_scale_factor; \n\ #endif\n\ - #if pp_FogCtrl==0 // LUT \n\ + #if pp_FogCtrl==0 || pp_TwoVolumes == 1 // LUT \n\ + IF(fog_control == 0) \n\ { \n\ color.rgb=mix(color.rgb,sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ } \n\ @@ -774,6 +841,9 @@ GLuint gl_CompileAndLink(const char* 
VertexShader, const char* FragmentShader) glBindAttribLocation(program, VERTEX_COL_BASE_ARRAY, "in_base"); glBindAttribLocation(program, VERTEX_COL_OFFS_ARRAY, "in_offs"); glBindAttribLocation(program, VERTEX_UV_ARRAY, "in_uv"); + glBindAttribLocation(program, VERTEX_COL_BASE1_ARRAY, "in_base1"); + glBindAttribLocation(program, VERTEX_COL_OFFS1_ARRAY, "in_offs1"); + glBindAttribLocation(program, VERTEX_UV1_ARRAY, "in_uv1"); #ifndef GLES glBindFragDataLocation(program, 0, "FragColor"); @@ -815,7 +885,7 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, - u32 pp_FogCtrl, int pass) + u32 pp_FogCtrl, bool pp_TwoVolumes, int pass) { u32 rv=0; @@ -827,6 +897,7 @@ int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, rv<<=2; rv|=pp_ShadInstr; rv<<=1; rv|=pp_Offset; rv<<=2; rv|=pp_FogCtrl; + rv <<= 1; rv |= (int)pp_TwoVolumes; rv <<= 2; rv |= pass; return rv; @@ -838,15 +909,18 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe sprintf(pshader, source, s->cp_AlphaTest,s->pp_ClipTestMode,s->pp_UseAlpha, - s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pass); + s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pp_TwoVolumes, s->pass); s->program=gl_CompileAndLink(VertexShaderSource,pshader); - //setup texture 0 as the input for the shader - GLuint gu=glGetUniformLocation(s->program, "tex"); - if (s->pp_Texture==1) - glUniform1i(gu,0); + GLint gu = glGetUniformLocation(s->program, "tex0"); + if (s->pp_Texture == 1 && gu != -1) + glUniform1i(gu, 0); + // Setup texture 1 as the input for area 1 in two volume mode + gu = glGetUniformLocation(s->program, "tex1"); + if (s->pp_Texture == 1 && gu != -1) + glUniform1i(gu, 1); //get the uniform locations s->scale = glGetUniformLocation(s->program, "scale"); @@ -880,22 
+954,27 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe // Use texture 1 for depth texture gu = glGetUniformLocation(s->program, "DepthTex"); if (gu != -1) - glUniform1i(gu, 1); // GL_TEXTURE1 + glUniform1i(gu, 2); // GL_TEXTURE2 // Shadow stencil for OP/PT rendering pass gu = glGetUniformLocation(s->program, "shadow_stencil"); if (gu != -1) - glUniform1i(gu, 2); // GL_TEXTURE2 + glUniform1i(gu, 3); // GL_TEXTURE3 - // A-buffers -// gu = glGetUniformLocation(s->program, "abufferPointerImg"); -// if (gu != -1) -// glUniform1i(gu, 3); // GL_TEXTURE3 - - s->blend_mode = glGetUniformLocation(s->program, "blend_mode"); s->pp_Number = glGetUniformLocation(s->program, "pp_Number"); s->pp_Stencil = glGetUniformLocation(s->program, "pp_Stencil"); + s->blend_mode0 = glGetUniformLocation(s->program, "blend_mode0"); + s->blend_mode1 = glGetUniformLocation(s->program, "blend_mode1"); + s->use_alpha0 = glGetUniformLocation(s->program, "use_alpha0"); + s->use_alpha1 = glGetUniformLocation(s->program, "use_alpha1"); + s->ignore_tex_alpha0 = glGetUniformLocation(s->program, "ignore_tex_alpha0"); + s->ignore_tex_alpha1 = glGetUniformLocation(s->program, "ignore_tex_alpha1"); + s->shading_instr0 = glGetUniformLocation(s->program, "shading_instr0"); + s->shading_instr1 = glGetUniformLocation(s->program, "shading_instr1"); + s->fog_control0 = glGetUniformLocation(s->program, "fog_control0"); + s->fog_control1 = glGetUniformLocation(s->program, "fog_control1"); + return glIsProgram(s->program)==GL_TRUE; } @@ -924,7 +1003,6 @@ bool gl_create_resources() gl.OSD_SHADER.program=gl_CompileAndLink(VertexShaderSource,OSD_Shader); - printf("OSD: %d\n",gl.OSD_SHADER.program); gl.OSD_SHADER.scale=glGetUniformLocation(gl.OSD_SHADER.program, "scale"); gl.OSD_SHADER.depth_scale=glGetUniformLocation(gl.OSD_SHADER.program, "depth_scale"); glUniform1i(glGetUniformLocation(gl.OSD_SHADER.program, "tex"),0); //bind osd texture to slot 0 @@ -1749,18 +1827,7 @@ bool 
RenderFrame() glUniform4fv( gl.OSD_SHADER.depth_scale, 1, td); ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f; -/* - for (u32 i=0;iprogram == -1) - continue; - glcache.UseProgram(s->program); - - ShaderUniforms.Set(s); - } -*/ //setup render target first if (is_rtt) { diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 5238e421f..b307c1bb5 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -39,6 +39,9 @@ #define VERTEX_COL_BASE_ARRAY 1 #define VERTEX_COL_OFFS_ARRAY 2 #define VERTEX_UV_ARRAY 3 +#define VERTEX_COL_BASE1_ARRAY 4 +#define VERTEX_COL_OFFS1_ARRAY 5 +#define VERTEX_UV1_ARRAY 6 //vertex types @@ -57,14 +60,19 @@ struct PipelineShader GLuint sp_FOG_COL_RAM,sp_FOG_COL_VERT,sp_FOG_DENSITY,sp_LOG_FOG_COEFS; GLuint shade_scale_factor; GLuint screen_size; - GLuint blend_mode; GLuint pp_Number; GLuint pp_Stencil; + GLuint blend_mode0, blend_mode1; + GLuint use_alpha0, use_alpha1; + GLuint ignore_tex_alpha0, ignore_tex_alpha1; + GLuint shading_instr0, shading_instr1; + GLuint fog_control0, fog_control1; // u32 cp_AlphaTest; s32 pp_ClipTestMode; u32 pp_Texture, pp_UseAlpha, pp_IgnoreTexA, pp_ShadInstr, pp_Offset, pp_FogCtrl; int pass; + bool pp_TwoVolumes; }; @@ -135,7 +143,7 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt); void ReadRTTBuffer(); int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, - u32 pp_FogCtrl, int pass); + u32 pp_FogCtrl, bool two_volumes, int pass); void SetCull(u32 CulliMode); struct ShaderUniforms_t @@ -147,9 +155,12 @@ struct ShaderUniforms_t float ps_FOG_COL_RAM[3]; float ps_FOG_COL_VERT[3]; float fog_coefs[2]; - GLuint blend_mode[2]; int poly_number; u32 stencil; + TSP tsp0; + TSP tsp1; + TCW tcw0; + TCW tcw1; void Set(PipelineShader* s) { @@ -180,8 +191,34 @@ struct ShaderUniforms_t if (s->shade_scale_factor != -1) glUniform1f(s->shade_scale_factor, FPU_SHAD_SCALE.scale_factor / 256.f); - if 
(s->blend_mode != -1) - glUniform2uiv(s->blend_mode, 1, blend_mode); + if (s->blend_mode0 != -1) { + u32 blend_mode[2] = { tsp0.SrcInstr, tsp0.DstInstr }; + glUniform2uiv(s->blend_mode0, 1, blend_mode); + } + if (s->blend_mode1 != -1) { + u32 blend_mode[2] = { tsp1.SrcInstr, tsp1.DstInstr }; + glUniform2uiv(s->blend_mode1, 1, blend_mode); + } + + if (s->use_alpha0 != -1) + glUniform1i(s->use_alpha0, tsp0.UseAlpha); + if (s->use_alpha1 != -1) + glUniform1i(s->use_alpha1, tsp1.UseAlpha); + + if (s->ignore_tex_alpha0 != -1) + glUniform1i(s->ignore_tex_alpha0, tsp0.IgnoreTexA); + if (s->ignore_tex_alpha1 != -1) + glUniform1i(s->ignore_tex_alpha1, tsp1.IgnoreTexA); + + if (s->shading_instr0 != -1) + glUniform1i(s->shading_instr0, tsp0.ShadInstr); + if (s->shading_instr1 != -1) + glUniform1i(s->shading_instr1, tsp1.ShadInstr); + + if (s->fog_control0 != -1) + glUniform1i(s->fog_control0, tsp0.FogCtrl); + if (s->fog_control1 != -1) + glUniform1i(s->fog_control1, tsp1.FogCtrl); if (s->pp_Number != -1) glUniform1i(s->pp_Number, poly_number); @@ -215,7 +252,7 @@ extern GLuint opaqueTexId; #extension GL_ARB_shading_language_420pack : enable \n\ \n\ #define ABUFFER_SIZE " ABUFFER_SIZE_STR " \n\ -coherent uniform layout(size1x32, binding = 3) uimage2D abufferPointerImg; \n\ +coherent uniform layout(size1x32, binding = 4) uimage2D abufferPointerImg; \n\ struct Pixel { \n\ mediump vec4 color; \n\ mediump float depth; \n\ @@ -224,7 +261,7 @@ struct Pixel { \n\ uint next; \n\ }; \n\ #define EOL 0xFFFFFFFFu \n\ -layout (binding = 0) buffer PixelBuffer { \n\ +layout (binding = 0, std430) buffer PixelBuffer { \n\ Pixel pixels[]; \n\ }; \n\ layout(binding = 0, offset = 0) uniform atomic_uint buffer_index; \n\ From 8d1bfa76836efa469b4c4452c99b5f44eab5acc9 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 30 May 2018 20:59:57 +0200 Subject: [PATCH 17/65] Translucent modifer volumes --- core/rend/gles/abuffer.cpp | 119 +++++++++++++++++-------------------- core/rend/gles/gldraw.cpp | 
14 ++--- core/rend/gles/gles.cpp | 85 ++++++++++++-------------- core/rend/gles/gles.h | 57 +++++++----------- 4 files changed, 119 insertions(+), 156 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index 7399b1d16..a35c3a062 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -12,9 +12,9 @@ GLuint atomic_buffer; PipelineShader g_abuffer_final_shader; PipelineShader g_abuffer_final_nosort_shader; PipelineShader g_abuffer_clear_shader; -PipelineShader g_abuffer_pass2_shader; PipelineShader g_abuffer_tr_modvol_shader; PipelineShader g_abuffer_tr_modvol_final_shader; +static GLuint volume_mode_uniform; static GLuint g_quadBuffer = 0; static GLuint g_quadVertexArray = 0; @@ -29,6 +29,7 @@ static const char *final_shader_source = SHADER_HEADER "\ \n\ layout(binding = 0) uniform sampler2D tex; \n\ uniform lowp vec2 screen_size; \n\ +uniform highp float shade_scale_factor; \n\ \n\ out vec4 FragColor; \n\ \n\ @@ -94,34 +95,38 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ vec4 finalColor = texture(tex, gl_FragCoord.xy / screen_size); \n\ for (int i = 0; i < num_frag; i++) { \n\ vec4 srcColor = pixel_list[i].color; \n\ + if ((pixel_list[i].blend_stencil & 0x81u) == 0x81u) \n\ + srcColor.rgb *= shade_scale_factor; \n\ float srcAlpha = srcColor.a; \n\ float dstAlpha = finalColor.a; \n\ + vec4 srcCoef; \n\ \n\ int srcBlend = int(pixel_list[i].blend_stencil) / 256 / 8; \n\ switch (srcBlend) \n\ { \n\ case 0: // zero \n\ - srcColor = vec4(0); \n\ + srcCoef = vec4(0); \n\ break; \n\ case 1: // one \n\ + srcCoef = vec4(1); \n\ break; \n\ case 2: // other color \n\ - srcColor *= finalColor; \n\ + srcCoef = finalColor; \n\ break; \n\ case 3: // inverse other color \n\ - srcColor *= vec4(1) - finalColor; \n\ + srcCoef = vec4(1) - finalColor; \n\ break; \n\ case 4: // src alpha \n\ - srcColor *= srcAlpha; \n\ + srcCoef = vec4(srcAlpha); \n\ break; \n\ case 5: // inverse src alpha \n\ - srcColor *= 1 - srcAlpha; \n\ + srcCoef = 
vec4(1 - srcAlpha); \n\ break; \n\ case 6: // dst alpha \n\ - srcColor *= dstAlpha; \n\ + srcCoef = vec4(dstAlpha); \n\ break; \n\ case 7: // inverse dst alpha \n\ - srcColor *= 1 - dstAlpha; \n\ + srcCoef = vec4(1 - dstAlpha); \n\ break; \n\ } \n\ int dstBlend = (int(pixel_list[i].blend_stencil) / 256) % 8; \n\ @@ -133,10 +138,10 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ case 1: // one \n\ break; \n\ case 2: // other color \n\ - finalColor *= pixel_list[i].color; \n\ + finalColor *= srcColor; \n\ break; \n\ case 3: // inverse other color \n\ - finalColor *= vec4(1) - pixel_list[i].color; \n\ + finalColor *= vec4(1) - srcColor; \n\ break; \n\ case 4: // src alpha \n\ finalColor *= srcAlpha; \n\ @@ -151,7 +156,7 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ finalColor *= 1 - dstAlpha; \n\ break; \n\ } \n\ - finalColor = clamp(finalColor + srcColor, 0, 1); \n\ + finalColor = clamp(finalColor + srcColor * srcCoef, 0, 1); \n\ } \n\ \n\ return finalColor; \n\ @@ -182,56 +187,51 @@ void main(void) \n\ } \n\ "; -// Renders the opaque and pt rendered texture into a-buffers -static const char *pass2_shader_source = SHADER_HEADER "\ -uniform lowp vec2 screen_size; \n\ -uniform sampler2D DepthTex; \n\ -uniform sampler2D tex; \n\ - \n\ -void main(void) \n\ -{ \n\ - ivec2 coords = ivec2(gl_FragCoord.xy); \n\ - \n\ - uint idx = atomicCounterIncrement(buffer_index); \n\ - if ((idx + 1u) * 32u - 1u >= ABUFFER_SIZE) \n\ - discard; \n\ - Pixel pixel; \n\ - pixel.color = texture(tex, gl_FragCoord.xy / screen_size); \n\ - pixel.depth = texture(DepthTex, gl_FragCoord.xy / screen_size).r; \n\ - pixel.seq_num = 0; \n\ - pixel.blend_stencil = 0x800u; \n\ - pixel.next = imageAtomicExchange(abufferPointerImg, coords, idx); \n\ - pixels[idx] = pixel; \n\ - \n\ - // Discard fragment so nothing is written to the framebuffer \n\ - discard; \n\ -} \n\ -"; - static const char *tr_modvol_shader_source = SHADER_HEADER "\ #define LAST_PASS %d \n\ +uniform int volume_mode; \n\ void main(void) \n\ 
{ \n\ #if LAST_PASS == 0 \n\ setFragDepth(); \n\ #endif \n\ ivec2 coords = ivec2(gl_FragCoord.xy); \n\ - if (all(greaterThanEqual(coords, ivec2(0))) && all(lessThan(coords, imageSize(abufferPointerImg)))) { \n\ + if (all(greaterThanEqual(coords, ivec2(0))) && all(lessThan(coords, imageSize(abufferPointerImg)))) \n\ + { \n\ \n\ uint idx = imageLoad(abufferPointerImg, coords).x; \n\ if ((idx + 1u) * 32u - 1u >= ABUFFER_SIZE) \n\ discard; \n\ + int list_len = 0; \n\ while (idx != EOL) { \n\ - if (pixels[idx].seq_num > 0) { \n\ + uint stencil = pixels[idx].blend_stencil; \n\ + if ((stencil & 0x80u) == 0x80u) \n\ + { \n\ #if LAST_PASS == 0 \n\ -// if (gl_FragDepth <= pixels[idx].depth) \n\ -// atomicXor(pixels[idx].blend_stencil, 2u); \n\ + if (gl_FragDepth <= pixels[idx].depth) \n\ + atomicXor(pixels[idx].blend_stencil, 2u); \n\ #else \n\ - if (mod(pixels[idx].blend_stencil, 256u) != 0u) \n\ - pixels[idx].color.a = 1.0; // FIXME \n\ + switch (volume_mode) \n\ + { \n\ + case 1: // Inclusion volume \n\ + if ((stencil & 2u) == 2u) \n\ + pixels[idx].blend_stencil = bitfieldInsert(stencil, 1u, 0, 2); \n\ + else \n\ + pixels[idx].blend_stencil = bitfieldInsert(stencil, 0u, 1, 1); \n\ + break; \n\ + case 2: // Exclusion volume \n\ + if ((stencil & 3u) == 1u) \n\ + pixels[idx].blend_stencil = bitfieldInsert(stencil, 1u, 0, 2); \n\ + else \n\ + pixels[idx].blend_stencil = bitfieldInsert(stencil, 0u, 0, 2); \n\ + break; \n\ + } \n\ #endif \n\ } \n\ idx = pixels[idx].next; \n\ + list_len++; // FIXME Why do I need this? Linked list corruption? 
\n\ + if (list_len >= 32) \n\ + break; \n\ } \n\ } \n\ \n\ @@ -293,8 +293,6 @@ void initABuffer() } if (g_abuffer_clear_shader.program == 0) CompilePipelineShader(&g_abuffer_clear_shader, clear_shader_source); - if (g_abuffer_pass2_shader.program == 0) - CompilePipelineShader(&g_abuffer_pass2_shader, pass2_shader_source); if (g_abuffer_tr_modvol_shader.program == 0) { char source[8192]; @@ -306,6 +304,7 @@ void initABuffer() char source[8192]; sprintf(source, tr_modvol_shader_source, 1); CompilePipelineShader(&g_abuffer_tr_modvol_final_shader, source); + volume_mode_uniform = glGetUniformLocation(g_abuffer_tr_modvol_final_shader.program, "volume_mode"); } if (g_quadVertexArray == 0) @@ -365,34 +364,18 @@ void DrawQuad() glDrawElements(GL_TRIANGLE_STRIP, 5, GL_UNSIGNED_SHORT, indices); glCheck(); } -void renderPass2(GLuint textureId, GLuint depthTexId) -{ - glActiveTexture(GL_TEXTURE2); - glBindTexture(GL_TEXTURE_2D, depthTexId); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, textureId); - - glcache.UseProgram(g_abuffer_pass2_shader.program); - ShaderUniforms.Set(&g_abuffer_pass2_shader); - - glcache.Disable(GL_BLEND); - glcache.Disable(GL_DEPTH_TEST); - glcache.Disable(GL_CULL_FACE); - - DrawQuad(); -} - void DrawTranslucentModVols(int first, int count) { if (count == 0 || pvrrc.modtrig.used() == 0) return; - printf("Drawing %d translucent modvols %d triangles\n", count, pvrrc.modtrig.used()); SetupModvolVBO(); glActiveTexture(GL_TEXTURE2); glBindTexture(GL_TEXTURE_2D, 0); glActiveTexture(GL_TEXTURE3); glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, 0); glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, 0); @@ -412,8 +395,10 @@ void DrawTranslucentModVols(int first, int count) u32 cmv_count = count - 1; ISP_Modvol* params = &pvrrc.global_param_mvo_tr.head()[first]; + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + //ISP_Modvol - for (u32 cmv = 0; cmv < 3 /* FIXME cmv_count */; cmv++) + for 
(u32 cmv = 0; cmv < cmv_count; cmv++) { ISP_Modvol ispc = params[cmv]; @@ -449,8 +434,10 @@ void DrawTranslucentModVols(int first, int count) //Sum the area glcache.UseProgram(g_abuffer_tr_modvol_final_shader.program); glCheck(); - - glDrawArrays(GL_TRIANGLES, mod_last * 3, (mod_base - mod_last + 1) * 3); glCheck(); + glUniform1i(volume_mode_uniform, mv_mode); + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + DrawQuad(); + SetupModvolVBO(); //update pointers mod_last = mod_base + 1; diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index c88529bb4..31c3bf825 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -608,7 +608,6 @@ void DrawModVols(int first, int count) } void renderABuffer(bool sortFragments); -void renderPass2(GLuint textureId, GLuint depthTexId); void DrawTranslucentModVols(int first, int count); void CreateGeometryTexture() @@ -690,7 +689,7 @@ void DrawStrips() glcache.DepthMask(GL_TRUE); glcache.Disable(GL_BLEND); // - // PASS 0: Geometry pass to update the depth and stencil + // PASS 1: Geometry pass to update the depth and stencil // glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); @@ -702,7 +701,7 @@ void DrawStrips() DrawModVols(previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); // - // PASS 1: Render OP and PT to fbo + // PASS 2: Render OP and PT to fbo // glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); @@ -720,9 +719,6 @@ void DrawStrips() //Alpha tested DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count, 1); - // - // PASS 2: Render opaque and PT texture to a-buffers along with depth - // // Unbind stencil glActiveTexture(GL_TEXTURE3); glBindTexture(GL_TEXTURE_2D, 0); @@ -730,8 +726,6 @@ void DrawStrips() glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); -// renderPass2(opaqueTexId, depthTexId); - // // PASS 3: Render TR to a-buffers // @@ -746,8 +740,8 @@ void DrawStrips() 
DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); glCheck(); -// glMemoryBarrier(GL_ALL_BARRIER_BITS); -// DrawTranslucentModVols(previous_pass.mvo_tr_count, current_pass.mvo_tr_count - previous_pass.mvo_tr_count); + // Translucent modifier volumes + DrawTranslucentModVols(previous_pass.mvo_tr_count, current_pass.mvo_tr_count - previous_pass.mvo_tr_count); // FIXME Depth of translucent poly must be used for next render pass if any // FIXME Multipass in general... diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index e99d97fd0..5996c8250 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -232,17 +232,12 @@ uniform usampler2D shadow_stencil; \n\ uniform sampler2D DepthTex; \n\ uniform uint pp_Stencil; \n\ \n\ -uniform uvec2 blend_mode0; \n\ +uniform uvec2 blend_mode[2]; \n\ #if pp_TwoVolumes == 1 \n\ -uniform bool use_alpha0; \n\ -uniform bool ignore_tex_alpha0; \n\ -uniform int shading_instr0; \n\ -uniform int fog_control0; \n\ -uniform uvec2 blend_mode1; \n\ -uniform bool use_alpha1; \n\ -uniform bool ignore_tex_alpha1; \n\ -uniform int shading_instr1; \n\ -uniform int fog_control1; \n\ +uniform bool use_alpha[2]; \n\ +uniform bool ignore_tex_alpha[2]; \n\ +uniform int shading_instr[2]; \n\ +uniform int fog_control[2]; \n\ #endif \n\ \n\ /* Vertex input*/ \n\ @@ -286,13 +281,13 @@ void main() \n\ lowp vec4 offset = vtx_offs; \n\ mediump vec2 uv = vtx_uv; \n\ bool area1 = false; \n\ - uvec2 blend_mode = blend_mode0; \n\ + uvec2 cur_blend_mode = blend_mode[0]; \n\ \n\ #if pp_TwoVolumes == 1 \n\ - bool use_alpha = use_alpha0; \n\ - bool ignore_tex_alpha = ignore_tex_alpha0; \n\ - int shading_instr = shading_instr0; \n\ - int fog_control = fog_control0; \n\ + bool cur_use_alpha = use_alpha[0]; \n\ + bool cur_ignore_tex_alpha = ignore_tex_alpha[0]; \n\ + int cur_shading_instr = shading_instr[0]; \n\ + int cur_fog_control = fog_control[0]; \n\ #if PASS == 1 \n\ uvec4 stencil = 
texture(shadow_stencil, gl_FragCoord.xy / screen_size); \n\ if (stencil.r == 0x81u) { \n\ @@ -300,21 +295,21 @@ void main() \n\ offset = vtx_offs1; \n\ uv = vtx_uv1; \n\ area1 = true; \n\ - blend_mode = blend_mode1; \n\ - use_alpha = use_alpha1; \n\ - ignore_tex_alpha = ignore_tex_alpha1; \n\ - shading_instr = shading_instr1; \n\ - fog_control = fog_control1; \n\ + cur_blend_mode = blend_mode[1]; \n\ + cur_use_alpha = use_alpha[1]; \n\ + cur_ignore_tex_alpha = ignore_tex_alpha[1]; \n\ + cur_shading_instr = shading_instr[1]; \n\ + cur_fog_control = fog_control[1]; \n\ } \n\ #endif\n\ #endif\n\ \n\ #if pp_UseAlpha==0 || pp_TwoVolumes == 1 \n\ - IF(!use_alpha) \n\ + IF(!cur_use_alpha) \n\ color.a=1.0; \n\ #endif\n\ #if pp_FogCtrl==3 || pp_TwoVolumes == 1 // LUT Mode 2 \n\ - IF(fog_control == 3) \n\ + IF(cur_fog_control == 3) \n\ color=vec4(sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ #endif\n\ #if pp_Texture==1 \n\ @@ -322,7 +317,7 @@ void main() \n\ lowp vec4 texcol=" TEXLOOKUP "(area1 ? 
tex1 : tex0, uv); \n\ \n\ #if pp_IgnoreTexA==1 || pp_TwoVolumes == 1 \n\ - IF(ignore_tex_alpha) \n\ + IF(cur_ignore_tex_alpha) \n\ texcol.a=1.0; \n\ #endif\n\ \n\ @@ -330,26 +325,26 @@ void main() \n\ if (cp_AlphaTestValue>texcol.a) discard;\n\ #endif \n\ #if pp_ShadInstr==0 || pp_TwoVolumes == 1 // DECAL \n\ - IF(shading_instr == 0) \n\ + IF(cur_shading_instr == 0) \n\ { \n\ color=texcol; \n\ } \n\ #endif\n\ #if pp_ShadInstr==1 || pp_TwoVolumes == 1 // MODULATE \n\ - IF(shading_instr == 1) \n\ + IF(cur_shading_instr == 1) \n\ { \n\ color.rgb*=texcol.rgb; \n\ color.a=texcol.a; \n\ } \n\ #endif\n\ #if pp_ShadInstr==2 || pp_TwoVolumes == 1 // DECAL ALPHA \n\ - IF(shading_instr == 2) \n\ + IF(cur_shading_instr == 2) \n\ { \n\ color.rgb=mix(color.rgb,texcol.rgb,texcol.a); \n\ } \n\ #endif\n\ #if pp_ShadInstr==3 || pp_TwoVolumes == 1 // MODULATE ALPHA \n\ - IF(shading_instr == 3) \n\ + IF(cur_shading_instr == 3) \n\ { \n\ color*=texcol; \n\ } \n\ @@ -359,7 +354,7 @@ void main() \n\ { \n\ color.rgb += offset.rgb; \n\ #if pp_FogCtrl == 1 || pp_TwoVolumes == 1 // Per vertex \n\ - IF(fog_control == 1) \n\ + IF(cur_fog_control == 1) \n\ color.rgb=mix(color.rgb, sp_FOG_COL_VERT.rgb, offset.a); \n\ #endif\n\ } \n\ @@ -372,7 +367,7 @@ void main() \n\ color.rgb *= shade_scale_factor; \n\ #endif\n\ #if pp_FogCtrl==0 || pp_TwoVolumes == 1 // LUT \n\ - IF(fog_control == 0) \n\ + IF(cur_fog_control == 0) \n\ { \n\ color.rgb=mix(color.rgb,sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ } \n\ @@ -388,10 +383,10 @@ void main() \n\ #elif PASS > 1 \n\ // Discard as many pixels as possible \n\ bool ignore = false; \n\ - switch (blend_mode.y) // DST \n\ + switch (cur_blend_mode.y) // DST \n\ { \n\ case ONE: \n\ - switch (blend_mode.x) \n\ + switch (cur_blend_mode.x) \n\ { \n\ case ZERO: \n\ ignore = true; \n\ @@ -410,19 +405,19 @@ void main() \n\ } \n\ break; \n\ case OTHER_COLOR: \n\ - if (blend_mode.x == ZERO && color.r == 1.0 && color.g == 1.0 && color.b == 1.0 && color.a == 1.0) \n\ 
+ if (cur_blend_mode.x == ZERO && color.r == 1.0 && color.g == 1.0 && color.b == 1.0 && color.a == 1.0) \n\ ignore = true; \n\ break; \n\ case INVERSE_OTHER_COLOR: \n\ - if (blend_mode.x <= SRC_ALPHA && color.r == 0.0 && color.g == 0.0 && color.b == 0.0 && color.a == 0.0) \n\ + if (cur_blend_mode.x <= SRC_ALPHA && color.r == 0.0 && color.g == 0.0 && color.b == 0.0 && color.a == 0.0) \n\ ignore = true; \n\ break; \n\ case SRC_ALPHA: \n\ - if ((blend_mode.x == ZERO || blend_mode.x == INVERSE_SRC_ALPHA) && color.a == 1.0) \n\ + if ((cur_blend_mode.x == ZERO || cur_blend_mode.x == INVERSE_SRC_ALPHA) && color.a == 1.0) \n\ ignore = true; \n\ break; \n\ case INVERSE_SRC_ALPHA: \n\ - switch (blend_mode.x) // SRC \n\ + switch (cur_blend_mode.x) // SRC \n\ { \n\ case ZERO: \n\ case SRC_ALPHA: \n\ @@ -451,7 +446,7 @@ void main() \n\ pixel.color = color; \n\ pixel.depth = gl_FragDepth; \n\ pixel.seq_num = pp_Number; \n\ - pixel.blend_stencil = (blend_mode.x * 8u + blend_mode.y) * 256u + pp_Stencil; \n\ + pixel.blend_stencil = (cur_blend_mode.x * 8u + cur_blend_mode.y) * 256u + pp_Stencil; \n\ pixel.next = imageAtomicExchange(abufferPointerImg, coords, idx); \n\ pixels[idx] = pixel; \n\ } \n\ @@ -964,16 +959,11 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe s->pp_Number = glGetUniformLocation(s->program, "pp_Number"); s->pp_Stencil = glGetUniformLocation(s->program, "pp_Stencil"); - s->blend_mode0 = glGetUniformLocation(s->program, "blend_mode0"); - s->blend_mode1 = glGetUniformLocation(s->program, "blend_mode1"); - s->use_alpha0 = glGetUniformLocation(s->program, "use_alpha0"); - s->use_alpha1 = glGetUniformLocation(s->program, "use_alpha1"); - s->ignore_tex_alpha0 = glGetUniformLocation(s->program, "ignore_tex_alpha0"); - s->ignore_tex_alpha1 = glGetUniformLocation(s->program, "ignore_tex_alpha1"); - s->shading_instr0 = glGetUniformLocation(s->program, "shading_instr0"); - s->shading_instr1 = glGetUniformLocation(s->program, 
"shading_instr1"); - s->fog_control0 = glGetUniformLocation(s->program, "fog_control0"); - s->fog_control1 = glGetUniformLocation(s->program, "fog_control1"); + s->blend_mode = glGetUniformLocation(s->program, "blend_mode"); + s->use_alpha = glGetUniformLocation(s->program, "use_alpha"); + s->ignore_tex_alpha = glGetUniformLocation(s->program, "ignore_tex_alpha"); + s->shading_instr = glGetUniformLocation(s->program, "shading_instr"); + s->fog_control = glGetUniformLocation(s->program, "fog_control"); return glIsProgram(s->program)==GL_TRUE; } @@ -1053,6 +1043,9 @@ void gl_DebugOutput(GLenum source, { // ignore non-significant error/warning codes if(id == 131169 || id == 131185 || id == 131218 || id == 131204) return; + if (id == 131186) + // Warning when fetching the atomic_uint pixel count + return; printf("OpenGL Debug message (%d): %s\n", id, message); diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index b307c1bb5..d5e75f346 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -62,11 +62,11 @@ struct PipelineShader GLuint screen_size; GLuint pp_Number; GLuint pp_Stencil; - GLuint blend_mode0, blend_mode1; - GLuint use_alpha0, use_alpha1; - GLuint ignore_tex_alpha0, ignore_tex_alpha1; - GLuint shading_instr0, shading_instr1; - GLuint fog_control0, fog_control1; + GLuint blend_mode; + GLuint use_alpha; + GLuint ignore_tex_alpha; + GLuint shading_instr; + GLuint fog_control; // u32 cp_AlphaTest; s32 pp_ClipTestMode; @@ -162,6 +162,12 @@ struct ShaderUniforms_t TCW tcw0; TCW tcw1; + void setUniformArray(GLuint location, int v0, int v1) + { + int array[] = { v0, v1 }; + glUniform1iv(location, 2, array); + } + void Set(PipelineShader* s) { if (s->cp_AlphaTestValue!=-1) @@ -191,34 +197,22 @@ struct ShaderUniforms_t if (s->shade_scale_factor != -1) glUniform1f(s->shade_scale_factor, FPU_SHAD_SCALE.scale_factor / 256.f); - if (s->blend_mode0 != -1) { - u32 blend_mode[2] = { tsp0.SrcInstr, tsp0.DstInstr }; - glUniform2uiv(s->blend_mode0, 1, 
blend_mode); - } - if (s->blend_mode1 != -1) { - u32 blend_mode[2] = { tsp1.SrcInstr, tsp1.DstInstr }; - glUniform2uiv(s->blend_mode1, 1, blend_mode); + if (s->blend_mode != -1) { + u32 blend_mode[] = { tsp0.SrcInstr, tsp0.DstInstr, tsp1.SrcInstr, tsp1.DstInstr }; + glUniform2uiv(s->blend_mode, 2, blend_mode); } - if (s->use_alpha0 != -1) - glUniform1i(s->use_alpha0, tsp0.UseAlpha); - if (s->use_alpha1 != -1) - glUniform1i(s->use_alpha1, tsp1.UseAlpha); + if (s->use_alpha != -1) + setUniformArray(s->use_alpha, tsp0.UseAlpha, tsp1.UseAlpha); - if (s->ignore_tex_alpha0 != -1) - glUniform1i(s->ignore_tex_alpha0, tsp0.IgnoreTexA); - if (s->ignore_tex_alpha1 != -1) - glUniform1i(s->ignore_tex_alpha1, tsp1.IgnoreTexA); + if (s->ignore_tex_alpha != -1) + setUniformArray(s->ignore_tex_alpha, tsp0.IgnoreTexA, tsp1.IgnoreTexA); - if (s->shading_instr0 != -1) - glUniform1i(s->shading_instr0, tsp0.ShadInstr); - if (s->shading_instr1 != -1) - glUniform1i(s->shading_instr1, tsp1.ShadInstr); + if (s->shading_instr != -1) + setUniformArray(s->shading_instr, tsp0.ShadInstr, tsp1.ShadInstr); - if (s->fog_control0 != -1) - glUniform1i(s->fog_control0, tsp0.FogCtrl); - if (s->fog_control1 != -1) - glUniform1i(s->fog_control1, tsp1.FogCtrl); + if (s->fog_control != -1) + setUniformArray(s->fog_control, tsp0.FogCtrl, tsp1.FogCtrl); if (s->pp_Number != -1) glUniform1i(s->pp_Number, poly_number); @@ -244,12 +238,7 @@ extern GLuint opaqueTexId; #define ABUFFER_SIZE 256*1024*1024 #define ABUFFER_SIZE_STR "(256u * 1024u * 1024u)" -#define SHADER_HEADER "#version 140 \n\ -#extension GL_EXT_shader_image_load_store : enable \n\ -#extension GL_ARB_shader_storage_buffer_object : enable \n\ -#extension GL_ARB_shader_atomic_counters : enable \n\ -#extension GL_ARB_shader_image_size : enable \n\ -#extension GL_ARB_shading_language_420pack : enable \n\ +#define SHADER_HEADER "#version 430 \n\ \n\ #define ABUFFER_SIZE " ABUFFER_SIZE_STR " \n\ coherent uniform layout(size1x32, binding = 4) uimage2D 
abufferPointerImg; \n\ From 0bd53de8e68bcf6fd52a8771c7c01f53f317e5db Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 30 May 2018 22:41:22 +0200 Subject: [PATCH 18/65] Increased global_param_tr list (Rez). Add log to identify which list is overrun. --- core/hw/pvr/helper_classes.h | 12 ++++++++---- core/hw/pvr/ta_ctx.h | 24 +++++++++++++++--------- 2 files changed, 23 insertions(+), 13 deletions(-) diff --git a/core/hw/pvr/helper_classes.h b/core/hw/pvr/helper_classes.h index d52153d5b..2e1f01d63 100644 --- a/core/hw/pvr/helper_classes.h +++ b/core/hw/pvr/helper_classes.h @@ -8,6 +8,7 @@ struct List int size; bool* overrun; + const char *list_name; __forceinline int used() const { return size-avail; } __forceinline int bytes() const { return used()* sizeof(T); } @@ -17,6 +18,8 @@ struct List { *overrun |= true; Clear(); + if (list_name != NULL) + printf("List overrun for list %s\n", list_name); return daty; } @@ -45,7 +48,7 @@ struct List T* head() const { return daty-used(); } - void InitBytes(int maxbytes,bool* ovrn) + void InitBytes(int maxbytes,bool* ovrn, const char *name) { maxbytes-=maxbytes%sizeof(T); @@ -58,11 +61,12 @@ struct List overrun=ovrn; Clear(); + list_name = name; } - void Init(int maxsize,bool* ovrn) + void Init(int maxsize,bool* ovrn, const char *name) { - InitBytes(maxsize*sizeof(T),ovrn); + InitBytes(maxsize*sizeof(T),ovrn, name); } void Clear() @@ -76,4 +80,4 @@ struct List Clear(); free(daty); } -}; \ No newline at end of file +}; diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index d0e5f0e77..ab4d7ffae 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -194,17 +194,23 @@ struct TA_context { tad.Reset((u8*)OS_aligned_malloc(32, 2*1024*1024)); - rend.verts.InitBytes(2*1024*1024,&rend.Overrun); //up to 2 mb of vtx data/frame = ~ 48k vtx/frame - rend.idx.Init(60*1024,&rend.Overrun); //up to 60K indexes ( idx have stripification overhead ) - rend.global_param_op.Init(4096,&rend.Overrun); - 
rend.global_param_pt.Init(4096,&rend.Overrun); - rend.global_param_mvo.Init(4096,&rend.Overrun); - rend.global_param_tr.Init(4096,&rend.Overrun); - rend.global_param_mvo_tr.Init(4096,&rend.Overrun); + rend.verts.InitBytes(2*1024*1024, &rend.Overrun, "verts"); //up to 2 mb of vtx data/frame = ~ 48k vtx/frame + rend.idx.Init(60*1024, &rend.Overrun, "idx"); //up to 60K indexes ( idx have stripification overhead ) + rend.global_param_op.Init(4096, &rend.Overrun, "global_param_op"); + rend.global_param_pt.Init(4096, &rend.Overrun, "global_param_pt"); + rend.global_param_mvo.Init(4096, &rend.Overrun, "global_param_mvo"); +#if STRIPS_AS_PPARAMS + // That makes a lot of polyparams but this is required for proper sorting... + // Rez uses more than 8192 translucent polygons sometimes + rend.global_param_tr.Init(10240, &rend.Overrun, "global_param_tr"); +#else + rend.global_param_tr.Init(4096, &rend.Overrun, "global_param_tr"); +#endif + rend.global_param_mvo_tr.Init(4096, &rend.Overrun, "global_param_mvo_tr"); - rend.modtrig.Init(16384,&rend.Overrun); + rend.modtrig.Init(16384, &rend.Overrun, "modtrig"); - rend.render_passes.Init(sizeof(RenderPass) * 10, &rend.Overrun); // 10 render passes + rend.render_passes.Init(sizeof(RenderPass) * 10, &rend.Overrun, "render_passes"); // 10 render passes Reset(); } From bb6e33e0d56022cb6c1cac3ca0f805ae3f7f83c8 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Thu, 31 May 2018 20:06:58 +0200 Subject: [PATCH 19/65] Multipass rendering support --- core/rend/gles/abuffer.cpp | 50 +++++--------- core/rend/gles/gldraw.cpp | 133 ++++++++++++++++++++++--------------- core/rend/gles/gles.cpp | 13 ++-- core/rend/gles/gles.h | 5 ++ 4 files changed, 110 insertions(+), 91 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index a35c3a062..31cde0e25 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -379,7 +379,6 @@ void DrawTranslucentModVols(int first, int count) glActiveTexture(GL_TEXTURE0); 
glBindTexture(GL_TEXTURE_2D, 0); - glcache.Disable(GL_BLEND); glcache.Disable(GL_DEPTH_TEST); glcache.Disable(GL_STENCIL_TEST); @@ -395,7 +394,7 @@ void DrawTranslucentModVols(int first, int count) u32 cmv_count = count - 1; ISP_Modvol* params = &pvrrc.global_param_mvo_tr.head()[first]; - glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT); //ISP_Modvol for (u32 cmv = 0; cmv < cmv_count; cmv++) @@ -415,35 +414,21 @@ void DrawTranslucentModVols(int first, int count) verify(mod_base > 0 && mod_base + sz <= pvrrc.modtrig.used()); - if (mv_mode == 0) //normal trigs + glcache.UseProgram(g_abuffer_tr_modvol_shader.program); + SetCull(ispc.CullMode); glCheck(); + glDrawArrays(GL_TRIANGLES, mod_base * 3, sz * 3); glCheck(); + + if (mv_mode == 1 || mv_mode == 2) { - glcache.UseProgram(g_abuffer_tr_modvol_shader.program); glCheck(); - SetCull(ispc.CullMode); glCheck(); - glDrawArrays(GL_TRIANGLES, mod_base * 3, sz * 3); glCheck(); - } - else if (mv_mode < 3) - { - while(sz) - { - //merge and clear all the prev. stencil bits + //Sum the area + glcache.UseProgram(g_abuffer_tr_modvol_final_shader.program); + glUniform1i(volume_mode_uniform, mv_mode); + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + DrawQuad(); + SetupModvolVBO(); - //Count Intersections (last poly) - glcache.UseProgram(g_abuffer_tr_modvol_shader.program); glCheck(); - SetCull(ispc.CullMode); glCheck(); - glDrawArrays(GL_TRIANGLES, mod_base * 3, 3); glCheck(); - - //Sum the area - glcache.UseProgram(g_abuffer_tr_modvol_final_shader.program); glCheck(); - glUniform1i(volume_mode_uniform, mv_mode); - glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - DrawQuad(); - SetupModvolVBO(); - - //update pointers - mod_last = mod_base + 1; - sz--; - mod_base++; - } + //update pointers + mod_last = mod_base + 1; } } } @@ -453,10 +438,9 @@ void renderABuffer(bool sortFragments) glcache.UseProgram(sortFragments ? 
g_abuffer_final_shader.program : g_abuffer_final_nosort_shader.program); ShaderUniforms.Set(&g_abuffer_final_shader); - glcache.Disable(GL_BLEND); glcache.Disable(GL_DEPTH_TEST); glcache.Disable(GL_CULL_FACE); - glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_BUFFER_UPDATE_BARRIER_BIT); + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT); DrawQuad(); glCheck(); @@ -464,8 +448,6 @@ void renderABuffer(bool sortFragments) glcache.UseProgram(g_abuffer_clear_shader.program); ShaderUniforms.Set(&g_abuffer_clear_shader); - glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT|GL_BUFFER_UPDATE_BARRIER_BIT); - // GLuint size = 0; // glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, 4, &size); // printf("ABUFFER %d pixels used\n", size); @@ -479,5 +461,7 @@ void renderABuffer(bool sortFragments) glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0 , sizeof(GLuint), &zero); glActiveTexture(GL_TEXTURE0); + + glMemoryBarrier(GL_ATOMIC_COUNTER_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); glCheck(); } diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 31c3bf825..6f21b1988 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -241,6 +241,7 @@ template glcache.StencilFunc(GL_ALWAYS,stencil,stencil); ShaderUniforms.stencil = stencil; + ShaderUniforms.depth_func = gp->isp.DepthMode; ShaderUniforms.Set(CurrentShader); if (CurrentShader->pp_Texture) @@ -280,14 +281,6 @@ template glActiveTexture(GL_TEXTURE0); } - if (Type==ListType_Translucent && !SortingEnabled) - { - glcache.Enable(GL_BLEND); - glcache.BlendFunc(SrcBlendGL[gp->tsp.SrcInstr],DstBlendGL[gp->tsp.DstInstr]); - } - else - glcache.Disable(GL_BLEND); - //set cull mode ! 
//cflip is required when exploding triangles for triangle sorting //gcflip is global clip flip, needed for when rendering to texture due to mirrored Y direction @@ -309,10 +302,11 @@ template glcache.DepthFunc(Zfunction[gp->isp.DepthMode]); } -// if (Type == ListType_Translucent) -// glcache.DepthMask(GL_FALSE); -// else + // Depth buffer is updated in pass 1 for OP and PT, but in pass 0 for TR (multipass only) + if (pass != 0 || Type == ListType_Translucent) glcache.DepthMask(!gp->isp.ZWriteDis); + else + glcache.DepthMask(GL_FALSE); } template @@ -325,32 +319,20 @@ void DrawList(const List& gply, int first, int count, int pass) return; //we want at least 1 PParam - - //set some 'global' modes for all primitives - - if (pass == 0) - { - glcache.Enable(GL_STENCIL_TEST); - glcache.StencilFunc(GL_ALWAYS,0,0); - glcache.StencilOp(GL_KEEP,GL_KEEP,GL_REPLACE); - } - else - { - glcache.StencilMask(0); - glcache.Disable(GL_STENCIL_TEST); - } - while(count-->0) { if (params->count>2) //this actually happens for some games. No idea why .. 
{ - if (Type == ListType_Translucent && params->tsp.SrcInstr == 0 && params->tsp.DstInstr == 1) + if (pass != 0) { - // No-op - params++; - continue; + // No need to draw this one + if (Type == ListType_Translucent && params->tsp.SrcInstr == 0 && params->tsp.DstInstr == 1) + { + params++; + continue; + } } - ShaderUniforms.poly_number = params - gply.head() + 1; + ShaderUniforms.poly_number = params - gply.head(); SetGPState(params, pass); glDrawElements(GL_TRIANGLE_STRIP, params->count, GL_UNSIGNED_SHORT, (GLvoid*)(2*params->first)); glCheck(); } @@ -610,7 +592,20 @@ void DrawModVols(int first, int count) void renderABuffer(bool sortFragments); void DrawTranslucentModVols(int first, int count); -void CreateGeometryTexture() +GLuint CreateColorFBOTexture() +{ + GLuint texId = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, texId); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, screen_width, screen_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texId, 0); + glCheck(); + + return texId; +} + +void CreateTextures() { glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); @@ -624,13 +619,7 @@ void CreateGeometryTexture() glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, stencilTexId, 0); glCheck(); glCheck(); - opaqueTexId = glcache.GenTexture(); - glcache.BindTexture(GL_TEXTURE_2D, opaqueTexId); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, screen_width, screen_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, opaqueTexId, 0); - glCheck(); + opaqueTexId = CreateColorFBOTexture(); depthTexId = 
glcache.GenTexture(); glTextureView(depthTexId, GL_TEXTURE_2D, stencilTexId, GL_DEPTH32F_STENCIL8, 0, 1, 0, 1); @@ -646,12 +635,12 @@ void DrawStrips() { GLint output_fbo; - glGetIntegerv(GL_FRAMEBUFFER_BINDING, &output_fbo); + glGetIntegerv(GL_FRAMEBUFFER_BINDING, &output_fbo); // TODO pass fbo id as parameter if (geom_fbo == 0) { glGenFramebuffers(1, &geom_fbo); - CreateGeometryTexture(); + CreateTextures(); GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); @@ -662,7 +651,7 @@ void DrawStrips() glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); if (stencilTexId == 0) { - CreateGeometryTexture(); + CreateTextures(); } glcache.ClearColor(0, 0, 0, 0); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); @@ -679,20 +668,23 @@ void DrawStrips() //We use sampler 0 glActiveTexture(GL_TEXTURE0); + glcache.Disable(GL_BLEND); RenderPass previous_pass = {0}; - for (int render_pass = 0; render_pass < pvrrc.render_passes.used(); render_pass++) { + int render_pass_count = pvrrc.render_passes.used(); + + for (int render_pass = 0; render_pass < render_pass_count; render_pass++) + { const RenderPass& current_pass = pvrrc.render_passes.head()[render_pass]; - //initial state - glcache.Enable(GL_DEPTH_TEST); - glcache.DepthMask(GL_TRUE); - glcache.Disable(GL_BLEND); // - // PASS 1: Geometry pass to update the depth and stencil + // PASS 1: Geometry pass to update the stencil // - glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + glcache.Enable(GL_DEPTH_TEST); + glcache.DepthMask(GL_FALSE); + glcache.Enable(GL_STENCIL_TEST); + glcache.StencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 0); DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count, 0); @@ -704,6 +696,7 @@ void DrawStrips() // PASS 2: Render OP and PT to fbo // glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + 
glcache.Disable(GL_STENCIL_TEST); // Bind stencil buffer for the fragment shader (shadowing) glActiveTexture(GL_TEXTURE3); @@ -711,8 +704,6 @@ void DrawStrips() glActiveTexture(GL_TEXTURE0); glCheck(); -// Multipass: render on generated tex of previous pass? -// FIXME re-rendering on same depth buffer: what if GL_LESS is used? //Opaque DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 1); @@ -737,14 +728,48 @@ void DrawStrips() glActiveTexture(GL_TEXTURE0); //Alpha blended - DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); + DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); // 3 because pass 2 is no more glCheck(); // Translucent modifier volumes DrawTranslucentModVols(previous_pass.mvo_tr_count, current_pass.mvo_tr_count - previous_pass.mvo_tr_count); - // FIXME Depth of translucent poly must be used for next render pass if any - // FIXME Multipass in general... 
+ if (render_pass < render_pass_count - 1) + { + // + // PASS 3b: Geometry pass with TR to update the depth for the next TA render pass + // + // Unbind depth texture + glActiveTexture(GL_TEXTURE2); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0); + + glcache.Enable(GL_DEPTH_TEST); + DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 0); + + // + // PASS 3c: Render a-buffer to temporary texture + // + GLuint texId = CreateColorFBOTexture(); + + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + + glActiveTexture(GL_TEXTURE0); + glBindSampler(0, 0); + glBindTexture(GL_TEXTURE_2D, opaqueTexId); + + renderABuffer(pvrrc.isAutoSort); + SetupMainVBO(); + + glcache.DeleteTextures(1, &opaqueTexId); + opaqueTexId = texId; + + // Clear the stencil from this pass + glStencilMask(0xFF); + glClear(GL_STENCIL_BUFFER_BIT); + + glCheck(); + } previous_pass = current_pass; } diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 5996c8250..42a127c82 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -231,6 +231,7 @@ uniform int pp_Number; \n\ uniform usampler2D shadow_stencil; \n\ uniform sampler2D DepthTex; \n\ uniform uint pp_Stencil; \n\ +uniform int pp_DepthFunc; \n\ \n\ uniform uvec2 blend_mode[2]; \n\ #if pp_TwoVolumes == 1 \n\ @@ -259,10 +260,13 @@ void main() \n\ \n\ #if PASS == 3 \n\ // Manual depth testing \n\ - highp float frontDepth = texture(DepthTex, gl_FragCoord.xy / screen_size).r; \n\ - // FIXME this causes dots to appear. Loss of precision? \n\ - if (gl_FragDepth > frontDepth) \n\ - discard; \n\ + // Depth func Always seems to be needed ? 
\n\ + if (pp_DepthFunc != 7) // TODO Use a #def \n\ + { \n\ + highp float frontDepth = texture(DepthTex, gl_FragCoord.xy / screen_size).r; \n\ + if (gl_FragDepth > frontDepth) \n\ + discard; \n\ + } \n\ #endif \n\ // Clip outside the box \n\ #if pp_ClipTestMode==1 \n\ @@ -958,6 +962,7 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe s->pp_Number = glGetUniformLocation(s->program, "pp_Number"); s->pp_Stencil = glGetUniformLocation(s->program, "pp_Stencil"); + s->pp_DepthFunc = glGetUniformLocation(s->program, "pp_DepthFunc"); s->blend_mode = glGetUniformLocation(s->program, "blend_mode"); s->use_alpha = glGetUniformLocation(s->program, "use_alpha"); diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index d5e75f346..e786377f6 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -62,6 +62,7 @@ struct PipelineShader GLuint screen_size; GLuint pp_Number; GLuint pp_Stencil; + GLuint pp_DepthFunc; GLuint blend_mode; GLuint use_alpha; GLuint ignore_tex_alpha; @@ -161,6 +162,7 @@ struct ShaderUniforms_t TSP tsp1; TCW tcw0; TCW tcw1; + int depth_func; void setUniformArray(GLuint location, int v0, int v1) { @@ -219,6 +221,9 @@ struct ShaderUniforms_t if (s->pp_Stencil != -1) glUniform1ui(s->pp_Stencil, stencil); + + if (s->pp_DepthFunc != -1) + glUniform1i(s->pp_DepthFunc, depth_func); } }; From e2201c7dd0b5fc5b34d7d2b899251b69620582b2 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Thu, 31 May 2018 21:31:20 +0200 Subject: [PATCH 20/65] Skip empty render passes --- core/rend/gles/gldraw.cpp | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 6f21b1988..e1282c957 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -677,6 +677,29 @@ void DrawStrips() { const RenderPass& current_pass = pvrrc.render_passes.head()[render_pass]; + // Check if we can skip this pass in case nothing is drawn (Cosmic Smash) + bool skip = true; 
+ for (int j = previous_pass.op_count; skip && j < current_pass.op_count; j++) + { + if (pvrrc.global_param_op.head()[j].count > 2) + skip = false; + } + for (int j = previous_pass.pt_count; skip && j < current_pass.pt_count; j++) + { + if (pvrrc.global_param_pt.head()[j].count > 2) + skip = false; + } + for (int j = previous_pass.tr_count; skip && j < current_pass.tr_count; j++) + { + if (pvrrc.global_param_tr.head()[j].count > 2) + skip = false; + } + if (skip) + { + previous_pass = current_pass; + continue; + } + // // PASS 1: Geometry pass to update the stencil // From 33b378cbf5d3073d2b755f464ea9cd5a7c2eb0fb Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Thu, 31 May 2018 21:35:18 +0200 Subject: [PATCH 21/65] Dynamic pixel buffer resizing --- core/rend/gles/abuffer.cpp | 31 ++++++++++++++++++++++++------- core/rend/gles/gles.cpp | 2 +- core/rend/gles/gles.h | 6 ------ 3 files changed, 25 insertions(+), 14 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index 31cde0e25..b722d9240 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -21,6 +21,8 @@ static GLuint g_quadVertexArray = 0; static int g_imageWidth = 0; static int g_imageHeight = 0; +GLuint pixel_buffer_size = 64 * 1024 * 1024; // Initial size 64 MB + #define MAX_PIXELS_PER_FRAGMENT "32" static const char *final_shader_source = SHADER_HEADER "\ @@ -200,7 +202,7 @@ void main(void) \n\ { \n\ \n\ uint idx = imageLoad(abufferPointerImg, coords).x; \n\ - if ((idx + 1u) * 32u - 1u >= ABUFFER_SIZE) \n\ + if (idx >= pixels.length()) // FIXME Shouldn't be necessary \n\ discard; \n\ int list_len = 0; \n\ while (idx != EOL) { \n\ @@ -262,7 +264,7 @@ void initABuffer() // Bind it glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer); // Declare storage - glBufferData(GL_SHADER_STORAGE_BUFFER, ABUFFER_SIZE, NULL, GL_DYNAMIC_COPY); + glBufferData(GL_SHADER_STORAGE_BUFFER, pixel_buffer_size, NULL, GL_DYNAMIC_COPY); glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, 
pixels_buffer); glCheck(); } @@ -448,12 +450,27 @@ void renderABuffer(bool sortFragments) glcache.UseProgram(g_abuffer_clear_shader.program); ShaderUniforms.Set(&g_abuffer_clear_shader); -// GLuint size = 0; -// glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, 4, &size); -// printf("ABUFFER %d pixels used\n", size); -// if ((size + 1) * 32 - 1 >= ABUFFER_SIZE) -// printf("ABUFFER OVERRUN %d pixels\n", size); + GLuint max_pixel_index = 0; + glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, 4, &max_pixel_index); +// printf("ABUFFER %d pixels used\n", max_pixel_index); + if ((max_pixel_index + 1) * 32 - 1 >= pixel_buffer_size) + { + GLint64 size; + glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &size); + if (pixel_buffer_size == size) + printf("A-buffer overflow: %d pixels. Buffer size already maxed out\n", max_pixel_index); + else + { + pixel_buffer_size = (GLuint)min(2 * (GLint64)pixel_buffer_size, size); + printf("A-buffer overflow: %d pixels. Resizing buffer to %d MB\n", max_pixel_index, pixel_buffer_size / 1024 / 1024); + + glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer); + glBufferData(GL_SHADER_STORAGE_BUFFER, pixel_buffer_size, NULL, GL_DYNAMIC_COPY); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer); + glCheck(); + } + } DrawQuad(); glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomic_buffer); diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 42a127c82..e6673ed78 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -442,7 +442,7 @@ void main() \n\ { \n\ ivec2 coords = ivec2(gl_FragCoord.xy); \n\ uint idx = atomicCounterIncrement(buffer_index); \n\ - if ((idx + 1u) * 32u - 1u >= ABUFFER_SIZE) { \n\ + if (idx >= pixels.length()) { \n\ discard; \n\ return; \n\ } \n\ diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index e786377f6..3fa3c72a4 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -238,14 +238,8 @@ extern GLuint stencilTexId; extern GLuint depthTexId; extern GLuint opaqueTexId; 
-// Must match! -// in bytes -#define ABUFFER_SIZE 256*1024*1024 -#define ABUFFER_SIZE_STR "(256u * 1024u * 1024u)" - #define SHADER_HEADER "#version 430 \n\ \n\ -#define ABUFFER_SIZE " ABUFFER_SIZE_STR " \n\ coherent uniform layout(size1x32, binding = 4) uimage2D abufferPointerImg; \n\ struct Pixel { \n\ mediump vec4 color; \n\ From 6fdc9fb0aaaf7e5aaa836ec2fff15cd50c3be791 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 2 Jun 2018 19:00:46 +0200 Subject: [PATCH 22/65] Fixed depth problems due to reusing the same depth buffer twice. Fixed widescreen issue with quads: left margin wasn't drawn or cleared. Was also interfering with RTT. Fixed problem with TR modifier volumes atomic operations. --- core/rend/gles/abuffer.cpp | 95 +++++++------- core/rend/gles/gldraw.cpp | 251 ++++++++++++++++++++++--------------- core/rend/gles/gles.cpp | 92 ++++++++------ core/rend/gles/gles.h | 19 +-- core/rend/gles/gltex.cpp | 10 +- 5 files changed, 271 insertions(+), 196 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index b722d9240..19e563557 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -30,7 +30,6 @@ static const char *final_shader_source = SHADER_HEADER "\ #define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT " \n\ \n\ layout(binding = 0) uniform sampler2D tex; \n\ -uniform lowp vec2 screen_size; \n\ uniform highp float shade_scale_factor; \n\ \n\ out vec4 FragColor; \n\ @@ -94,7 +93,7 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ // Sort fragments in local memory array \n\ bubbleSort(num_frag); \n\ \n\ - vec4 finalColor = texture(tex, gl_FragCoord.xy / screen_size); \n\ + vec4 finalColor = texture(tex, gl_FragCoord.xy / textureSize(tex, 0)); \n\ for (int i = 0; i < num_frag; i++) { \n\ vec4 srcColor = pixel_list[i].color; \n\ if ((pixel_list[i].blend_stencil & 0x81u) == 0x81u) \n\ @@ -191,6 +190,7 @@ void main(void) \n\ static const char *tr_modvol_shader_source = SHADER_HEADER "\ #define LAST_PASS %d 
\n\ +#define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT " \n\ uniform int volume_mode; \n\ void main(void) \n\ { \n\ @@ -213,26 +213,25 @@ void main(void) \n\ if (gl_FragDepth <= pixels[idx].depth) \n\ atomicXor(pixels[idx].blend_stencil, 2u); \n\ #else \n\ + uint prev_val; \n\ switch (volume_mode) \n\ { \n\ case 1: // Inclusion volume \n\ - if ((stencil & 2u) == 2u) \n\ - pixels[idx].blend_stencil = bitfieldInsert(stencil, 1u, 0, 2); \n\ - else \n\ - pixels[idx].blend_stencil = bitfieldInsert(stencil, 0u, 1, 1); \n\ + prev_val = atomicAnd(pixels[idx].blend_stencil, 0xFFFFFFFDu); \n\ + if ((prev_val & 3u) == 2u) \n\ + pixels[idx].blend_stencil = bitfieldInsert(stencil, 1u, 0, 1); \n\ break; \n\ case 2: // Exclusion volume \n\ - if ((stencil & 3u) == 1u) \n\ - pixels[idx].blend_stencil = bitfieldInsert(stencil, 1u, 0, 2); \n\ - else \n\ - pixels[idx].blend_stencil = bitfieldInsert(stencil, 0u, 0, 2); \n\ + prev_val = atomicAnd(pixels[idx].blend_stencil, 0xFFFFFFFCu); \n\ + if ((prev_val & 3u) == 1u) \n\ + pixels[idx].blend_stencil = bitfieldInsert(stencil, 1u, 0, 1); \n\ break; \n\ } \n\ #endif \n\ } \n\ idx = pixels[idx].next; \n\ - list_len++; // FIXME Why do I need this? Linked list corruption? 
\n\ - if (list_len >= 32) \n\ + list_len++; \n\ + if (list_len >= MAX_PIXELS_PER_FRAGMENT) \n\ break; \n\ } \n\ } \n\ @@ -246,16 +245,19 @@ void initABuffer() g_imageWidth = screen_width; g_imageHeight = screen_height; - if (pixels_pointers == 0) - pixels_pointers = glcache.GenTexture(); - glActiveTexture(GL_TEXTURE4); - glBindTexture(GL_TEXTURE_2D, pixels_pointers); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - //Uses GL_R32F instead of GL_R32I that is not working in R257.15 - glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, g_imageWidth, g_imageHeight, 0, GL_RED, GL_FLOAT, 0); - glBindImageTexture(4, pixels_pointers, 0, false, 0, GL_READ_WRITE, GL_R32UI); - glCheck(); + if (g_imageWidth > 0 && g_imageHeight > 0) + { + if (pixels_pointers == 0) + pixels_pointers = glcache.GenTexture(); + glActiveTexture(GL_TEXTURE4); + glBindTexture(GL_TEXTURE_2D, pixels_pointers); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + //Uses GL_R32F instead of GL_R32I that is not working in R257.15 + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, g_imageWidth, g_imageHeight, 0, GL_RED, GL_FLOAT, 0); + glBindImageTexture(4, pixels_pointers, 0, false, 0, GL_READ_WRITE, GL_R32UI); + glCheck(); + } if (pixels_buffer == 0 ) { @@ -275,21 +277,22 @@ void initABuffer() glGenBuffers(1, &atomic_buffer); // Bind it glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomic_buffer); - // Declare storage - glBufferData(GL_ATOMIC_COUNTER_BUFFER, 4, NULL, GL_DYNAMIC_COPY); + // Declare storage. Using GL_DYNAMIC_READ instead of GL_DYNAMIC_COPY as the latter makes + // reading the counter after each frame very slow. 
+ glBufferData(GL_ATOMIC_COUNTER_BUFFER, 4, NULL, GL_DYNAMIC_READ); glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomic_buffer); glCheck(); } if (g_abuffer_final_shader.program == 0) { - char source[8192]; + char source[16384]; sprintf(source, final_shader_source, 1); CompilePipelineShader(&g_abuffer_final_shader, source); } if (g_abuffer_final_nosort_shader.program == 0) { - char source[8192]; + char source[16384]; sprintf(source, final_shader_source, 0); CompilePipelineShader(&g_abuffer_final_nosort_shader, source); } @@ -297,13 +300,13 @@ void initABuffer() CompilePipelineShader(&g_abuffer_clear_shader, clear_shader_source); if (g_abuffer_tr_modvol_shader.program == 0) { - char source[8192]; + char source[16384]; sprintf(source, tr_modvol_shader_source, 0); CompilePipelineShader(&g_abuffer_tr_modvol_shader, source); } if (g_abuffer_tr_modvol_final_shader.program == 0) { - char source[8192]; + char source[16384]; sprintf(source, tr_modvol_shader_source, 1); CompilePipelineShader(&g_abuffer_tr_modvol_final_shader, source); volume_mode_uniform = glGetUniformLocation(g_abuffer_tr_modvol_final_shader.program, "volume_mode"); @@ -321,25 +324,26 @@ void initABuffer() void reshapeABuffer(int w, int h) { if (w != g_imageWidth || h != g_imageHeight) { - glcache.DeleteTextures(1, &pixels_pointers); - pixels_pointers = 0; - - // FIXME We might need to resize the pixels_buffer accordingly + if (pixels_pointers != 0) + { + glcache.DeleteTextures(1, &pixels_pointers); + pixels_pointers = 0; + } initABuffer(); } } - -void DrawQuad() +void DrawQuad(int width, int height) { glBindVertexArray(g_quadVertexArray); + float xmin = (ShaderUniforms.scale_coefs[2] - 1) * screen_width / 2; struct Vertex vertices[] = { - { 0, screen_height, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 1 }, - { 0, 0, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 0 }, - { screen_width, screen_height, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 1 }, - { screen_width, 0, 0.001, { 255, 255, 255, 
255 }, { 0, 0, 0, 0 }, 1, 0 }, + { xmin, height, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 1 }, + { xmin, 0, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 0 }, + { width, height, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 1 }, + { width, 0, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 0 }, }; GLushort indices[] = { 0, 1, 2, 1, 3 }; @@ -418,6 +422,8 @@ void DrawTranslucentModVols(int first, int count) glcache.UseProgram(g_abuffer_tr_modvol_shader.program); SetCull(ispc.CullMode); glCheck(); + + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); glDrawArrays(GL_TRIANGLES, mod_base * 3, sz * 3); glCheck(); if (mv_mode == 1 || mv_mode == 2) @@ -425,8 +431,9 @@ void DrawTranslucentModVols(int first, int count) //Sum the area glcache.UseProgram(g_abuffer_tr_modvol_final_shader.program); glUniform1i(volume_mode_uniform, mv_mode); + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - DrawQuad(); + DrawQuad(viewport_width, viewport_height); SetupModvolVBO(); //update pointers @@ -443,7 +450,8 @@ void renderABuffer(bool sortFragments) glcache.Disable(GL_DEPTH_TEST); glcache.Disable(GL_CULL_FACE); glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT); - DrawQuad(); + + DrawQuad(viewport_width, viewport_height); glCheck(); @@ -471,7 +479,11 @@ void renderABuffer(bool sortFragments) glCheck(); } } - DrawQuad(); + + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + DrawQuad(viewport_width, viewport_height); + + glMemoryBarrier(GL_ATOMIC_COUNTER_BARRIER_BIT); glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomic_buffer); GLuint zero = 0; @@ -479,6 +491,5 @@ void renderABuffer(bool sortFragments) glActiveTexture(GL_TEXTURE0); - glMemoryBarrier(GL_ATOMIC_COUNTER_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); glCheck(); } diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index e1282c957..6a34786d9 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -85,6 +85,8 @@ GLuint stencilTexId; GLuint 
opaqueTexId; GLuint depthTexId; GLuint texSamplers[2]; +GLuint depth_fbo; +GLuint depthSaveTexId; s32 SetTileClip(u32 val, bool set) { @@ -191,6 +193,9 @@ template } else { + // Two volumes mode only supported for OP and PT + bool two_volumes_mode = (gp->tsp1.full != -1) && Type != ListType_Translucent; + shaderId = GetProgramID(Type == ListType_Punch_Through ? 1 : 0, clipping + 1, gp->pcw.Texture, @@ -199,7 +204,7 @@ template gp->tsp.ShadInstr, gp->pcw.Offset, gp->tsp.FogCtrl, - gp->tsp1.full != -1, + two_volumes_mode, pass); CurrentShader = gl.getShader(shaderId); if (CurrentShader->program == -1) { @@ -211,7 +216,7 @@ template CurrentShader->pp_ShadInstr = gp->tsp.ShadInstr; CurrentShader->pp_Offset = gp->pcw.Offset; CurrentShader->pp_FogCtrl = gp->tsp.FogCtrl; - CurrentShader->pp_TwoVolumes = gp->tsp1.full != -1; + CurrentShader->pp_TwoVolumes = two_volumes_mode; CurrentShader->pass = pass; CompilePipelineShader(CurrentShader); } @@ -302,8 +307,8 @@ template glcache.DepthFunc(Zfunction[gp->isp.DepthMode]); } - // Depth buffer is updated in pass 1 for OP and PT, but in pass 0 for TR (multipass only) - if (pass != 0 || Type == ListType_Translucent) + // Depth buffer is updated in pass 0 (and also in pass 1 for OP PT) + if (pass < 2) glcache.DepthMask(!gp->isp.ZWriteDis); else glcache.DepthMask(GL_FALSE); @@ -607,8 +612,6 @@ GLuint CreateColorFBOTexture() void CreateTextures() { - glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); - stencilTexId = glcache.GenTexture(); glcache.BindTexture(GL_TEXTURE_2D, stencilTexId); glCheck(); glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); // OpenGL >= 4.3 @@ -631,15 +634,13 @@ void CreateTextures() glCheck(); } -void DrawStrips() +void DrawStrips(GLuint output_fbo) { - - GLint output_fbo; - glGetIntegerv(GL_FRAMEBUFFER_BINDING, &output_fbo); // TODO pass fbo id as parameter - if (geom_fbo == 0) { glGenFramebuffers(1, &geom_fbo); + glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); + CreateTextures(); 
GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); @@ -650,19 +651,17 @@ void DrawStrips() { glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); if (stencilTexId == 0) - { CreateTextures(); - } - glcache.ClearColor(0, 0, 0, 0); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - glcache.Disable(GL_SCISSOR_TEST); - glcache.DepthMask(GL_TRUE); - glStencilMask(0xFF); - glClear(GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glCheck(); } if (texSamplers[0] == 0) glGenSamplers(2, texSamplers); + glcache.ClearColor(0, 0, 0, 0); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glcache.DepthMask(GL_TRUE); + glStencilMask(0xFF); + glClear(GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glCheck(); + SetupMainVBO(); //Draw the strips ! @@ -677,121 +676,171 @@ void DrawStrips() { const RenderPass& current_pass = pvrrc.render_passes.head()[render_pass]; - // Check if we can skip this pass in case nothing is drawn (Cosmic Smash) - bool skip = true; - for (int j = previous_pass.op_count; skip && j < current_pass.op_count; j++) + // Check if we can skip this pass, in part or completely, in case nothing is drawn (Cosmic Smash) + bool skip_op_pt = false; // true; + bool skip_tr = false; // true; + for (int j = previous_pass.op_count; skip_op_pt && j < current_pass.op_count; j++) { if (pvrrc.global_param_op.head()[j].count > 2) - skip = false; + skip_op_pt = false; } - for (int j = previous_pass.pt_count; skip && j < current_pass.pt_count; j++) + for (int j = previous_pass.pt_count; skip_op_pt && j < current_pass.pt_count; j++) { if (pvrrc.global_param_pt.head()[j].count > 2) - skip = false; + skip_op_pt = false; } - for (int j = previous_pass.tr_count; skip && j < current_pass.tr_count; j++) + for (int j = previous_pass.tr_count; skip_tr && j < current_pass.tr_count; j++) { if (pvrrc.global_param_tr.head()[j].count > 2) - skip = false; + skip_tr = false; } - if (skip) + if (skip_op_pt && skip_tr) { previous_pass = current_pass; continue; 
} - // - // PASS 1: Geometry pass to update the stencil - // - glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - glcache.Enable(GL_DEPTH_TEST); - glcache.DepthMask(GL_FALSE); - glcache.Enable(GL_STENCIL_TEST); - glcache.StencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); - - DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 0); - DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count, 0); - - // Modifier volumes - DrawModVols(previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); - - // - // PASS 2: Render OP and PT to fbo - // - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - glcache.Disable(GL_STENCIL_TEST); - - // Bind stencil buffer for the fragment shader (shadowing) - glActiveTexture(GL_TEXTURE3); - glBindTexture(GL_TEXTURE_2D, stencilTexId); - glActiveTexture(GL_TEXTURE0); - glCheck(); - - //Opaque - DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 1); - - //Alpha tested - DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count, 1); - - // Unbind stencil - glActiveTexture(GL_TEXTURE3); - glBindTexture(GL_TEXTURE_2D, 0); - glActiveTexture(GL_TEXTURE0); - - glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - - // - // PASS 3: Render TR to a-buffers - // - SetupMainVBO(); - glcache.Disable(GL_DEPTH_TEST); - - glActiveTexture(GL_TEXTURE2); - glBindTexture(GL_TEXTURE_2D, depthTexId); - glActiveTexture(GL_TEXTURE0); - - //Alpha blended - DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); // 3 because pass 2 is no more - glCheck(); - - // Translucent modifier volumes - DrawTranslucentModVols(previous_pass.mvo_tr_count, current_pass.mvo_tr_count - previous_pass.mvo_tr_count); - - if (render_pass < render_pass_count - 1) + if (!skip_op_pt) { // - // PASS 3b: Geometry pass with TR to update 
the depth for the next TA render pass + // PASS 1: Geometry pass to update depth and stencil // - // Unbind depth texture - glActiveTexture(GL_TEXTURE2); - glBindTexture(GL_TEXTURE_2D, 0); - glActiveTexture(GL_TEXTURE0); + if (render_pass > 0) + { + // Make a copy of the depth buffer that will be reused in pass 2 + if (depth_fbo == 0) + glGenFramebuffers(1, &depth_fbo); + glBindFramebuffer(GL_FRAMEBUFFER, depth_fbo); + if (depthSaveTexId == 0) + { + depthSaveTexId = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, depthSaveTexId); + glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH32F_STENCIL8, screen_width, screen_height, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, NULL); glCheck(); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depthSaveTexId, 0); glCheck(); + } + GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); + verify(uStatus == GL_FRAMEBUFFER_COMPLETE); + glBindFramebuffer(GL_READ_FRAMEBUFFER, geom_fbo); + glBlitFramebuffer(0, 0, screen_width, screen_height, 0, 0, screen_width, screen_height, GL_DEPTH_BUFFER_BIT, GL_NEAREST); + glCheck(); + + glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); + } + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); glcache.Enable(GL_DEPTH_TEST); - DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 0); + glcache.DepthMask(GL_TRUE); + glcache.Enable(GL_STENCIL_TEST); + glcache.StencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); + + DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 0); + DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count, 0); + + // Modifier volumes + 
DrawModVols(previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); // - // PASS 3c: Render a-buffer to temporary texture + // PASS 2: Render OP and PT to fbo // - GLuint texId = CreateColorFBOTexture(); + if (render_pass == 0) + { + glcache.DepthMask(GL_TRUE); + glClear(GL_DEPTH_BUFFER_BIT); + } + else + { + // Restore the depth buffer from the last render pass + // FIXME This is pretty slow apparently (CS) + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, geom_fbo); + glBindFramebuffer(GL_READ_FRAMEBUFFER, depth_fbo); + glBlitFramebuffer(0, 0, screen_width, screen_height, 0, 0, screen_width, screen_height, GL_DEPTH_BUFFER_BIT, GL_NEAREST); + glCheck(); + glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); + } glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glcache.Disable(GL_STENCIL_TEST); + // Bind stencil buffer for the fragment shader (shadowing) + glActiveTexture(GL_TEXTURE3); + glBindTexture(GL_TEXTURE_2D, stencilTexId); glActiveTexture(GL_TEXTURE0); - glBindSampler(0, 0); - glBindTexture(GL_TEXTURE_2D, opaqueTexId); + glCheck(); - renderABuffer(pvrrc.isAutoSort); + //Opaque + DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 1); + + //Alpha tested + DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count, 1); + + // Unbind stencil + glActiveTexture(GL_TEXTURE3); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0); + } + + if (!skip_tr) + { + // + // PASS 3: Render TR to a-buffers + // SetupMainVBO(); + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + glcache.Disable(GL_DEPTH_TEST); - glcache.DeleteTextures(1, &opaqueTexId); - opaqueTexId = texId; + glActiveTexture(GL_TEXTURE2); + glBindTexture(GL_TEXTURE_2D, depthTexId); + glActiveTexture(GL_TEXTURE0); + //Alpha blended + DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); // 3 because pass 2 is no more + glCheck(); + + // 
Translucent modifier volumes + DrawTranslucentModVols(previous_pass.mvo_tr_count, current_pass.mvo_tr_count - previous_pass.mvo_tr_count); + + if (render_pass < render_pass_count - 1) + { + // + // PASS 3b: Geometry pass with TR to update the depth for the next TA render pass + // + // Unbind depth texture + glActiveTexture(GL_TEXTURE2); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0); + + glcache.Enable(GL_DEPTH_TEST); + DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 0); + + // + // PASS 3c: Render a-buffer to temporary texture + // + GLuint texId = CreateColorFBOTexture(); + + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + + glActiveTexture(GL_TEXTURE0); + glBindSampler(0, 0); + glBindTexture(GL_TEXTURE_2D, opaqueTexId); + + renderABuffer(pvrrc.isAutoSort); + SetupMainVBO(); + + glcache.DeleteTextures(1, &opaqueTexId); + opaqueTexId = texId; + + glCheck(); + } + } + + if (!skip_op_pt && render_pass < render_pass_count - 1) + { // Clear the stencil from this pass glStencilMask(0xFF); glClear(GL_STENCIL_BUFFER_BIT); - - glCheck(); } previous_pass = current_pass; diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index e6673ed78..72b06fcb3 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -60,6 +60,7 @@ Tile clip float fb_scale_x,fb_scale_y; float scale_x, scale_y; +int viewport_width, viewport_height; #ifndef GLES #define attr "in" @@ -210,7 +211,7 @@ const char* PixelPipelineShader = SHADER_HEADER #define DST_ALPHA 6u \n\ #define INVERSE_DST_ALPHA 7u \n\ \n\ -#if pp_TwoVolumes == 1 // FIXME This is not needed for pass 3 (TR) and causes issues? 
Fix it Felix!\n\ +#if pp_TwoVolumes == 1 \n\ #define IF(x) if (x) \n\ #else \n\ #define IF(x) \n\ @@ -224,7 +225,6 @@ uniform lowp vec3 sp_FOG_COL_RAM,sp_FOG_COL_VERT; \n\ uniform highp vec2 sp_LOG_FOG_COEFS; \n\ uniform highp float sp_FOG_DENSITY; \n\ uniform highp float shade_scale_factor; \n\ -uniform lowp vec2 screen_size; \n\ uniform sampler2D tex0, tex1; \n\ uniform sampler2D fog_table; \n\ uniform int pp_Number; \n\ @@ -263,7 +263,7 @@ void main() \n\ // Depth func Always seems to be needed ? \n\ if (pp_DepthFunc != 7) // TODO Use a #def \n\ { \n\ - highp float frontDepth = texture(DepthTex, gl_FragCoord.xy / screen_size).r; \n\ + highp float frontDepth = texture(DepthTex, gl_FragCoord.xy / textureSize(DepthTex, 0)).r; \n\ if (gl_FragDepth > frontDepth) \n\ discard; \n\ } \n\ @@ -293,7 +293,7 @@ void main() \n\ int cur_shading_instr = shading_instr[0]; \n\ int cur_fog_control = fog_control[0]; \n\ #if PASS == 1 \n\ - uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / screen_size); \n\ + uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / textureSize(shadow_stencil, 0)); \n\ if (stencil.r == 0x81u) { \n\ color = vtx_base1; \n\ offset = vtx_offs1; \n\ @@ -366,7 +366,7 @@ void main() \n\ } \n\ #endif\n\ #if PASS == 1 && pp_TwoVolumes == 0 \n\ - uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / screen_size); \n\ + uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / textureSize(shadow_stencil, 0)); \n\ if (stencil.r == 0x81u) \n\ color.rgb *= shade_scale_factor; \n\ #endif\n\ @@ -386,74 +386,73 @@ void main() \n\ FRAGCOL " = color; \n\ #elif PASS > 1 \n\ // Discard as many pixels as possible \n\ - bool ignore = false; \n\ switch (cur_blend_mode.y) // DST \n\ { \n\ case ONE: \n\ - switch (cur_blend_mode.x) \n\ + switch (cur_blend_mode.x) // SRC \n\ { \n\ case ZERO: \n\ - ignore = true; \n\ - break; \n\ + discard; \n\ case ONE: \n\ case OTHER_COLOR: \n\ case INVERSE_OTHER_COLOR: \n\ - ignore = color.r == 0.0 && color.g == 0.0 && color.b 
== 0.0 && color.a == 0.0; \n\ + if (color == vec4(0, 0, 0, 0)) \n\ + discard; \n\ break; \n\ case SRC_ALPHA: \n\ - ignore = (color.r == 0.0 && color.g == 0.0 && color.b == 0.0) || color.a == 0.0; \n\ + if (color.rgb == vec3(0, 0, 0) || color.a == 0) \n\ + discard; \n\ break; \n\ case INVERSE_SRC_ALPHA: \n\ - ignore = (color.r == 0.0 && color.g == 0.0 && color.b == 0.0) || color.a == 1.0; \n\ + if (color.rgb == vec3(0, 0, 0) || color.a == 1) \n\ + discard; \n\ break; \n\ } \n\ break; \n\ case OTHER_COLOR: \n\ - if (cur_blend_mode.x == ZERO && color.r == 1.0 && color.g == 1.0 && color.b == 1.0 && color.a == 1.0) \n\ - ignore = true; \n\ + if (cur_blend_mode.x == ZERO && color == vec4(1, 1, 1, 1)) \n\ + discard; \n\ break; \n\ case INVERSE_OTHER_COLOR: \n\ - if (cur_blend_mode.x <= SRC_ALPHA && color.r == 0.0 && color.g == 0.0 && color.b == 0.0 && color.a == 0.0) \n\ - ignore = true; \n\ + if (cur_blend_mode.x <= SRC_ALPHA && color == vec4(0, 0, 0, 0)) \n\ + discard; \n\ break; \n\ case SRC_ALPHA: \n\ - if ((cur_blend_mode.x == ZERO || cur_blend_mode.x == INVERSE_SRC_ALPHA) && color.a == 1.0) \n\ - ignore = true; \n\ + if ((cur_blend_mode.x == ZERO || cur_blend_mode.x == INVERSE_SRC_ALPHA) && color.a == 1) \n\ + discard; \n\ break; \n\ case INVERSE_SRC_ALPHA: \n\ switch (cur_blend_mode.x) // SRC \n\ { \n\ case ZERO: \n\ case SRC_ALPHA: \n\ - ignore = color.a == 0.0; \n\ + if (color.a == 0) \n\ + discard; \n\ break; \n\ case ONE: \n\ case OTHER_COLOR: \n\ case INVERSE_OTHER_COLOR: \n\ - ignore = color.r == 0.0 && color.g == 0.0 && color.b == 0.0 && color.a == 0.0; \n\ + if (color == vec4(0, 0, 0, 0)) \n\ + discard; \n\ break; \n\ } \n\ break; \n\ } \n\ \n\ - \n\ - \n\ - if (!ignore) \n\ - { \n\ - ivec2 coords = ivec2(gl_FragCoord.xy); \n\ - uint idx = atomicCounterIncrement(buffer_index); \n\ - if (idx >= pixels.length()) { \n\ - discard; \n\ - return; \n\ - } \n\ - Pixel pixel; \n\ - pixel.color = color; \n\ - pixel.depth = gl_FragDepth; \n\ - pixel.seq_num = 
pp_Number; \n\ - pixel.blend_stencil = (cur_blend_mode.x * 8u + cur_blend_mode.y) * 256u + pp_Stencil; \n\ - pixel.next = imageAtomicExchange(abufferPointerImg, coords, idx); \n\ - pixels[idx] = pixel; \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + uint idx = getNextPixelIndex(); \n\ + if (idx >= pixels.length()) { \n\ + discard; \n\ + return; \n\ } \n\ + Pixel pixel; \n\ + pixel.color = color; \n\ + pixel.depth = gl_FragDepth; \n\ + pixel.seq_num = pp_Number; \n\ + pixel.blend_stencil = (cur_blend_mode.x * 8u + cur_blend_mode.y) * 256u + pp_Stencil; \n\ + pixel.next = imageAtomicExchange(abufferPointerImg, coords, idx); \n\ + pixels[idx] = pixel; \n\ + \n\ discard; \n\ \n\ #endif \n\ @@ -904,7 +903,7 @@ int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipelineShader */) { - char pshader[8192]; + char pshader[16384]; sprintf(pshader, source, s->cp_AlphaTest,s->pp_ClipTestMode,s->pp_UseAlpha, @@ -947,7 +946,6 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe s->sp_FOG_COL_RAM=-1; s->sp_LOG_FOG_COEFS=-1; } - s->screen_size = glGetUniformLocation(s->program, "screen_size"); s->shade_scale_factor = glGetUniformLocation(s->program, "shade_scale_factor"); // Use texture 1 for depth texture @@ -1826,6 +1824,8 @@ bool RenderFrame() ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f; + GLuint output_fbo; + //setup render target first if (is_rtt) { @@ -1864,7 +1864,9 @@ bool RenderFrame() } //printf("RTT packmode=%d stride=%d - %d,%d -> %d,%d\n", FB_W_CTRL.fb_packmode, FB_W_LINESTRIDE.stride * 8, // FB_X_CLIP.min, FB_Y_CLIP.min, FB_X_CLIP.max, FB_Y_CLIP.max); - BindRTT(FB_W_SOF1 & VRAM_MASK, dc_width, dc_height, channels, format); + output_fbo = BindRTT(FB_W_SOF1 & VRAM_MASK, dc_width, dc_height, channels, format); + viewport_width = dc_width; + viewport_height = dc_height; } else { @@ -1873,6 +1875,9 @@ bool RenderFrame() 
glBindFramebuffer(GL_FRAMEBUFFER,0); #endif glViewport(0, 0, screen_width, screen_height); + viewport_width = screen_width; + viewport_height = screen_height; + output_fbo = 0; } bool wide_screen_on = !is_rtt && settings.rend.WideScreen @@ -1933,7 +1938,7 @@ bool RenderFrame() //restore scale_x scale_x /= scissoring_scale_x; - DrawStrips(); + DrawStrips(output_fbo); #if HOST_OS==OS_WINDOWS //Sleep(40); //to test MT stability @@ -2013,6 +2018,11 @@ struct glesrend : Renderer glcache.DeleteTextures(1, &opaqueTexId); opaqueTexId = 0; } + if (depthSaveTexId != 0) + { + glcache.DeleteTextures(1, &depthSaveTexId); + depthSaveTexId = 0; + } reshapeABuffer(w, h); } void Term() { } diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 3fa3c72a4..80ee1f2ae 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -47,9 +47,10 @@ //vertex types extern u32 gcflip; extern float scale_x, scale_y; +extern int viewport_width, viewport_height; -void DrawStrips(); +void DrawStrips(GLuint output_fbo); struct PipelineShader { @@ -59,7 +60,6 @@ struct PipelineShader GLuint pp_ClipTest,cp_AlphaTestValue; GLuint sp_FOG_COL_RAM,sp_FOG_COL_VERT,sp_FOG_DENSITY,sp_LOG_FOG_COEFS; GLuint shade_scale_factor; - GLuint screen_size; GLuint pp_Number; GLuint pp_Stencil; GLuint pp_DepthFunc; @@ -140,7 +140,7 @@ void SortPParams(int first, int count); extern int screen_width; extern int screen_height; -void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt); +GLuint BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt); void ReadRTTBuffer(); int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, @@ -193,9 +193,6 @@ struct ShaderUniforms_t if (s->sp_LOG_FOG_COEFS!=-1) glUniform2fv(s->sp_LOG_FOG_COEFS,1, fog_coefs); - if (s->screen_size != -1) - glUniform2f(s->screen_size, (float)screen_width, (float)screen_height); - if (s->shade_scale_factor != -1) glUniform1f(s->shade_scale_factor, 
FPU_SHAD_SCALE.scale_factor / 256.f); @@ -237,10 +234,11 @@ GLuint loadPNG(const string& subpath, int &width, int &height); extern GLuint stencilTexId; extern GLuint depthTexId; extern GLuint opaqueTexId; +extern GLuint depthSaveTexId; #define SHADER_HEADER "#version 430 \n\ \n\ -coherent uniform layout(size1x32, binding = 4) uimage2D abufferPointerImg; \n\ +layout(size1x32, binding = 4) uniform coherent restrict uimage2D abufferPointerImg; \n\ struct Pixel { \n\ mediump vec4 color; \n\ mediump float depth; \n\ @@ -249,11 +247,16 @@ struct Pixel { \n\ uint next; \n\ }; \n\ #define EOL 0xFFFFFFFFu \n\ -layout (binding = 0, std430) buffer PixelBuffer { \n\ +layout (binding = 0, std430) coherent restrict buffer PixelBuffer { \n\ Pixel pixels[]; \n\ }; \n\ layout(binding = 0, offset = 0) uniform atomic_uint buffer_index; \n\ \n\ +uint getNextPixelIndex() \n\ +{ \n\ + return atomicCounterIncrement(buffer_index); \n\ +} \n\ +\n\ void setFragDepth(void) \n\ { \n\ highp float w = 100000.0 * gl_FragCoord.w; \n\ diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index fd50f1a26..d2f5e02c0 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -386,7 +386,7 @@ struct FBT FBT fb_rtt; -void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) +GLuint BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) { FBT& rv=fb_rtt; @@ -395,11 +395,11 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) rv.TexAddr=addy>>3; - // Find the largest square power of two texture that fits into the viewport - int fbh2 = 2; + // Find the smallest power of two texture that fits the viewport + int fbh2 = 8; while (fbh2 < fbh) fbh2 *= 2; - int fbw2 = 2; + int fbw2 = 8; while (fbw2 < fbw) fbw2 *= 2; @@ -425,6 +425,8 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) verify(uStatus == GL_FRAMEBUFFER_COMPLETE); glViewport(0, 0, fbw, fbh); // TODO CLIP_X/Y min? 
+ + return rv.fbo; } void ReadRTTBuffer() { From ef954dfe26e15e5d8b438308ba059efee76ad134 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 2 Jun 2018 23:12:01 +0200 Subject: [PATCH 23/65] Got rid of viewport_width/height. Another quad fix. --- core/rend/gles/abuffer.cpp | 27 ++++++++++++++++++--------- core/rend/gles/gles.cpp | 5 ----- core/rend/gles/gles.h | 1 - 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index 19e563557..7aa7cfa87 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -334,16 +334,25 @@ void reshapeABuffer(int w, int h) } } -void DrawQuad(int width, int height) +void DrawQuad() { glBindVertexArray(g_quadVertexArray); - float xmin = (ShaderUniforms.scale_coefs[2] - 1) * screen_width / 2; + float xmin = (ShaderUniforms.scale_coefs[2] - 1) / ShaderUniforms.scale_coefs[0]; + float xmax = (ShaderUniforms.scale_coefs[2] + 1) / ShaderUniforms.scale_coefs[0]; + float ymin = (ShaderUniforms.scale_coefs[3] - 1) / ShaderUniforms.scale_coefs[1]; + float ymax = (ShaderUniforms.scale_coefs[3] + 1) / ShaderUniforms.scale_coefs[1]; + if (ymin > ymax) + { + float t = ymin; + ymin = ymax; + ymax = t; + } struct Vertex vertices[] = { - { xmin, height, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 1 }, - { xmin, 0, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 0 }, - { width, height, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 1 }, - { width, 0, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 0 }, + { xmin, ymax, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 1 }, + { xmin, ymin, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 0 }, + { xmax, ymax, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 1 }, + { xmax, ymin, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 0 }, }; GLushort indices[] = { 0, 1, 2, 1, 3 }; @@ -433,7 +442,7 @@ void DrawTranslucentModVols(int first, int count) glUniform1i(volume_mode_uniform, mv_mode); 
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - DrawQuad(viewport_width, viewport_height); + DrawQuad(); SetupModvolVBO(); //update pointers @@ -451,7 +460,7 @@ void renderABuffer(bool sortFragments) glcache.Disable(GL_CULL_FACE); glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT); - DrawQuad(viewport_width, viewport_height); + DrawQuad(); glCheck(); @@ -481,7 +490,7 @@ void renderABuffer(bool sortFragments) } glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); - DrawQuad(viewport_width, viewport_height); + DrawQuad(); glMemoryBarrier(GL_ATOMIC_COUNTER_BARRIER_BIT); diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 72b06fcb3..5fec048ea 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -60,7 +60,6 @@ Tile clip float fb_scale_x,fb_scale_y; float scale_x, scale_y; -int viewport_width, viewport_height; #ifndef GLES #define attr "in" @@ -1865,8 +1864,6 @@ bool RenderFrame() //printf("RTT packmode=%d stride=%d - %d,%d -> %d,%d\n", FB_W_CTRL.fb_packmode, FB_W_LINESTRIDE.stride * 8, // FB_X_CLIP.min, FB_Y_CLIP.min, FB_X_CLIP.max, FB_Y_CLIP.max); output_fbo = BindRTT(FB_W_SOF1 & VRAM_MASK, dc_width, dc_height, channels, format); - viewport_width = dc_width; - viewport_height = dc_height; } else { @@ -1875,8 +1872,6 @@ bool RenderFrame() glBindFramebuffer(GL_FRAMEBUFFER,0); #endif glViewport(0, 0, screen_width, screen_height); - viewport_width = screen_width; - viewport_height = screen_height; output_fbo = 0; } diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 80ee1f2ae..b1e470cd7 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -47,7 +47,6 @@ //vertex types extern u32 gcflip; extern float scale_x, scale_y; -extern int viewport_width, viewport_height; void DrawStrips(GLuint output_fbo); From 5cff417da1a022dd4b275d2740d2a33f24292f15 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 3 Jun 2018 11:39:32 +0200 Subject: [PATCH 24/65] Texture-based fog table --- 
core/rend/gles/gles.cpp | 102 +++++++++++----------------------------- core/rend/gles/gles.h | 6 +-- 2 files changed, 29 insertions(+), 79 deletions(-) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 5fec048ea..ace5e33cb 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -221,11 +221,10 @@ const char* PixelPipelineShader = SHADER_HEADER uniform lowp float cp_AlphaTestValue; \n\ uniform lowp vec4 pp_ClipTest; \n\ uniform lowp vec3 sp_FOG_COL_RAM,sp_FOG_COL_VERT; \n\ -uniform highp vec2 sp_LOG_FOG_COEFS; \n\ uniform highp float sp_FOG_DENSITY; \n\ uniform highp float shade_scale_factor; \n\ uniform sampler2D tex0, tex1; \n\ -uniform sampler2D fog_table; \n\ +layout(binding = 5) uniform sampler2D fog_table; \n\ uniform int pp_Number; \n\ uniform usampler2D shadow_stencil; \n\ uniform sampler2D DepthTex; \n\ @@ -250,8 +249,11 @@ uniform int fog_control[2]; \n\ " vary " mediump float vtx_z; \n\ lowp float fog_mode2(highp float w) \n\ { \n\ - highp float fog_idx = clamp(w * sp_FOG_DENSITY, 0.0, 127.99); \n\ - return clamp(sp_LOG_FOG_COEFS.y * log2(fog_idx) + sp_LOG_FOG_COEFS.x, 0.001, 1.0); //the clamp is required due to yet another bug !\n\ + uint i = clamp(uint(floor(log2(w * sp_FOG_DENSITY))), 0u, 7u); \n\ + highp float m = w * sp_FOG_DENSITY * 16 / pow(2, i) - 16; \n\ + float idx = floor(m) + i * 16 + 0.5; \n\ + vec4 fog_coef = texture(fog_table, vec2(idx / 128, 0.75 - (m - floor(m)) / 2)); \n\ + return fog_coef.a; \n\ } \n\ void main() \n\ { \n\ @@ -488,6 +490,7 @@ gl_ctx gl; int screen_width; int screen_height; +GLuint fogTextureId; #if (HOST_OS != OS_DARWIN) && !defined(TARGET_NACL32) #if defined(GLES) && !defined(USE_SDL) @@ -938,12 +941,10 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe if (s->pp_FogCtrl==0 || s->pp_FogCtrl==3) { s->sp_FOG_COL_RAM=glGetUniformLocation(s->program, "sp_FOG_COL_RAM"); - s->sp_LOG_FOG_COEFS=glGetUniformLocation(s->program, "sp_LOG_FOG_COEFS"); } else { 
s->sp_FOG_COL_RAM=-1; - s->sp_LOG_FOG_COEFS=-1; } s->shade_scale_factor = glGetUniformLocation(s->program, "shade_scale_factor"); @@ -1124,68 +1125,31 @@ bool gles_init() } -void tryfit(float* x,float* y) +void UpdateFogTexture(u8 *fog_table) { - //y=B*ln(x)+A - - double sylnx=0,sy=0,slnx=0,slnx2=0; - - u32 cnt=0; - - for (int i=0;i<128;i++) + glActiveTexture(GL_TEXTURE5); + if (fogTextureId == 0) { - int rep=1; - - //discard values clipped to 0 or 1 - if (i<127 && y[i]==1 && y[i+1]==1) - continue; - - if (i>0 && y[i]==0 && y[i-1]==0) - continue; - - //Add many samples for first and last value (fog-in, fog-out -> important) - if (i>0 && y[i]!=1 && y[i-1]==1) - rep=10000; - - if (i<127 && y[i]!=0 && y[i+1]==0) - rep=10000; - - for (int j=0;j>4)*(1+(i&15)/16.f); - yvals[i]=fog_table[i*4+1]/255.0f; - } - - tryfit(xvals,yvals); + fog_needs_update = false; + UpdateFogTexture((u8 *)FOG_TABLE); } glcache.UseProgram(gl.modvol_shader.program); diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index b1e470cd7..498a6e529 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -57,7 +57,7 @@ struct PipelineShader GLuint scale,depth_scale; GLuint pp_ClipTest,cp_AlphaTestValue; - GLuint sp_FOG_COL_RAM,sp_FOG_COL_VERT,sp_FOG_DENSITY,sp_LOG_FOG_COEFS; + GLuint sp_FOG_COL_RAM,sp_FOG_COL_VERT,sp_FOG_DENSITY; GLuint shade_scale_factor; GLuint pp_Number; GLuint pp_Stencil; @@ -154,7 +154,6 @@ struct ShaderUniforms_t float fog_den_float; float ps_FOG_COL_RAM[3]; float ps_FOG_COL_VERT[3]; - float fog_coefs[2]; int poly_number; u32 stencil; TSP tsp0; @@ -189,9 +188,6 @@ struct ShaderUniforms_t if (s->sp_FOG_COL_VERT!=-1) glUniform3fv( s->sp_FOG_COL_VERT, 1, ps_FOG_COL_VERT); - if (s->sp_LOG_FOG_COEFS!=-1) - glUniform2fv(s->sp_LOG_FOG_COEFS,1, fog_coefs); - if (s->shade_scale_factor != -1) glUniform1f(s->shade_scale_factor, FPU_SHAD_SCALE.scale_factor / 256.f); From 11caeb9d02c8d61b82d784903d02e878ffda4a3d Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Mon, 4 Jun 2018 
12:46:30 +0200 Subject: [PATCH 25/65] Fix OSD location when scale_x > 1. Reset atomic abuffer counter and check overflow at beginning of cycle, hopefully for better perfs --- core/rend/gles/abuffer.cpp | 54 ++++++++++++++++++++------------------ core/rend/gles/gldraw.cpp | 17 +++++++----- core/rend/gles/gles.cpp | 26 ++++-------------- core/rend/gles/gles.h | 7 ++++- 4 files changed, 51 insertions(+), 53 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index 7aa7cfa87..82f9dc3b8 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -277,10 +277,11 @@ void initABuffer() glGenBuffers(1, &atomic_buffer); // Bind it glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomic_buffer); - // Declare storage. Using GL_DYNAMIC_READ instead of GL_DYNAMIC_COPY as the latter makes - // reading the counter after each frame very slow. - glBufferData(GL_ATOMIC_COUNTER_BUFFER, 4, NULL, GL_DYNAMIC_READ); + // Declare storage + glBufferData(GL_ATOMIC_COUNTER_BUFFER, 4, NULL, GL_DYNAMIC_COPY); glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomic_buffer); + GLint zero = 0; + glBufferSubData(GL_SHADER_STORAGE_BUFFER, 0, sizeof(GLint), &zero); glCheck(); } @@ -451,24 +452,11 @@ void DrawTranslucentModVols(int first, int count) } } -void renderABuffer(bool sortFragments) +void checkOverflowAndReset() { - glcache.UseProgram(sortFragments ? 
g_abuffer_final_shader.program : g_abuffer_final_nosort_shader.program); - ShaderUniforms.Set(&g_abuffer_final_shader); - - glcache.Disable(GL_DEPTH_TEST); - glcache.Disable(GL_CULL_FACE); - glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT); - - DrawQuad(); - - glCheck(); - - glcache.UseProgram(g_abuffer_clear_shader.program); - ShaderUniforms.Set(&g_abuffer_clear_shader); - + // Using atomic counter GLuint max_pixel_index = 0; - glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, 4, &max_pixel_index); + glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &max_pixel_index); // printf("ABUFFER %d pixels used\n", max_pixel_index); if ((max_pixel_index + 1) * 32 - 1 >= pixel_buffer_size) { @@ -488,16 +476,32 @@ void renderABuffer(bool sortFragments) glCheck(); } } + // Reset counter + max_pixel_index = 0; + glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0 , sizeof(GLuint), &max_pixel_index); +} + +void renderABuffer(bool sortFragments) +{ + // Render to output FBO + glcache.UseProgram(sortFragments ? 
g_abuffer_final_shader.program : g_abuffer_final_nosort_shader.program); + ShaderUniforms.Set(&g_abuffer_final_shader); + + glcache.Disable(GL_DEPTH_TEST); + glcache.Disable(GL_CULL_FACE); + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT); + + DrawQuad(); + + glCheck(); + + // Clear A-buffer pointers + glcache.UseProgram(g_abuffer_clear_shader.program); + ShaderUniforms.Set(&g_abuffer_clear_shader); glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); DrawQuad(); - glMemoryBarrier(GL_ATOMIC_COUNTER_BARRIER_BIT); - - glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomic_buffer); - GLuint zero = 0; - glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0 , sizeof(GLuint), &zero); - glActiveTexture(GL_TEXTURE0); glCheck(); diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 6a34786d9..51d48eb9c 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -116,12 +116,14 @@ s32 SetTileClip(u32 val, bool set) if (csx <= 0 && csy <= 0 && cex >= 640 && cey >= 480) return 0; - if (set && clip_mode) { - csx *= scale_x; - csy *= scale_y; - cex *= scale_x; - cey *= scale_y; - if (!pvrrc.isRTT) { + if (set && clip_mode) + { + if (!pvrrc.isRTT) + { + csx *= scale_x; + csy *= scale_y; + cex *= scale_x; + cey *= scale_y; float t = cey; cey = 480 - csy; csy = 480 - t; @@ -596,6 +598,7 @@ void DrawModVols(int first, int count) void renderABuffer(bool sortFragments); void DrawTranslucentModVols(int first, int count); +void checkOverflowAndReset(); GLuint CreateColorFBOTexture() { @@ -636,6 +639,8 @@ void CreateTextures() void DrawStrips(GLuint output_fbo) { + checkOverflowAndReset(); + if (geom_fbo == 0) { glGenFramebuffers(1, &geom_fbo); diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index ace5e33cb..5321a005c 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -442,10 +442,7 @@ void main() \n\ \n\ ivec2 coords = ivec2(gl_FragCoord.xy); \n\ uint idx = getNextPixelIndex(); \n\ - if (idx >= 
pixels.length()) { \n\ - discard; \n\ - return; \n\ - } \n\ + \n\ Pixel pixel; \n\ pixel.color = color; \n\ pixel.depth = gl_FragDepth; \n\ @@ -1315,7 +1312,7 @@ static void DrawRightedText(float yy, float scale, int transparency, const char* float w=float(strlen(text)*14)*scale; - float x = 320 + 240 * screen_width / screen_height - w; + float x = (ShaderUniforms.scale_coefs[2] + 1) / ShaderUniforms.scale_coefs[0] - w; float y=yy; float h=16.0f*scale; w=14.0f*scale; @@ -1487,18 +1484,8 @@ void OSD_DRAW() #endif if (osd_font) { - float u=0; - float v=0; - verify(glIsProgram(gl.OSD_SHADER.program)); - float dc_width=640; - float dc_height=480; - - float dc2s_scale_h=screen_height/480.0f; - float ds2s_offs_x=(screen_width-dc2s_scale_h*640)/2; - - glcache.BindTexture(GL_TEXTURE_2D,osd_font); glcache.UseProgram(gl.OSD_SHADER.program); @@ -1506,15 +1493,12 @@ void OSD_DRAW() glcache.Disable(GL_DEPTH_TEST); glcache.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - glcache.DepthMask(false); glcache.DepthFunc(GL_ALWAYS); - glcache.Disable(GL_CULL_FACE); glcache.Disable(GL_SCISSOR_TEST); - int dfa=osd_count/4; for (int i=0;i= pixels.length()) \n\ + // Buffer overflow \n\ + discard; \n\ + \n\ + return index; \n\ } \n\ \n\ void setFragDepth(void) \n\ From 64e5d585b6cdb250f292ee0db1387efd2abb779f Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Mon, 4 Jun 2018 19:30:48 +0200 Subject: [PATCH 26/65] Disable overflow detection until I can figure out why it slows things down like that. Buffer set to 512MB. Also z=1 value for quads looks logical. 
--- core/rend/gles/abuffer.cpp | 50 +++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index 82f9dc3b8..ccf2c862e 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -21,7 +21,7 @@ static GLuint g_quadVertexArray = 0; static int g_imageWidth = 0; static int g_imageHeight = 0; -GLuint pixel_buffer_size = 64 * 1024 * 1024; // Initial size 64 MB +GLuint pixel_buffer_size = 512 * 1024 * 1024; // Initial size 64 MB #define MAX_PIXELS_PER_FRAGMENT "32" @@ -350,10 +350,10 @@ void DrawQuad() ymax = t; } struct Vertex vertices[] = { - { xmin, ymax, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 1 }, - { xmin, ymin, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 0 }, - { xmax, ymax, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 1 }, - { xmax, ymin, 0.001, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 0 }, + { xmin, ymax, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 1 }, + { xmin, ymin, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 0 }, + { xmax, ymax, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 1 }, + { xmax, ymin, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 0 }, }; GLushort indices[] = { 0, 1, 2, 1, 3 }; @@ -456,26 +456,26 @@ void checkOverflowAndReset() { // Using atomic counter GLuint max_pixel_index = 0; - glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &max_pixel_index); -// printf("ABUFFER %d pixels used\n", max_pixel_index); - if ((max_pixel_index + 1) * 32 - 1 >= pixel_buffer_size) - { - GLint64 size; - glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &size); - if (pixel_buffer_size == size) - printf("A-buffer overflow: %d pixels. Buffer size already maxed out\n", max_pixel_index); - else - { - pixel_buffer_size = (GLuint)min(2 * (GLint64)pixel_buffer_size, size); - - printf("A-buffer overflow: %d pixels. 
Resizing buffer to %d MB\n", max_pixel_index, pixel_buffer_size / 1024 / 1024); - - glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer); - glBufferData(GL_SHADER_STORAGE_BUFFER, pixel_buffer_size, NULL, GL_DYNAMIC_COPY); - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer); - glCheck(); - } - } +// glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &max_pixel_index); +//// printf("ABUFFER %d pixels used\n", max_pixel_index); +// if ((max_pixel_index + 1) * 32 - 1 >= pixel_buffer_size) +// { +// GLint64 size; +// glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &size); +// if (pixel_buffer_size == size) +// printf("A-buffer overflow: %d pixels. Buffer size already maxed out\n", max_pixel_index); +// else +// { +// pixel_buffer_size = (GLuint)min(2 * (GLint64)pixel_buffer_size, size); +// +// printf("A-buffer overflow: %d pixels. Resizing buffer to %d MB\n", max_pixel_index, pixel_buffer_size / 1024 / 1024); +// +// glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer); +// glBufferData(GL_SHADER_STORAGE_BUFFER, pixel_buffer_size, NULL, GL_DYNAMIC_COPY); +// glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer); +// glCheck(); +// } +// } // Reset counter max_pixel_index = 0; glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0 , sizeof(GLuint), &max_pixel_index); From 63b90a5c8ebd1316e938bbee762950047807a844 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 5 Jun 2018 12:06:51 +0200 Subject: [PATCH 27/65] fog "Z" value must be clamped between 1 and 255.9 --- core/rend/gles/gles.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 5321a005c..ea471c825 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -249,8 +249,9 @@ uniform int fog_control[2]; \n\ " vary " mediump float vtx_z; \n\ lowp float fog_mode2(highp float w) \n\ { \n\ - uint i = clamp(uint(floor(log2(w * sp_FOG_DENSITY))), 0u, 7u); \n\ - highp float m = w * sp_FOG_DENSITY * 16 / pow(2, 
i) - 16; \n\ + highp float z = clamp(w * sp_FOG_DENSITY, 1.0, 255.9999); \n\ + uint i = uint(floor(log2(z))); \n\ + highp float m = z * 16 / pow(2, i) - 16; \n\ float idx = floor(m) + i * 16 + 0.5; \n\ vec4 fog_coef = texture(fog_table, vec2(idx / 128, 0.75 - (m - floor(m)) / 2)); \n\ return fog_coef.a; \n\ From 9eded0cf372cb78296f16cb5b8b9624f174bf9e4 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Thu, 7 Jun 2018 17:13:42 +0200 Subject: [PATCH 28/65] Fix SW1-JPB random texture corruption problem and video choppiness. --- core/hw/pvr/pvr_regs.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/core/hw/pvr/pvr_regs.cpp b/core/hw/pvr/pvr_regs.cpp index 9bcdba2d2..01cbafd41 100644 --- a/core/hw/pvr/pvr_regs.cpp +++ b/core/hw/pvr/pvr_regs.cpp @@ -71,8 +71,11 @@ void pvr_WriteReg(u32 paddr,u32 data) return; } - if (addr == TA_YUV_TEX_BASE_addr) + if (addr == TA_YUV_TEX_BASE_addr) { + PvrReg(addr, u32) = data; YUV_init(); + return; + } if (addr>=PALETTE_RAM_START_addr) { From 56ed4f70339cbee685bbdb5ae09dc2afb4795756 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Thu, 7 Jun 2018 17:33:40 +0200 Subject: [PATCH 29/65] Added TA_YUV_TEX_CTRL struct --- core/hw/pvr/pvr_mem.cpp | 10 +++++----- core/hw/pvr/pvr_regs.h | 19 +++++++++++++++++-- 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/core/hw/pvr/pvr_mem.cpp b/core/hw/pvr/pvr_mem.cpp index 81c91e36e..3ada3b2cc 100644 --- a/core/hw/pvr/pvr_mem.cpp +++ b/core/hw/pvr/pvr_mem.cpp @@ -38,9 +38,9 @@ void YUV_init() YUV_dest=TA_YUV_TEX_BASE&VRAM_MASK;//TODO : add the masking needed TA_YUV_TEX_CNT=0; - YUV_blockcount=(((TA_YUV_TEX_CTRL>>0)&0x3F)+1)*(((TA_YUV_TEX_CTRL>>8)&0x3F)+1); + YUV_blockcount = (TA_YUV_TEX_CTRL.yuv_u_size + 1) * (TA_YUV_TEX_CTRL.yuv_v_size + 1); - if ((TA_YUV_TEX_CTRL>>16 )&1) + if (TA_YUV_TEX_CTRL.yuv_tex != 0) { die ("YUV: Not supported configuration\n"); YUV_x_size=16; @@ -48,8 +48,8 @@ void YUV_init() } else // yesh!!! 
{ - YUV_x_size=(((TA_YUV_TEX_CTRL>>0)&0x3F)+1)*16; - YUV_y_size=(((TA_YUV_TEX_CTRL>>8)&0x3F)+1)*16; + YUV_x_size = (TA_YUV_TEX_CTRL.yuv_u_size + 1) * 16; + YUV_y_size = (TA_YUV_TEX_CTRL.yuv_v_size + 1) * 16; } } @@ -165,7 +165,7 @@ void YUV_data(u32* data , u32 count) YUV_init(); } - u32 block_size=(TA_YUV_TEX_CTRL & (1<<24))==0?384:512; + u32 block_size = TA_YUV_TEX_CTRL.yuv_form == 0 ? 384 : 512; verify(block_size==384); //no support for 512 diff --git a/core/hw/pvr/pvr_regs.h b/core/hw/pvr/pvr_regs.h index 93969c8bc..94125bfc4 100644 --- a/core/hw/pvr/pvr_regs.h +++ b/core/hw/pvr/pvr_regs.h @@ -370,7 +370,22 @@ union TA_GLOB_TILE_CLIP_type }; u32 full; }; - + +union TA_YUV_TEX_CTRL_type +{ + struct + { + u32 yuv_u_size : 6; + u32 reserved1 : 2; + u32 yuv_v_size : 6; + u32 reserved2 : 2; + u32 yuv_tex : 1; + u32 reserved3 : 7; + u32 yuv_form : 1; + u32 reserved4 : 7; + }; + u32 full; +}; // TA REGS #define TA_OL_BASE_addr 0x00000124 // RW Object list write start address @@ -483,7 +498,7 @@ union TA_GLOB_TILE_CLIP_type #define TA_ALLOC_CTRL PvrReg(TA_ALLOC_CTRL_addr,u32) // RW Object list control #define TA_LIST_INIT PvrReg(TA_LIST_INIT_addr,u32) // RW TA initialization #define TA_YUV_TEX_BASE PvrReg(TA_YUV_TEX_BASE_addr,u32) // RW YUV422 texture write start address -#define TA_YUV_TEX_CTRL PvrReg(TA_YUV_TEX_CTRL_addr,u32) // RW YUV converter control +#define TA_YUV_TEX_CTRL PvrReg(TA_YUV_TEX_CTRL_addr, TA_YUV_TEX_CTRL_type) // RW YUV converter control #define TA_YUV_TEX_CNT PvrReg(TA_YUV_TEX_CNT_addr,u32) // R YUV converter macro block counter value #define TA_LIST_CONT PvrReg(TA_LIST_CONT_addr,u32) // RW TA continuation processing From 718c341aa46121f3c3b305160810228986a06eb3 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Thu, 7 Jun 2018 17:36:43 +0200 Subject: [PATCH 30/65] Increase TA context size (verts, idx) --- core/hw/pvr/ta_ctx.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index 
ab4d7ffae..ab405c99b 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -194,8 +194,8 @@ struct TA_context { tad.Reset((u8*)OS_aligned_malloc(32, 2*1024*1024)); - rend.verts.InitBytes(2*1024*1024, &rend.Overrun, "verts"); //up to 2 mb of vtx data/frame = ~ 48k vtx/frame - rend.idx.Init(60*1024, &rend.Overrun, "idx"); //up to 60K indexes ( idx have stripification overhead ) + rend.verts.InitBytes(4 * 1024 * 1024, &rend.Overrun, "verts"); //up to 4 mb of vtx data/frame = ~ 96k vtx/frame + rend.idx.Init(120 * 1024, &rend.Overrun, "idx"); //up to 120K indexes ( idx have stripification overhead ) rend.global_param_op.Init(4096, &rend.Overrun, "global_param_op"); rend.global_param_pt.Init(4096, &rend.Overrun, "global_param_pt"); rend.global_param_mvo.Init(4096, &rend.Overrun, "global_param_mvo"); From 43109bfc4524edef7b5b7dc7cd394f612ab82a30 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Thu, 7 Jun 2018 18:23:26 +0200 Subject: [PATCH 31/65] Use float constants in GLSL --- core/rend/gles/gles.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 2164e45da..362ee5179 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -260,7 +260,7 @@ uint getNextPixelIndex() \n\ void setFragDepth(void) \n\ { \n\ highp float w = 100000.0 * gl_FragCoord.w; \n\ - gl_FragDepth = 1 - log2(1.0 + w) / 34; \n\ + gl_FragDepth = 1.0 - log2(1.0 + w) / 34.0; \n\ } \n\ " From 609d5bcd19ee63a0ca06ed8acb1f2705a771be7f Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Fri, 8 Jun 2018 19:35:03 +0200 Subject: [PATCH 32/65] Push more silence on audio underrun to catch up --- core/oslib/audiobackend_alsa.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/oslib/audiobackend_alsa.cpp b/core/oslib/audiobackend_alsa.cpp index e25c560ce..6764b54a0 100644 --- a/core/oslib/audiobackend_alsa.cpp +++ b/core/oslib/audiobackend_alsa.cpp @@ -7,6 +7,8 @@ static bool pcm_blocking = true; static 
snd_pcm_uframes_t buffer_size; static snd_pcm_uframes_t period_size; +#define MAX_LATENCY 100 + // We're making these functions static - there's no need to pollute the global namespace static void alsa_init() { @@ -89,7 +91,7 @@ static void alsa_init() } else printf("ALSA: period size set to %ld\n", period_size); - buffer_size = (44100 * 100 /* settings.omx.Audio_Latency */ / 1000 / period_size + 1) * period_size; + buffer_size = (44100 * MAX_LATENCY / 1000 / period_size + 1) * period_size; rc=snd_pcm_hw_params_set_buffer_size_near(handle, params, &buffer_size); if (rc < 0) { @@ -122,7 +124,7 @@ static u32 alsa_push(void* frame, u32 samples, bool wait) fprintf(stderr, "ALSA: underrun occurred\n"); snd_pcm_prepare(handle); // Write some silence then our samples - const size_t silence_size = period_size; + const size_t silence_size = period_size * 4; void *silence = alloca(silence_size * 4); memset(silence, 0, silence_size * 4); rc = snd_pcm_writei(handle, silence, silence_size); From 0e091e2e8423ffda6abf1945ace665f3724ed6c2 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 9 Jun 2018 12:22:47 +0200 Subject: [PATCH 33/65] Reset a-buffer pointers at init --- core/rend/gles/abuffer.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index ccf2c862e..a86d17803 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -240,6 +240,8 @@ void main(void) \n\ } \n\ "; +void DrawQuad(); + void initABuffer() { g_imageWidth = screen_width; @@ -320,6 +322,14 @@ void initABuffer() glCheck(); + // Clear A-buffer pointers + glcache.UseProgram(g_abuffer_clear_shader.program); + ShaderUniforms.Set(&g_abuffer_clear_shader); + + DrawQuad(); + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + + glCheck(); } void reshapeABuffer(int w, int h) From 9b2762e1b418bfeaa18ff79c857724c3c19634fc Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Mon, 25 Jun 2018 11:26:32 +0200 Subject: [PATCH 34/65] Fix 
non-autosort translucent polys drawing. Depth func must be handled manually in non-autosort translucent mode. So it's now done in the final a-buffer shader. Fixes Namco logo in Soulcalibur. --- core/rend/gles/abuffer.cpp | 102 ++++++++++++++++++++++++++----------- core/rend/gles/gldraw.cpp | 32 ++++++++---- core/rend/gles/gles.cpp | 67 +++++++++++++----------- core/rend/gles/gles.h | 22 +++++--- 4 files changed, 148 insertions(+), 75 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index a86d17803..c882dbd2f 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -94,70 +94,110 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ bubbleSort(num_frag); \n\ \n\ vec4 finalColor = texture(tex, gl_FragCoord.xy / textureSize(tex, 0)); \n\ + float depth = 1.0; \n\ for (int i = 0; i < num_frag; i++) { \n\ +#if DEPTH_SORTED != 1 \n\ + float frag_depth = pixel_list[i].depth; \n\ + switch ((int(pixel_list[i].blend_stencil) >> 16) & 7) \n\ + { \n\ + case 0: // Never \n\ + continue; \n\ + case 1: // Greater \n\ + if (frag_depth <= depth) \n\ + continue; \n\ + break; \n\ + case 2: // Equal \n\ + if (frag_depth != depth) \n\ + continue; \n\ + break; \n\ + case 3: // Greater or equal \n\ + if (frag_depth < depth) \n\ + continue; \n\ + break; \n\ + case 4: // Less \n\ + if (frag_depth >= depth) \n\ + continue; \n\ + break; \n\ + case 5: // Not equal \n\ + if (frag_depth == depth) \n\ + continue; \n\ + break; \n\ + case 6: // Less or equal \n\ + if (frag_depth > depth) \n\ + continue; \n\ + break; \n\ + case 7: // Always \n\ + break; \n\ + } \n\ + bool depth_mask = ((int(pixel_list[i].blend_stencil) >> 19) & 1) == 1; \n\ + if (depth_mask) \n\ + depth = frag_depth; \n\ +#endif \n\ vec4 srcColor = pixel_list[i].color; \n\ if ((pixel_list[i].blend_stencil & 0x81u) == 0x81u) \n\ srcColor.rgb *= shade_scale_factor; \n\ float srcAlpha = srcColor.a; \n\ float dstAlpha = finalColor.a; \n\ vec4 srcCoef; \n\ + vec4 dstCoef; \n\ \n\ - int srcBlend 
= int(pixel_list[i].blend_stencil) / 256 / 8; \n\ + int srcBlend = (int(pixel_list[i].blend_stencil) >> 11) & 7; \n\ switch (srcBlend) \n\ { \n\ - case 0: // zero \n\ - srcCoef = vec4(0); \n\ + case ZERO: \n\ + srcCoef = vec4(0.0); \n\ break; \n\ - case 1: // one \n\ - srcCoef = vec4(1); \n\ + case ONE: \n\ + srcCoef = vec4(1.0); \n\ break; \n\ - case 2: // other color \n\ + case OTHER_COLOR: \n\ srcCoef = finalColor; \n\ break; \n\ - case 3: // inverse other color \n\ - srcCoef = vec4(1) - finalColor; \n\ + case INVERSE_OTHER_COLOR: \n\ + srcCoef = vec4(1.0) - finalColor; \n\ break; \n\ - case 4: // src alpha \n\ + case SRC_ALPHA: \n\ srcCoef = vec4(srcAlpha); \n\ break; \n\ - case 5: // inverse src alpha \n\ - srcCoef = vec4(1 - srcAlpha); \n\ + case INVERSE_SRC_ALPHA: \n\ + srcCoef = vec4(1.0 - srcAlpha); \n\ break; \n\ - case 6: // dst alpha \n\ + case DST_ALPHA: \n\ srcCoef = vec4(dstAlpha); \n\ break; \n\ - case 7: // inverse dst alpha \n\ - srcCoef = vec4(1 - dstAlpha); \n\ + case INVERSE_DST_ALPHA: \n\ + srcCoef = vec4(1.0 - dstAlpha); \n\ break; \n\ } \n\ - int dstBlend = (int(pixel_list[i].blend_stencil) / 256) % 8; \n\ + int dstBlend = (int(pixel_list[i].blend_stencil) >> 8) & 7; \n\ switch (dstBlend) \n\ { \n\ - case 0: // zero \n\ - finalColor = vec4(0); \n\ + case ZERO: \n\ + dstCoef = vec4(0.0); \n\ break; \n\ - case 1: // one \n\ + case ONE: \n\ + dstCoef = vec4(1.0); \n\ break; \n\ - case 2: // other color \n\ - finalColor *= srcColor; \n\ + case OTHER_COLOR: \n\ + dstCoef = srcColor; \n\ break; \n\ - case 3: // inverse other color \n\ - finalColor *= vec4(1) - srcColor; \n\ + case INVERSE_OTHER_COLOR: \n\ + dstCoef = vec4(1.0) - srcColor; \n\ break; \n\ - case 4: // src alpha \n\ - finalColor *= srcAlpha; \n\ + case SRC_ALPHA: \n\ + dstCoef = vec4(srcAlpha); \n\ break; \n\ - case 5: // inverse src alpha \n\ - finalColor *= 1 - srcAlpha; \n\ + case INVERSE_SRC_ALPHA: \n\ + dstCoef = vec4(1.0 - srcAlpha); \n\ break; \n\ - case 6: // dst alpha \n\ - 
finalColor *= dstAlpha; \n\ + case DST_ALPHA: \n\ + dstCoef = vec4(dstAlpha); \n\ break; \n\ - case 7: // inverse dst alpha \n\ - finalColor *= 1 - dstAlpha; \n\ + case INVERSE_DST_ALPHA: \n\ + dstCoef = vec4(1.0 - dstAlpha); \n\ break; \n\ } \n\ - finalColor = clamp(finalColor + srcColor * srcCoef, 0, 1); \n\ + finalColor = clamp(finalColor * dstCoef + srcColor * srcCoef, 0.0, 1.0); \n\ } \n\ \n\ return finalColor; \n\ diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 51d48eb9c..acbab18d7 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -33,12 +33,12 @@ const static u32 Zfunction[]= GL_NOTEQUAL, //GL_NOTEQUAL, //5 Not Equal GL_GEQUAL, //GL_GEQUAL, //6 Greater Or Equal #else - GL_GREATER, //GL_LESS/*EQUAL*/, //1 Less - GL_EQUAL, //GL_EQUAL, //2 Equal - GL_GEQUAL, //GL_LEQUAL, //3 Less Or Equal - GL_LESS, //GL_GREATER/*EQUAL*/, //4 Greater - GL_NOTEQUAL, //GL_NOTEQUAL, //5 Not Equal - GL_LEQUAL, //GL_GEQUAL, //6 Greater Or Equal + GL_GREATER, //1 Less + GL_EQUAL, //2 Equal + GL_GEQUAL, //3 Less Or Equal + GL_LESS, //4 Greater + GL_NOTEQUAL, //5 Not Equal + GL_LEQUAL, //6 Greater Or Equal #endif GL_ALWAYS, //GL_ALWAYS, //7 Always }; @@ -46,8 +46,8 @@ const static u32 Zfunction[]= /* 0 Zero (0, 0, 0, 0) 1 One (1, 1, 1, 1) -2 Dither Color (OR, OG, OB, OA) -3 Inverse Dither Color (1-OR, 1-OG, 1-OB, 1-OA) +2 Other Color (OR, OG, OB, OA) +3 Inverse Other Color (1-OR, 1-OG, 1-OB, 1-OA) 4 SRC Alpha (SA, SA, SA, SA) 5 Inverse SRC Alpha (1-SA, 1-SA, 1-SA, 1-SA) 6 DST Alpha (DA, DA, DA, DA) @@ -177,6 +177,7 @@ template 0, 2, false, // TODO Can PT have two different textures for area 0 and 1 ?? 
+ 0, pass); CurrentShader = gl.getShader(shaderId); if (CurrentShader->program == -1) { @@ -189,6 +190,7 @@ template CurrentShader->pp_Offset = 0; CurrentShader->pp_FogCtrl = 2; CurrentShader->pp_TwoVolumes = false; + CurrentShader->pp_DepthFunc = 0; CurrentShader->pass = pass; CompilePipelineShader(CurrentShader); } @@ -198,6 +200,16 @@ template // Two volumes mode only supported for OP and PT bool two_volumes_mode = (gp->tsp1.full != -1) && Type != ListType_Translucent; + int depth_func = 0; + if (Type == ListType_Translucent) + { + // TR in autosort mode ignores specified depth func and defaults to GL_LEQUAL, except for GL_ALWAYS. + if (SortingEnabled && gp->isp.DepthMode != 7 && gp->isp.DepthMode != 6) + depth_func = 6; + else + depth_func = gp->isp.DepthMode; + } + shaderId = GetProgramID(Type == ListType_Punch_Through ? 1 : 0, clipping + 1, gp->pcw.Texture, @@ -207,6 +219,7 @@ template gp->pcw.Offset, gp->tsp.FogCtrl, two_volumes_mode, + depth_func, pass); CurrentShader = gl.getShader(shaderId); if (CurrentShader->program == -1) { @@ -219,6 +232,7 @@ template CurrentShader->pp_Offset = gp->pcw.Offset; CurrentShader->pp_FogCtrl = gp->tsp.FogCtrl; CurrentShader->pp_TwoVolumes = two_volumes_mode; + CurrentShader->pp_DepthFunc = depth_func; CurrentShader->pass = pass; CompilePipelineShader(CurrentShader); } @@ -248,7 +262,7 @@ template glcache.StencilFunc(GL_ALWAYS,stencil,stencil); ShaderUniforms.stencil = stencil; - ShaderUniforms.depth_func = gp->isp.DepthMode; + ShaderUniforms.depth_mask = gp->isp.ZWriteDis == 0; ShaderUniforms.Set(CurrentShader); if (CurrentShader->pp_Texture) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index ea471c825..ff4777c58 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -193,6 +193,7 @@ const char* PixelPipelineShader = SHADER_HEADER #define pp_Offset %d \n\ #define pp_FogCtrl %d \n\ #define pp_TwoVolumes %d \n\ +#define pp_DepthFunc %d \n\ #define PASS %d \n" #ifndef GLES "\ @@ -201,15 +202,6 
@@ const char* PixelPipelineShader = SHADER_HEADER #endif \n" #endif "\ -#define ZERO 0u \n\ -#define ONE 1u \n\ -#define OTHER_COLOR 2u \n\ -#define INVERSE_OTHER_COLOR 3u \n\ -#define SRC_ALPHA 4u \n\ -#define INVERSE_SRC_ALPHA 5u \n\ -#define DST_ALPHA 6u \n\ -#define INVERSE_DST_ALPHA 7u \n\ - \n\ #if pp_TwoVolumes == 1 \n\ #define IF(x) if (x) \n\ #else \n\ @@ -229,9 +221,9 @@ uniform int pp_Number; \n\ uniform usampler2D shadow_stencil; \n\ uniform sampler2D DepthTex; \n\ uniform uint pp_Stencil; \n\ -uniform int pp_DepthFunc; \n\ +uniform bool depth_mask; \n\ \n\ -uniform uvec2 blend_mode[2]; \n\ +uniform ivec2 blend_mode[2]; \n\ #if pp_TwoVolumes == 1 \n\ uniform bool use_alpha[2]; \n\ uniform bool ignore_tex_alpha[2]; \n\ @@ -262,14 +254,30 @@ void main() \n\ \n\ #if PASS == 3 \n\ // Manual depth testing \n\ - // Depth func Always seems to be needed ? \n\ - if (pp_DepthFunc != 7) // TODO Use a #def \n\ - { \n\ - highp float frontDepth = texture(DepthTex, gl_FragCoord.xy / textureSize(DepthTex, 0)).r; \n\ + highp float frontDepth = texture(DepthTex, gl_FragCoord.xy / textureSize(DepthTex, 0)).r; \n\ + #if pp_DepthFunc == 0 // Never \n\ + discard; \n\ + #elif pp_DepthFunc == 1 // Greater \n\ + if (gl_FragDepth <= frontDepth) \n\ + discard; \n\ + #elif pp_DepthFunc == 2 // Equal \n\ + if (gl_FragDepth != frontDepth) \n\ + discard; \n\ + #elif pp_DepthFunc == 3 // Greater or equal \n\ + if (gl_FragDepth < frontDepth) \n\ + discard; \n\ + #elif pp_DepthFunc == 4 // Less \n\ + if (gl_FragDepth >= frontDepth) \n\ + discard; \n\ + #elif pp_DepthFunc == 5 // Not equal \n\ + if (gl_FragDepth == frontDepth) \n\ + discard; \n\ + #elif pp_DepthFunc == 6 // Less or equal \n\ if (gl_FragDepth > frontDepth) \n\ discard; \n\ - } \n\ + #endif \n\ #endif \n\ + \n\ // Clip outside the box \n\ #if pp_ClipTestMode==1 \n\ if (gl_FragCoord.x < pp_ClipTest.x || gl_FragCoord.x > pp_ClipTest.z \n\ @@ -287,7 +295,7 @@ void main() \n\ lowp vec4 offset = vtx_offs; \n\ mediump vec2 uv = 
vtx_uv; \n\ bool area1 = false; \n\ - uvec2 cur_blend_mode = blend_mode[0]; \n\ + ivec2 cur_blend_mode = blend_mode[0]; \n\ \n\ #if pp_TwoVolumes == 1 \n\ bool cur_use_alpha = use_alpha[0]; \n\ @@ -398,29 +406,29 @@ void main() \n\ case ONE: \n\ case OTHER_COLOR: \n\ case INVERSE_OTHER_COLOR: \n\ - if (color == vec4(0, 0, 0, 0)) \n\ + if (color == vec4(0.0)) \n\ discard; \n\ break; \n\ case SRC_ALPHA: \n\ - if (color.rgb == vec3(0, 0, 0) || color.a == 0) \n\ + if (color.rgb == vec3(0.0) || color.a == 0.0) \n\ discard; \n\ break; \n\ case INVERSE_SRC_ALPHA: \n\ - if (color.rgb == vec3(0, 0, 0) || color.a == 1) \n\ + if (color.rgb == vec3(0.0) || color.a == 1.0) \n\ discard; \n\ break; \n\ } \n\ break; \n\ case OTHER_COLOR: \n\ - if (cur_blend_mode.x == ZERO && color == vec4(1, 1, 1, 1)) \n\ + if (cur_blend_mode.x == ZERO && color == vec4(1.0)) \n\ discard; \n\ break; \n\ case INVERSE_OTHER_COLOR: \n\ - if (cur_blend_mode.x <= SRC_ALPHA && color == vec4(0, 0, 0, 0)) \n\ + if (cur_blend_mode.x <= SRC_ALPHA && color == vec4(0.0)) \n\ discard; \n\ break; \n\ case SRC_ALPHA: \n\ - if ((cur_blend_mode.x == ZERO || cur_blend_mode.x == INVERSE_SRC_ALPHA) && color.a == 1) \n\ + if ((cur_blend_mode.x == ZERO || cur_blend_mode.x == INVERSE_SRC_ALPHA) && color.a == 1.0) \n\ discard; \n\ break; \n\ case INVERSE_SRC_ALPHA: \n\ @@ -428,13 +436,13 @@ void main() \n\ { \n\ case ZERO: \n\ case SRC_ALPHA: \n\ - if (color.a == 0) \n\ + if (color.a == 0.0) \n\ discard; \n\ break; \n\ case ONE: \n\ case OTHER_COLOR: \n\ case INVERSE_OTHER_COLOR: \n\ - if (color == vec4(0, 0, 0, 0)) \n\ + if (color == vec4(0.0)) \n\ discard; \n\ break; \n\ } \n\ @@ -448,7 +456,7 @@ void main() \n\ pixel.color = color; \n\ pixel.depth = gl_FragDepth; \n\ pixel.seq_num = pp_Number; \n\ - pixel.blend_stencil = (cur_blend_mode.x * 8u + cur_blend_mode.y) * 256u + pp_Stencil; \n\ + pixel.blend_stencil = uint(depth_mask << 19) + uint(pp_DepthFunc << 16) + uint(((cur_blend_mode.x << 3) + cur_blend_mode.y) << 8) + 
pp_Stencil; \n\ pixel.next = imageAtomicExchange(abufferPointerImg, coords, idx); \n\ pixels[idx] = pixel; \n\ \n\ @@ -883,7 +891,7 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, - u32 pp_FogCtrl, bool pp_TwoVolumes, int pass) + u32 pp_FogCtrl, bool pp_TwoVolumes, u32 pp_DepthFunc, int pass) { u32 rv=0; @@ -896,6 +904,7 @@ int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, rv<<=1; rv|=pp_Offset; rv<<=2; rv|=pp_FogCtrl; rv <<= 1; rv |= (int)pp_TwoVolumes; + rv <<= 3; rv |= pp_DepthFunc; rv <<= 2; rv |= pass; return rv; @@ -907,7 +916,7 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe sprintf(pshader, source, s->cp_AlphaTest,s->pp_ClipTestMode,s->pp_UseAlpha, - s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pp_TwoVolumes, s->pass); + s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pp_TwoVolumes, s->pp_DepthFunc, s->pass); s->program=gl_CompileAndLink(VertexShaderSource,pshader); @@ -958,13 +967,13 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe s->pp_Number = glGetUniformLocation(s->program, "pp_Number"); s->pp_Stencil = glGetUniformLocation(s->program, "pp_Stencil"); - s->pp_DepthFunc = glGetUniformLocation(s->program, "pp_DepthFunc"); s->blend_mode = glGetUniformLocation(s->program, "blend_mode"); s->use_alpha = glGetUniformLocation(s->program, "use_alpha"); s->ignore_tex_alpha = glGetUniformLocation(s->program, "ignore_tex_alpha"); s->shading_instr = glGetUniformLocation(s->program, "shading_instr"); s->fog_control = glGetUniformLocation(s->program, "fog_control"); + s->depth_mask = glGetUniformLocation(s->program, "depth_mask"); return glIsProgram(s->program)==GL_TRUE; } diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 362ee5179..de96cf2af 
100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -61,16 +61,17 @@ struct PipelineShader GLuint shade_scale_factor; GLuint pp_Number; GLuint pp_Stencil; - GLuint pp_DepthFunc; GLuint blend_mode; GLuint use_alpha; GLuint ignore_tex_alpha; GLuint shading_instr; GLuint fog_control; + GLuint depth_mask; // u32 cp_AlphaTest; s32 pp_ClipTestMode; u32 pp_Texture, pp_UseAlpha, pp_IgnoreTexA, pp_ShadInstr, pp_Offset, pp_FogCtrl; + u32 pp_DepthFunc; int pass; bool pp_TwoVolumes; }; @@ -143,7 +144,7 @@ GLuint BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt); void ReadRTTBuffer(); int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, - u32 pp_FogCtrl, bool two_volumes, int pass); + u32 pp_FogCtrl, bool two_volumes, u32 pp_DepthFunc, int pass); void SetCull(u32 CulliMode); struct ShaderUniforms_t @@ -160,7 +161,7 @@ struct ShaderUniforms_t TSP tsp1; TCW tcw0; TCW tcw1; - int depth_func; + bool depth_mask; void setUniformArray(GLuint location, int v0, int v1) { @@ -193,7 +194,7 @@ struct ShaderUniforms_t if (s->blend_mode != -1) { u32 blend_mode[] = { tsp0.SrcInstr, tsp0.DstInstr, tsp1.SrcInstr, tsp1.DstInstr }; - glUniform2uiv(s->blend_mode, 2, blend_mode); + glUniform2iv(s->blend_mode, 2, (GLint *)blend_mode); } if (s->use_alpha != -1) @@ -214,8 +215,8 @@ struct ShaderUniforms_t if (s->pp_Stencil != -1) glUniform1ui(s->pp_Stencil, stencil); - if (s->pp_DepthFunc != -1) - glUniform1i(s->pp_DepthFunc, depth_func); + if (s->depth_mask != -1) + glUniform1i(s->depth_mask, depth_mask); } }; @@ -247,6 +248,15 @@ layout (binding = 0, std430) coherent restrict buffer PixelBuffer { \n\ }; \n\ layout(binding = 0, offset = 0) uniform atomic_uint buffer_index; \n\ \n\ +#define ZERO 0 \n\ +#define ONE 1 \n\ +#define OTHER_COLOR 2 \n\ +#define INVERSE_OTHER_COLOR 3 \n\ +#define SRC_ALPHA 4 \n\ +#define INVERSE_SRC_ALPHA 5 \n\ +#define DST_ALPHA 6 \n\ +#define INVERSE_DST_ALPHA 7 
\n\ + \n\ uint getNextPixelIndex() \n\ { \n\ uint index = atomicCounterIncrement(buffer_index); \n\ From 13725ccdc10d8dc92043df4f19fec4f71cfb6aae Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Mon, 25 Jun 2018 15:48:57 +0200 Subject: [PATCH 35/65] Workaround for Virtua Tennis ball color problem. There's a texture corruption of the tennis ball and other textures, notably the players' bags in the first intro sequence. The corruption is due to render to texture squashing existing textures. Not sure what's going on but this avoids the texture corruption. The original problem remains. --- core/rend/gles/gltex.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index d2f5e02c0..cbe5d76c1 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -465,7 +465,7 @@ void ReadRTTBuffer() { glPixelStorei(GL_PACK_ALIGNMENT, 1); u16 *src = temp_tex_buffer; - u16 *dst = (u16 *)&vram[fb_rtt.TexAddr << 3]; + u16 *dst = (u16 *)&vram[tex_addr]; GLint color_fmt, color_type; glGetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT, &color_fmt); @@ -526,7 +526,7 @@ void ReadRTTBuffer() { } else { - memset(&vram[fb_rtt.TexAddr << 3], '\0', size); + //memset(&vram[fb_rtt.TexAddr << 3], '\0', size); } //dumpRtTexture(fb_rtt.TexAddr, w, h); From 2ee45d8d6b3f27ed71834fc0fab4b9f7277e3318 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Mon, 25 Jun 2018 16:48:57 +0200 Subject: [PATCH 36/65] Optimize RTT to VRAM. Add US version of THPS2 to per-game settings. 
--- core/nullDC.cpp | 2 +- core/rend/gles/gltex.cpp | 39 ++++++++++++++++++++++++--------------- 2 files changed, 25 insertions(+), 16 deletions(-) diff --git a/core/nullDC.cpp b/core/nullDC.cpp index 96336e1fd..187891639 100755 --- a/core/nullDC.cpp +++ b/core/nullDC.cpp @@ -131,7 +131,7 @@ cThread webui_thd(&webui_th,0); void LoadSpecialSettings() { - if (!strncmp("T13008D", reios_product_number, 7)) + if (!strncmp("T13008D", reios_product_number, 7) || !strncmp("T13006N", reios_product_number, 7)) // Tony Hawk's Pro Skater 2 settings.rend.RenderToTextureBuffer = 1; if (!strncmp("RDC-0057", reios_product_number, 8)) diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index cbe5d76c1..b713c17d5 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -487,23 +487,32 @@ void ReadRTTBuffer() { glReadPixels(0, h - lines, w, chunk_lines, GL_RGBA, GL_UNSIGNED_BYTE, p); for (u32 l = 0; l < chunk_lines; l++) { - for (u32 c = 0; c < w; c++) { - switch(fb_packmode) - { - case 0: //0x0 0555 KRGB 16 bit (default) Bit 15 is the value of fb_kval[7]. + switch(fb_packmode) + { + case 0: //0x0 0555 KRGB 16 bit (default) Bit 15 is the value of fb_kval[7]. + for (u32 c = 0; c < w; c++) { *dst++ = (((p[0] >> 3) & 0x1F) << 10) | (((p[1] >> 3) & 0x1F) << 5) | ((p[2] >> 3) & 0x1F) | kval_bit; - break; - case 1: //0x1 565 RGB 16 bit - *dst++ = (((p[0] >> 3) & 0x1F) << 11) | (((p[1] >> 2) & 0x3F) << 5) | ((p[2] >> 3) & 0x1F); - break; - case 2: //0x2 4444 ARGB 16 bit - *dst++ = (((p[0] >> 4) & 0xF) << 8) | (((p[1] >> 4) & 0xF) << 4) | ((p[2] >> 4) & 0xF) | (((p[3] >> 4) & 0xF) << 12); - break; - case 3://0x3 1555 ARGB 16 bit The alpha value is determined by comparison with the value of fb_alpha_threshold. - *dst++ = (((p[0] >> 3) & 0x1F) << 10) | (((p[1] >> 3) & 0x1F) << 5) | ((p[2] >> 3) & 0x1F) | (p[3] >= fb_alpha_threshold ? 
0x8000 : 0); - break; + p += 4; } - p += 4; + break; + case 1: //0x1 565 RGB 16 bit + for (u32 c = 0; c < w; c++) { + *dst++ = (((p[0] >> 3) & 0x1F) << 11) | (((p[1] >> 2) & 0x3F) << 5) | ((p[2] >> 3) & 0x1F); + p += 4; + } + break; + case 2: //0x2 4444 ARGB 16 bit + for (u32 c = 0; c < w; c++) { + *dst++ = (((p[0] >> 4) & 0xF) << 8) | (((p[1] >> 4) & 0xF) << 4) | ((p[2] >> 4) & 0xF) | (((p[3] >> 4) & 0xF) << 12); + p += 4; + } + break; + case 3://0x3 1555 ARGB 16 bit The alpha value is determined by comparison with the value of fb_alpha_threshold. + for (u32 c = 0; c < w; c++) { + *dst++ = (((p[0] >> 3) & 0x1F) << 10) | (((p[1] >> 3) & 0x1F) << 5) | ((p[2] >> 3) & 0x1F) | (p[3] >= fb_alpha_threshold ? 0x8000 : 0); + p += 4; + } + break; } dst += (stride - w * 2) / 2; } From b38dea86ee847ea0acb879a8993352871c6926ef Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 26 Jun 2018 14:24:45 +0200 Subject: [PATCH 37/65] Improve modifier volume parsing and drawing logic. Parse modifier volumes similarly to other polys (first, count, params). Draw all triangles in one shot and use quad to sum up instead of redrawing entire strip. Use OR operation for open volumes/quads (Soulcalibur). Support for open transparent modifier volumes (OR). 
--- core/hw/pvr/ta_ctx.h | 9 +-- core/hw/pvr/ta_vtx.cpp | 44 +++++++----- core/rend/gles/abuffer.cpp | 109 +++++++++++++--------------- core/rend/gles/gldraw.cpp | 141 ++++++++++++++----------------------- core/rend/gles/gles.h | 1 + 5 files changed, 130 insertions(+), 174 deletions(-) diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index ab405c99b..d54e1f4e9 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -46,10 +46,11 @@ struct PolyParam u32 texid1; }; -struct ModParam +struct ModifierVolumeParam { - u32 first; //entry index , holds vertex/pos data + u32 first; u32 count; + ISP_Modvol isp; }; struct ModTriangle @@ -127,8 +128,8 @@ struct rend_context List verts; List idx; List modtrig; - List global_param_mvo; - List global_param_mvo_tr; + List global_param_mvo; + List global_param_mvo_tr; List global_param_op; List global_param_pt; diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index 211fd9169..1dfd60750 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -770,18 +770,9 @@ public: { CurrentPP=&nullPP; CurrentPPlist=0; - if (ListType==ListType_Opaque_Modifier_Volume) - { - ISP_Modvol p; - p.id=vdrc.modtrig.used(); - *vdrc.global_param_mvo.Append()=p; - } - else if (ListType == ListType_Translucent_Modifier_Volume) - { - ISP_Modvol p; - p.id = vdrc.modtrig.used(); - *vdrc.global_param_mvo_tr.Append()=p; - } + + if (ListType == ListType_Opaque_Modifier_Volume || ListType == ListType_Translucent_Modifier_Volume) + EndModVol(); } /* @@ -1433,21 +1424,38 @@ public: #endif } - //ModVolumes + // Modifier Volumes Vertex handlers + + static void EndModVol() + { + List *list = NULL; + if (CurrentList == ListType_Opaque_Modifier_Volume) + list = &vdrc.global_param_mvo; + else if (CurrentList == ListType_Translucent_Modifier_Volume) + list = &vdrc.global_param_mvo_tr; + else + return; + if (list->used() > 0) + { + ModifierVolumeParam *p = &(list->head()[list->used() - 1]); + p->count = vdrc.modtrig.used() - p->first; + } 
+ } - //Mod Volume Vertex handlers static void StartModVol(TA_ModVolParam* param) { - ISP_Modvol* p = NULL; + EndModVol(); + + ModifierVolumeParam *p = NULL; if (CurrentList == ListType_Opaque_Modifier_Volume) p = vdrc.global_param_mvo.Append(); else if (CurrentList == ListType_Translucent_Modifier_Volume) p = vdrc.global_param_mvo_tr.Append(); else return; - p->full=param->isp.full; - p->VolumeLast=param->pcw.Volume; - p->id=vdrc.modtrig.used(); + p->isp.full = param->isp.full; + p->isp.VolumeLast = param->pcw.Volume != 0; + p->first = vdrc.modtrig.used(); } __forceinline static void AppendModVolVertexA(TA_ModVolA* mvv) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index c882dbd2f..85134e80e 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -12,16 +12,14 @@ GLuint atomic_buffer; PipelineShader g_abuffer_final_shader; PipelineShader g_abuffer_final_nosort_shader; PipelineShader g_abuffer_clear_shader; -PipelineShader g_abuffer_tr_modvol_shader; -PipelineShader g_abuffer_tr_modvol_final_shader; -static GLuint volume_mode_uniform; +PipelineShader g_abuffer_tr_modvol_shaders[ModeCount]; static GLuint g_quadBuffer = 0; static GLuint g_quadVertexArray = 0; static int g_imageWidth = 0; static int g_imageHeight = 0; -GLuint pixel_buffer_size = 512 * 1024 * 1024; // Initial size 64 MB +GLuint pixel_buffer_size = 512 * 1024 * 1024; // Initial size 512 MB #define MAX_PIXELS_PER_FRAGMENT "32" @@ -229,12 +227,16 @@ void main(void) \n\ "; static const char *tr_modvol_shader_source = SHADER_HEADER "\ -#define LAST_PASS %d \n\ +#define MV_MODE %d \n\ #define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT " \n\ -uniform int volume_mode; \n\ +// Must match ModifierVolumeMode enum values \n\ +#define MV_XOR 0 \n\ +#define MV_OR 1 \n\ +#define MV_INCLUSION 2 \n\ +#define MV_EXCLUSION 3 \n\ void main(void) \n\ { \n\ -#if LAST_PASS == 0 \n\ +#if MV_MODE == MV_XOR || MV_MODE == MV_OR \n\ setFragDepth(); \n\ #endif \n\ ivec2 coords = 
ivec2(gl_FragCoord.xy); \n\ @@ -249,24 +251,20 @@ void main(void) \n\ uint stencil = pixels[idx].blend_stencil; \n\ if ((stencil & 0x80u) == 0x80u) \n\ { \n\ -#if LAST_PASS == 0 \n\ +#if MV_MODE == MV_XOR \n\ if (gl_FragDepth <= pixels[idx].depth) \n\ atomicXor(pixels[idx].blend_stencil, 2u); \n\ -#else \n\ - uint prev_val; \n\ - switch (volume_mode) \n\ - { \n\ - case 1: // Inclusion volume \n\ - prev_val = atomicAnd(pixels[idx].blend_stencil, 0xFFFFFFFDu); \n\ - if ((prev_val & 3u) == 2u) \n\ - pixels[idx].blend_stencil = bitfieldInsert(stencil, 1u, 0, 1); \n\ - break; \n\ - case 2: // Exclusion volume \n\ - prev_val = atomicAnd(pixels[idx].blend_stencil, 0xFFFFFFFCu); \n\ - if ((prev_val & 3u) == 1u) \n\ - pixels[idx].blend_stencil = bitfieldInsert(stencil, 1u, 0, 1); \n\ - break; \n\ - } \n\ +#elif MV_MODE == MV_OR \n\ + if (gl_FragDepth <= pixels[idx].depth) \n\ + atomicOr(pixels[idx].blend_stencil, 2u); \n\ +#elif MV_MODE == MV_INCLUSION \n\ + uint prev_val = atomicAnd(pixels[idx].blend_stencil, 0xFFFFFFFDu); \n\ + if ((prev_val & 3u) == 2u) \n\ + pixels[idx].blend_stencil = bitfieldInsert(stencil, 1u, 0, 1); \n\ +#elif MV_MODE == MV_EXCLUSION \n\ + uint prev_val = atomicAnd(pixels[idx].blend_stencil, 0xFFFFFFFCu); \n\ + if ((prev_val & 3u) == 1u) \n\ + pixels[idx].blend_stencil = bitfieldInsert(stencil, 1u, 0, 1); \n\ #endif \n\ } \n\ idx = pixels[idx].next; \n\ @@ -341,18 +339,14 @@ void initABuffer() } if (g_abuffer_clear_shader.program == 0) CompilePipelineShader(&g_abuffer_clear_shader, clear_shader_source); - if (g_abuffer_tr_modvol_shader.program == 0) + if (g_abuffer_tr_modvol_shaders[0].program == 0) { char source[16384]; - sprintf(source, tr_modvol_shader_source, 0); - CompilePipelineShader(&g_abuffer_tr_modvol_shader, source); - } - if (g_abuffer_tr_modvol_final_shader.program == 0) - { - char source[16384]; - sprintf(source, tr_modvol_shader_source, 1); - CompilePipelineShader(&g_abuffer_tr_modvol_final_shader, source); - volume_mode_uniform = 
glGetUniformLocation(g_abuffer_tr_modvol_final_shader.program, "volume_mode"); + for (int mode = 0; mode < ModeCount; mode++) + { + sprintf(source, tr_modvol_shader_source, mode); + CompilePipelineShader(&g_abuffer_tr_modvol_shaders[mode], source); + } } if (g_quadVertexArray == 0) @@ -448,56 +442,47 @@ void DrawTranslucentModVols(int first, int count) glcache.Disable(GL_DEPTH_TEST); glcache.Disable(GL_STENCIL_TEST); - glcache.UseProgram(g_abuffer_tr_modvol_final_shader.program); - ShaderUniforms.Set(&g_abuffer_tr_modvol_final_shader); - glcache.UseProgram(g_abuffer_tr_modvol_shader.program); - ShaderUniforms.Set(&g_abuffer_tr_modvol_shader); glCheck(); - u32 mod_base = 0; //cur start triangle - u32 mod_last = 0; //last merge - - u32 cmv_count = count - 1; - ISP_Modvol* params = &pvrrc.global_param_mvo_tr.head()[first]; + ModifierVolumeParam* params = &pvrrc.global_param_mvo_tr.head()[first]; glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT); - //ISP_Modvol - for (u32 cmv = 0; cmv < cmv_count; cmv++) + for (u32 cmv = 0; cmv < count; cmv++) { + ModifierVolumeParam& param = params[cmv]; - ISP_Modvol ispc = params[cmv]; - mod_base = ispc.id; - if (mod_last == 0) - // FIXME Will this work if no OP modvols are drawn? 
- mod_last = mod_base; - - u32 sz = params[cmv + 1].id - mod_base; - if (sz == 0) + if (param.count == 0) continue; - u32 mv_mode = ispc.DepthMode; + u32 mv_mode = param.isp.DepthMode; - verify(mod_base > 0 && mod_base + sz <= pvrrc.modtrig.used()); + verify(param.first >= 0 && param.first + param.count <= pvrrc.modtrig.used()); - glcache.UseProgram(g_abuffer_tr_modvol_shader.program); - SetCull(ispc.CullMode); glCheck(); + PipelineShader *shader; + if (!param.isp.VolumeLast && mv_mode > 0) + shader = &g_abuffer_tr_modvol_shaders[Or]; // OR'ing (open volume or quad) + else + shader = &g_abuffer_tr_modvol_shaders[Xor]; // XOR'ing (closed volume) + glcache.UseProgram(shader->program); + ShaderUniforms.Set(shader); + + SetCull(param.isp.CullMode); glCheck(); glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - glDrawArrays(GL_TRIANGLES, mod_base * 3, sz * 3); glCheck(); + + glDrawArrays(GL_TRIANGLES, param.first * 3, param.count * 3); glCheck(); if (mv_mode == 1 || mv_mode == 2) { //Sum the area - glcache.UseProgram(g_abuffer_tr_modvol_final_shader.program); - glUniform1i(volume_mode_uniform, mv_mode); + shader = &g_abuffer_tr_modvol_shaders[mv_mode == 1 ? 
Inclusion : Exclusion]; + glcache.UseProgram(shader->program); + ShaderUniforms.Set(shader); glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); DrawQuad(); SetupModvolVBO(); - - //update pointers - mod_last = mod_base + 1; } } } diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index acbab18d7..c5c234b18 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -385,35 +385,46 @@ void DrawList(const List& gply, int first, int count, int pass) 10 -> 00 11 -> 01 */ -void SetMVS_Mode(u32 mv_mode,ISP_Modvol ispc) +void SetMVS_Mode(ModifierVolumeMode mv_mode, ISP_Modvol ispc) { - if (mv_mode==0) //normal trigs + if (mv_mode == Xor) { - //set states + // set states glcache.Enable(GL_DEPTH_TEST); - //write only bit 1 + // write only bit 1 glcache.StencilMask(2); - //no stencil testing - glcache.StencilFunc(GL_ALWAYS,0,2); - //count the number of pixels in front of the Z buffer (xor zpass) - glcache.StencilOp(GL_KEEP,GL_KEEP,GL_INVERT); + // no stencil testing + glcache.StencilFunc(GL_ALWAYS, 0, 2); + // count the number of pixels in front of the Z buffer (xor zpass) + glcache.StencilOp(GL_KEEP, GL_KEEP, GL_INVERT); - //Cull mode needs to be set + // Cull mode needs to be set + SetCull(ispc.CullMode); + } + else if (mv_mode == Or) + { + // set states + glcache.Enable(GL_DEPTH_TEST); + // write only bit 1 + glcache.StencilMask(2); + // no stencil testing + glcache.StencilFunc(GL_ALWAYS, 2, 2); + // Or'ing of all triangles + glcache.StencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); + + // Cull mode needs to be set SetCull(ispc.CullMode); } else { - //1 (last in) or 2 (last out) - //each triangle forms the last of a volume + // Inclusion or Exclusion volume - //common states - - //no depth test + // no depth test glcache.Disable(GL_DEPTH_TEST); - //write bits 1:0 + // write bits 1:0 glcache.StencilMask(3); - if (mv_mode==1) + if (mv_mode == Inclusion) { // Inclusion volume //res : old : final @@ -422,9 +433,9 @@ void SetMVS_Mode(u32 mv_mode,ISP_Modvol 
ispc) //1 : 0 : 01 //1 : 1 : 01 - //if (1<=st) st=1; else st=0; + // if (1<=st) st=1; else st=0; glcache.StencilFunc(GL_LEQUAL,1,3); - glcache.StencilOp(GL_ZERO,GL_ZERO,GL_REPLACE); + glcache.StencilOp(GL_ZERO, GL_ZERO, GL_REPLACE); } else { @@ -442,7 +453,7 @@ void SetMVS_Mode(u32 mv_mode,ISP_Modvol ispc) // if (1 == st) st = 1; else st = 0; glcache.StencilFunc(GL_EQUAL, 1, 3); - glcache.StencilOp(GL_ZERO, GL_ZERO, GL_REPLACE); + glcache.StencilOp(GL_ZERO, GL_ZERO, GL_KEEP); } } } @@ -520,85 +531,35 @@ void DrawModVols(int first, int count) } else { - /* - mode : - normal trig : flip - last *in* : flip, merge*in* &clear from last merge - last *out* : flip, merge*out* &clear from last merge - */ + //Full emulation - /* + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - Do not write to color - Do not write to depth + ModifierVolumeParam* params = &pvrrc.global_param_mvo.head()[first]; - read from stencil bits 1:0 - write to stencil bits 1:0 - */ - - glColorMask(GL_FALSE,GL_FALSE,GL_FALSE,GL_FALSE); - - if (0) + for (u32 cmv = 0; cmv < count; cmv++) { - //simple single level stencil - glcache.Enable(GL_STENCIL_TEST); - glcache.StencilFunc(GL_ALWAYS,0x1,0x1); - glcache.StencilOp(GL_KEEP,GL_KEEP,GL_INVERT); - glcache.StencilMask(0x1); - SetCull(0); - glDrawArrays(GL_TRIANGLES, first, count * 3); - } - else if (true) - { - //Full emulation - //the *out* mode is buggy + ModifierVolumeParam& param = params[cmv]; - u32 mod_base=0; //cur start triangle - u32 mod_last=0; //last merge + if (param.count == 0) + continue; - u32 cmv_count = count - 1; - ISP_Modvol* params = &pvrrc.global_param_mvo.head()[first]; + u32 mv_mode = param.isp.DepthMode; - //ISP_Modvol - for (u32 cmv=0;cmv 0) + SetMVS_Mode(Or, param.isp); // OR'ing (open volume or quad) + else + SetMVS_Mode(Xor, param.isp); // XOR'ing (closed volume) + glDrawArrays(GL_TRIANGLES, param.first * 3, param.count * 3); + + if (mv_mode == 1 || mv_mode == 2) { - - ISP_Modvol ispc=params[cmv]; - mod_base=ispc.id; - u32 
sz=params[cmv+1].id-mod_base; - if (sz == 0) - continue; - - u32 mv_mode = ispc.DepthMode; - - - if (mv_mode==0) //normal trigs - { - SetMVS_Mode(0,ispc); - //Render em (counts intersections) - //verifyc(dev->DrawPrimitiveUP(D3DPT_TRIANGLELIST,sz,pvrrc.modtrig.data+mod_base,3*4)); - glDrawArrays(GL_TRIANGLES,mod_base*3,sz*3); - } - else if (mv_mode<3) - { - while(sz) - { - //merge and clear all the prev. stencil bits - - //Count Intersections (last poly) - SetMVS_Mode(0,ispc); - glDrawArrays(GL_TRIANGLES,mod_base*3,3); - - //Sum the area - SetMVS_Mode(mv_mode,ispc); - glDrawArrays(GL_TRIANGLES,mod_last*3,(mod_base-mod_last+1)*3); - - //update pointers - mod_last=mod_base+1; - sz--; - mod_base++; - } - } + // Sum the area + SetMVS_Mode(mv_mode == 1 ? Inclusion : Exclusion, param.isp); + // Use the background poly as a quad to do the sum up + SetupMainVBO(); + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); + SetupModvolVBO(); } } diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index de96cf2af..698ddb4a7 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -275,3 +275,4 @@ void setFragDepth(void) \n\ " void SetupModvolVBO(); +enum ModifierVolumeMode { Xor, Or, Inclusion, Exclusion, ModeCount }; From 4bc4391d971b824741c4af391d07671c00c62030 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Thu, 28 Jun 2018 12:25:25 +0200 Subject: [PATCH 38/65] Set depth to 0 for translucent polys that use always depth func It seems that autosort translucent polys support the always depth func. In that case their depth should be ignored for sorting and they should be drawn last (sorted by poly number if there is more than one). This fixes graphic glitches in Psyvariar. Also observed in V-Rally 2. 
--- core/rend/gles/gles.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 270414f6d..c59742641 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -275,6 +275,8 @@ void main() \n\ #elif pp_DepthFunc == 6 // Less or equal \n\ if (gl_FragDepth > frontDepth) \n\ discard; \n\ + #elif pp_DepthFunc == 7 // Always \n\ + gl_FragDepth = 0; // Set depth to 0 so that it's drawn in front of everything else \n\ #endif \n\ #endif \n\ \n\ From c120f21c0bee01cc0b75812db9505ccac8b955eb Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Fri, 29 Jun 2018 17:34:04 +0200 Subject: [PATCH 39/65] Dump texture utility for debugging --- core/rend/gles/gltex.cpp | 71 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index e2897b33c..094499691 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -109,6 +109,75 @@ static void dumpRtTexture(u32 name, u32 w, u32 h) { free(rows); } +static void dumpTexture(int texID, int w, int h, GLuint textype) +{ + // Dump + char sname[256]; + sprintf(sname, "texdump/%d.png", texID); + FILE *fp = fopen(sname, "wb"); + if (fp == NULL) + return; + + u16 *src = (u16 *)temp_tex_buffer; + + png_bytepp rows = (png_bytepp)malloc(h * sizeof(png_bytep)); + for (int y = 0; y < h; y++) { + rows[y] = (png_bytep)malloc(w * 4); // 32-bit per pixel + u8 *dst = (u8 *)rows[y]; + if (textype == GL_UNSIGNED_SHORT_4_4_4_4) + for (int x = 0; x < w; x++) + { + *dst++ = ((*src >> 12) & 0xF) << 4; + *dst++ = ((*src >> 8) & 0xF) << 4; + *dst++ = ((*src >> 4) & 0xF) << 4; + *dst++ = (*src & 0xF) << 4; + src++; + } + else if (textype == GL_UNSIGNED_SHORT_5_6_5) + for (int x = 0; x < w; x++) + { + *dst++ = ((*src >> 11) & 0x1F) << 3; + *dst++ = ((*src >> 5) & 0x3F) << 3; + *dst++ = (*src & 0x1F) << 3; + *dst++ = 255; + src++; + } + if (textype == GL_UNSIGNED_SHORT_5_5_5_1) + for (int x = 0; x < w; x++) + { 
+ *dst++ = ((*src >> 11) & 0x1F) << 3; + *dst++ = ((*src >> 6) & 0x1F) << 3; + *dst++ = ((*src >> 1) & 0x1F) << 3; + *dst++ = (*src & 1) ? 255 : 0; + src++; + } + } + + png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); + png_infop info_ptr = png_create_info_struct(png_ptr); + + png_init_io(png_ptr, fp); + + + // write header + png_set_IHDR(png_ptr, info_ptr, w, h, + 8, PNG_COLOR_TYPE_RGBA, PNG_INTERLACE_NONE, + PNG_COMPRESSION_TYPE_BASE, PNG_FILTER_TYPE_BASE); + + png_write_info(png_ptr, info_ptr); + + + // write bytes + png_write_image(png_ptr, rows); + + // end write + png_write_end(png_ptr, NULL); + fclose(fp); + + for (int y = 0; y < h; y++) + free(rows[y]); + free(rows); +} //Texture Cache :) struct TextureCacheData @@ -317,6 +386,8 @@ struct TextureCacheData glTexImage2D(GL_TEXTURE_2D, 0,comps , w, h, 0, comps, textype, temp_tex_buffer); if (tcw.MipMapped && settings.rend.UseMipmaps) glGenerateMipmap(GL_TEXTURE_2D); + + //dumpTexture(texID, w, h, textype); } else { #if FEAT_HAS_SOFTREND From e570a72c3d146ec864e5962025c9e29de6fda461 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Fri, 29 Jun 2018 17:39:38 +0200 Subject: [PATCH 40/65] Flat shading (non-gouraud) support --- core/rend/gles/gldraw.cpp | 9 ++++-- core/rend/gles/gles.cpp | 61 +++++++++++++++++++++++++++------------ core/rend/gles/gles.h | 3 +- 3 files changed, 51 insertions(+), 22 deletions(-) diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 5e07180ce..9d287349e 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -178,6 +178,7 @@ template 2, false, // TODO Can PT have two different textures for area 0 and 1 ?? 
0, + false, pass); CurrentShader = gl.getShader(shaderId); if (CurrentShader->program == -1) { @@ -191,6 +192,7 @@ template CurrentShader->pp_FogCtrl = 2; CurrentShader->pp_TwoVolumes = false; CurrentShader->pp_DepthFunc = 0; + CurrentShader->pp_Gouraud = false; CurrentShader->pass = pass; CompilePipelineShader(CurrentShader); } @@ -220,6 +222,7 @@ template gp->tsp.FogCtrl, two_volumes_mode, depth_func, + gp->pcw.Gouraud, pass); CurrentShader = gl.getShader(shaderId); if (CurrentShader->program == -1) { @@ -233,6 +236,7 @@ template CurrentShader->pp_FogCtrl = gp->tsp.FogCtrl; CurrentShader->pp_TwoVolumes = two_volumes_mode; CurrentShader->pp_DepthFunc = depth_func; + CurrentShader->pp_Gouraud = gp->pcw.Gouraud; CurrentShader->pass = pass; CompilePipelineShader(CurrentShader); } @@ -652,6 +656,7 @@ void DrawStrips(GLuint output_fbo) //We use sampler 0 glActiveTexture(GL_TEXTURE0); glcache.Disable(GL_BLEND); + glProvokingVertex(GL_LAST_VERTEX_CONVENTION); RenderPass previous_pass = {0}; int render_pass_count = pvrrc.render_passes.used(); @@ -661,8 +666,8 @@ void DrawStrips(GLuint output_fbo) const RenderPass& current_pass = pvrrc.render_passes.head()[render_pass]; // Check if we can skip this pass, in part or completely, in case nothing is drawn (Cosmic Smash) - bool skip_op_pt = false; // true; - bool skip_tr = false; // true; + bool skip_op_pt = true; + bool skip_tr = true; for (int j = previous_pass.op_count; skip_op_pt && j < current_pass.op_count; j++) { if (pvrrc.global_param_op.head()[j].count > 2) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index c59742641..f11538aed 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -76,6 +76,14 @@ const char* VertexShaderSource = "#version 140 \n" #endif "\ +#define pp_Gouraud %d \n\ + \n\ +#if pp_Gouraud == 0 \n\ +#define INTERPOLATION flat \n\ +#else \n\ +#define INTERPOLATION smooth \n\ +#endif \n\ + \n\ /* Vertex constants*/ \n\ uniform highp vec4 scale; \n\ uniform highp vec4 
depth_scale; \n\ @@ -88,13 +96,13 @@ uniform highp vec4 depth_scale; \n\ " attr " lowp vec4 in_offs1; \n\ " attr " mediump vec2 in_uv1; \n\ /* output */ \n\ -" vary " lowp vec4 vtx_base; \n\ -" vary " lowp vec4 vtx_offs; \n\ -" vary " mediump vec2 vtx_uv; \n\ -" vary " lowp vec4 vtx_base1; \n\ -" vary " lowp vec4 vtx_offs1; \n\ -" vary " mediump vec2 vtx_uv1; \n\ -" vary " mediump float vtx_z; \n\ +INTERPOLATION " vary " lowp vec4 vtx_base; \n\ +INTERPOLATION " vary " lowp vec4 vtx_offs; \n\ + " vary " mediump vec2 vtx_uv; \n\ +INTERPOLATION " vary " lowp vec4 vtx_base1; \n\ +INTERPOLATION " vary " lowp vec4 vtx_offs1; \n\ + " vary " mediump vec2 vtx_uv1; \n\ + " vary " mediump float vtx_z; \n\ void main() \n\ { \n\ vtx_base=in_base; \n\ @@ -194,6 +202,7 @@ const char* PixelPipelineShader = SHADER_HEADER #define pp_FogCtrl %d \n\ #define pp_TwoVolumes %d \n\ #define pp_DepthFunc %d \n\ +#define pp_Gouraud %d \n\ #define PASS %d \n" #ifndef GLES "\ @@ -206,6 +215,12 @@ const char* PixelPipelineShader = SHADER_HEADER #define IF(x) if (x) \n\ #else \n\ #define IF(x) \n\ +#endif \n\ + \n\ +#if pp_Gouraud == 0 \n\ +#define INTERPOLATION flat \n\ +#else \n\ +#define INTERPOLATION smooth \n\ #endif \n\ \n\ /* Shader program params*/ \n\ @@ -232,13 +247,13 @@ uniform int fog_control[2]; \n\ #endif \n\ \n\ /* Vertex input*/ \n\ -" vary " lowp vec4 vtx_base; \n\ -" vary " lowp vec4 vtx_offs; \n\ -" vary " mediump vec2 vtx_uv; \n\ -" vary " lowp vec4 vtx_base1; \n\ -" vary " lowp vec4 vtx_offs1; \n\ -" vary " mediump vec2 vtx_uv1; \n\ -" vary " mediump float vtx_z; \n\ +INTERPOLATION " vary " lowp vec4 vtx_base; \n\ +INTERPOLATION " vary " lowp vec4 vtx_offs; \n\ + " vary " mediump vec2 vtx_uv; \n\ +INTERPOLATION " vary " lowp vec4 vtx_base1; \n\ +INTERPOLATION " vary " lowp vec4 vtx_offs1; \n\ + " vary " mediump vec2 vtx_uv1; \n\ + " vary " mediump float vtx_z; \n\ lowp float fog_mode2(highp float w) \n\ { \n\ highp float z = clamp(w * sp_FOG_DENSITY, 1.0, 255.9999); \n\ @@ 
-893,7 +908,7 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, - u32 pp_FogCtrl, bool pp_TwoVolumes, u32 pp_DepthFunc, int pass) + u32 pp_FogCtrl, bool pp_TwoVolumes, u32 pp_DepthFunc, bool pp_Gouraud, int pass) { u32 rv=0; @@ -907,6 +922,7 @@ int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, rv<<=2; rv|=pp_FogCtrl; rv <<= 1; rv |= (int)pp_TwoVolumes; rv <<= 3; rv |= pp_DepthFunc; + rv <<= 1; rv |= (int)pp_Gouraud; rv <<= 2; rv |= pass; return rv; @@ -914,13 +930,17 @@ int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipelineShader */) { + char vshader[16384]; + + sprintf(vshader, VertexShaderSource, s->pp_Gouraud); + char pshader[16384]; sprintf(pshader, source, s->cp_AlphaTest,s->pp_ClipTestMode,s->pp_UseAlpha, - s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pp_TwoVolumes, s->pp_DepthFunc, s->pass); + s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pp_TwoVolumes, s->pp_DepthFunc, s->pp_Gouraud, s->pass); - s->program=gl_CompileAndLink(VertexShaderSource,pshader); + s->program = gl_CompileAndLink(vshader, pshader); //setup texture 0 as the input for the shader GLint gu = glGetUniformLocation(s->program, "tex0"); @@ -999,12 +1019,15 @@ bool gl_create_resources() glGenBuffers(1, &gl.vbo.idxs); glGenBuffers(1, &gl.vbo.idxs2); - gl.modvol_shader.program=gl_CompileAndLink(VertexShaderSource,ModifierVolumeShader); + char vshader[16384]; + sprintf(vshader, VertexShaderSource, 1); + + gl.modvol_shader.program=gl_CompileAndLink(vshader, ModifierVolumeShader); gl.modvol_shader.scale = glGetUniformLocation(gl.modvol_shader.program, "scale"); gl.modvol_shader.depth_scale = glGetUniformLocation(gl.modvol_shader.program, "depth_scale"); - 
gl.OSD_SHADER.program=gl_CompileAndLink(VertexShaderSource,OSD_Shader); + gl.OSD_SHADER.program=gl_CompileAndLink(vshader, OSD_Shader); gl.OSD_SHADER.scale=glGetUniformLocation(gl.OSD_SHADER.program, "scale"); gl.OSD_SHADER.depth_scale=glGetUniformLocation(gl.OSD_SHADER.program, "depth_scale"); glUniform1i(glGetUniformLocation(gl.OSD_SHADER.program, "tex"),0); //bind osd texture to slot 0 diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 698ddb4a7..7d12b371a 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -74,6 +74,7 @@ struct PipelineShader u32 pp_DepthFunc; int pass; bool pp_TwoVolumes; + bool pp_Gouraud; }; @@ -144,7 +145,7 @@ GLuint BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt); void ReadRTTBuffer(); int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, - u32 pp_FogCtrl, bool two_volumes, u32 pp_DepthFunc, int pass); + u32 pp_FogCtrl, bool two_volumes, u32 pp_DepthFunc, bool pp_Gouraud, int pass); void SetCull(u32 CulliMode); struct ShaderUniforms_t From 0a4cdfb973f520911ab9e51bc3512b45e885f66f Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Fri, 29 Jun 2018 22:30:56 +0200 Subject: [PATCH 41/65] Support for dst/src select on TR polys Copy TR poly params to shader memory. Support src/dst select on TR polys. Fixes white areas in Evil Dead - Hail to the King. Limited support for two-volumes TR polys. 
--- core/rend/gles/abuffer.cpp | 97 ++++++++++++++++++++++++-------------- core/rend/gles/gldraw.cpp | 4 +- core/rend/gles/gles.cpp | 18 +++++-- core/rend/gles/gles.h | 90 ++++++++++++++++++++++++++++++++++- 4 files changed, 167 insertions(+), 42 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index 85134e80e..811466de5 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -52,10 +52,10 @@ void bubbleSort(int array_size) { \n\ #if DEPTH_SORTED == 1 \n\ // depth then poly number \n\ if (pixel_list[j].depth < pixel_list[j + 1].depth \n\ - || (pixel_list[j].depth == pixel_list[j + 1].depth && pixel_list[j].seq_num > pixel_list[j + 1].seq_num)) { \n\ + || (pixel_list[j].depth == pixel_list[j + 1].depth && getPolyNumber(pixel_list[j]) > getPolyNumber(pixel_list[j + 1]))) { \n\ #else \n\ // poly number only \n\ - if (pixel_list[j].seq_num > pixel_list[j + 1].seq_num) { \n\ + if (getPolyNumber(pixel_list[j]) > getPolyNumber(pixel_list[j + 1])) { \n\ #endif \n\ Pixel p = pixel_list[j + 1]; \n\ pixel_list[j + 1] = pixel_list[j]; \n\ @@ -72,9 +72,9 @@ void insertionSort(int array_size) { \n\ Pixel p = pixel_list[i]; \n\ int j = i - 1; \n\ #if DEPTH_SORTED == 1 \n\ - for (; j >= 0 && (pixel_list[j].depth < p.depth || (pixel_list[j].depth == p.depth && pixel_list[j].seq_num > p.seq_num)); j--) { \n\ + for (; j >= 0 && (pixel_list[j].depth < p.depth || (pixel_list[j].depth == p.depth && getPolyNumber(pixel_list[j]) > getPolyNumber(p))); j--) { \n\ #else \n\ - for (; j >= 0 && pixel_list[j].seq_num > p.seq_num; j--) { \n\ + for (; j >= 0 && getPolyNumber(pixel_list[j]) > getPolyNumber(p); j--) { \n\ #endif \n\ pixel_list[j + 1] = pixel_list[j]; \n\ } \n\ @@ -92,11 +92,16 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ bubbleSort(num_frag); \n\ \n\ vec4 finalColor = texture(tex, gl_FragCoord.xy / textureSize(tex, 0)); \n\ + vec4 secondaryBuffer = vec4(0.0); // Secondary accumulation buffer \n\ float depth = 1.0; \n\ - for (int 
i = 0; i < num_frag; i++) { \n\ + \n\ + for (int i = 0; i < num_frag; i++) \n\ + { \n\ + const Pixel pixel = pixel_list[i]; \n\ + const PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\ #if DEPTH_SORTED != 1 \n\ - float frag_depth = pixel_list[i].depth; \n\ - switch ((int(pixel_list[i].blend_stencil) >> 16) & 7) \n\ + const float frag_depth = pixel.depth; \n\ + switch (getDepthFunc(pp)) \n\ { \n\ case 0: // Never \n\ continue; \n\ @@ -127,19 +132,33 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ case 7: // Always \n\ break; \n\ } \n\ - bool depth_mask = ((int(pixel_list[i].blend_stencil) >> 19) & 1) == 1; \n\ - if (depth_mask) \n\ + \n\ + if (getDepthMask(pp)) \n\ depth = frag_depth; \n\ #endif \n\ - vec4 srcColor = pixel_list[i].color; \n\ - if ((pixel_list[i].blend_stencil & 0x81u) == 0x81u) \n\ - srcColor.rgb *= shade_scale_factor; \n\ - float srcAlpha = srcColor.a; \n\ - float dstAlpha = finalColor.a; \n\ + bool area1 = false; \n\ + bool shadowed = false; \n\ + if (getShadowEnable(pp) && isShadowed(pixel)) \n\ + { \n\ + if (isTwoVolumes(pp)) \n\ + area1 = true; \n\ + else \n\ + shadowed = true; \n\ + } \n\ + vec4 srcColor; \n\ + if (getSrcSelect(pp, area1)) \n\ + srcColor = secondaryBuffer; \n\ + else \n\ + { \n\ + srcColor = pixel.color; \n\ + if (shadowed) \n\ + srcColor.rgb *= shade_scale_factor; \n\ + } \n\ + vec4 dstColor = getDstSelect(pp, area1) ? 
secondaryBuffer : finalColor; \n\ vec4 srcCoef; \n\ vec4 dstCoef; \n\ \n\ - int srcBlend = (int(pixel_list[i].blend_stencil) >> 11) & 7; \n\ + int srcBlend = getSrcBlendFunc(pp, area1); \n\ switch (srcBlend) \n\ { \n\ case ZERO: \n\ @@ -152,22 +171,22 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ srcCoef = finalColor; \n\ break; \n\ case INVERSE_OTHER_COLOR: \n\ - srcCoef = vec4(1.0) - finalColor; \n\ + srcCoef = vec4(1.0) - dstColor; \n\ break; \n\ case SRC_ALPHA: \n\ - srcCoef = vec4(srcAlpha); \n\ + srcCoef = vec4(srcColor.a); \n\ break; \n\ case INVERSE_SRC_ALPHA: \n\ - srcCoef = vec4(1.0 - srcAlpha); \n\ + srcCoef = vec4(1.0 - srcColor.a); \n\ break; \n\ case DST_ALPHA: \n\ - srcCoef = vec4(dstAlpha); \n\ + srcCoef = vec4(dstColor.a); \n\ break; \n\ case INVERSE_DST_ALPHA: \n\ - srcCoef = vec4(1.0 - dstAlpha); \n\ + srcCoef = vec4(1.0 - dstColor.a); \n\ break; \n\ } \n\ - int dstBlend = (int(pixel_list[i].blend_stencil) >> 8) & 7; \n\ + int dstBlend = getDstBlendFunc(pp, area1); \n\ switch (dstBlend) \n\ { \n\ case ZERO: \n\ @@ -183,19 +202,23 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ dstCoef = vec4(1.0) - srcColor; \n\ break; \n\ case SRC_ALPHA: \n\ - dstCoef = vec4(srcAlpha); \n\ + dstCoef = vec4(srcColor.a); \n\ break; \n\ case INVERSE_SRC_ALPHA: \n\ - dstCoef = vec4(1.0 - srcAlpha); \n\ + dstCoef = vec4(1.0 - srcColor.a); \n\ break; \n\ case DST_ALPHA: \n\ - dstCoef = vec4(dstAlpha); \n\ + dstCoef = vec4(dstColor.a); \n\ break; \n\ case INVERSE_DST_ALPHA: \n\ - dstCoef = vec4(1.0 - dstAlpha); \n\ + dstCoef = vec4(1.0 - dstColor.a); \n\ break; \n\ } \n\ - finalColor = clamp(finalColor * dstCoef + srcColor * srcCoef, 0.0, 1.0); \n\ + vec4 result = clamp(dstColor * dstCoef + srcColor * srcCoef, 0.0, 1.0); \n\ + if (getDstSelect(pp, area1)) \n\ + secondaryBuffer = result; \n\ + else \n\ + finalColor = result; \n\ } \n\ \n\ return finalColor; \n\ @@ -229,11 +252,13 @@ void main(void) \n\ static const char *tr_modvol_shader_source = SHADER_HEADER "\ #define 
MV_MODE %d \n\ #define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT " \n\ + \n\ // Must match ModifierVolumeMode enum values \n\ #define MV_XOR 0 \n\ #define MV_OR 1 \n\ #define MV_INCLUSION 2 \n\ #define MV_EXCLUSION 3 \n\ + \n\ void main(void) \n\ { \n\ #if MV_MODE == MV_XOR || MV_MODE == MV_OR \n\ @@ -248,23 +273,23 @@ void main(void) \n\ discard; \n\ int list_len = 0; \n\ while (idx != EOL) { \n\ - uint stencil = pixels[idx].blend_stencil; \n\ - if ((stencil & 0x80u) == 0x80u) \n\ + const PolyParam pp = tr_poly_params[getPolyNumber(pixels[idx])]; \n\ + if (getShadowEnable(pp)) \n\ { \n\ #if MV_MODE == MV_XOR \n\ if (gl_FragDepth <= pixels[idx].depth) \n\ - atomicXor(pixels[idx].blend_stencil, 2u); \n\ + atomicXor(pixels[idx].seq_num, 0x40000000); \n\ #elif MV_MODE == MV_OR \n\ if (gl_FragDepth <= pixels[idx].depth) \n\ - atomicOr(pixels[idx].blend_stencil, 2u); \n\ + atomicOr(pixels[idx].seq_num, 0x40000000); \n\ #elif MV_MODE == MV_INCLUSION \n\ - uint prev_val = atomicAnd(pixels[idx].blend_stencil, 0xFFFFFFFDu); \n\ - if ((prev_val & 3u) == 2u) \n\ - pixels[idx].blend_stencil = bitfieldInsert(stencil, 1u, 0, 1); \n\ + uint prev_val = atomicAnd(pixels[idx].seq_num, 0xBFFFFFFF); \n\ + if ((prev_val & 0xC0000000) == 0x40000000) \n\ + pixels[idx].seq_num = bitfieldInsert(pixels[idx].seq_num, 1, 31, 1); \n\ #elif MV_MODE == MV_EXCLUSION \n\ - uint prev_val = atomicAnd(pixels[idx].blend_stencil, 0xFFFFFFFCu); \n\ - if ((prev_val & 3u) == 1u) \n\ - pixels[idx].blend_stencil = bitfieldInsert(stencil, 1u, 0, 1); \n\ + uint prev_val = atomicAnd(pixels[idx].seq_num, 0x3FFFFFFF); \n\ + if ((prev_val & 0xC0000000) == 0x80000000) \n\ + pixels[idx].seq_num = bitfieldInsert(pixels[idx].seq_num, 1, 31, 1); \n\ #endif \n\ } \n\ idx = pixels[idx].next; \n\ diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 5e07180ce..c542faed1 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -661,8 +661,8 @@ void DrawStrips(GLuint output_fbo) 
const RenderPass& current_pass = pvrrc.render_passes.head()[render_pass]; // Check if we can skip this pass, in part or completely, in case nothing is drawn (Cosmic Smash) - bool skip_op_pt = false; // true; - bool skip_tr = false; // true; + bool skip_op_pt = true; + bool skip_tr = true; for (int j = previous_pass.op_count; skip_op_pt && j < current_pass.op_count; j++) { if (pvrrc.global_param_op.head()[j].count > 2) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index c59742641..89b9a0ec2 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -458,7 +458,6 @@ void main() \n\ pixel.color = color; \n\ pixel.depth = gl_FragDepth; \n\ pixel.seq_num = pp_Number; \n\ - pixel.blend_stencil = uint(depth_mask << 19) + uint(pp_DepthFunc << 16) + uint(((cur_blend_mode.x << 3) + cur_blend_mode.y) << 8) + pp_Stencil; \n\ pixel.next = imageAtomicExchange(abufferPointerImg, coords, idx); \n\ pixels[idx] = pixel; \n\ \n\ @@ -837,8 +836,8 @@ GLuint gl_CompileShader(const char* shader,GLuint type) GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) { //create shaders - GLuint vs=gl_CompileShader(VertexShader ,GL_VERTEX_SHADER); - GLuint ps=gl_CompileShader(FragmentShader ,GL_FRAGMENT_SHADER); + GLuint vs=gl_CompileShader(VertexShader, GL_VERTEX_SHADER); + GLuint ps=gl_CompileShader(FragmentShader, GL_FRAGMENT_SHADER); GLuint program = glCreateProgram(); glAttachShader(program, vs); @@ -1028,6 +1027,14 @@ bool gl_create_resources() osd_font = loadPNG(get_readonly_data_path("/font.png"), w, h); #endif + // Create the buffer for Translucent poly params + glGenBuffers(1, &gl.vbo.tr_poly_params); + // Bind it + glBindBuffer(GL_SHADER_STORAGE_BUFFER, gl.vbo.tr_poly_params); + // Declare storage + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, gl.vbo.tr_poly_params); + glCheck(); + return true; } @@ -1857,6 +1864,11 @@ bool RenderFrame() glBufferData(GL_ARRAY_BUFFER,pvrrc.modtrig.bytes(),pvrrc.modtrig.head(),GL_STREAM_DRAW); 
glCheck(); } + // TR PolyParam data + glBindBuffer(GL_SHADER_STORAGE_BUFFER, gl.vbo.tr_poly_params); + glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(struct PolyParam) * pvrrc.global_param_tr.used(), pvrrc.global_param_tr.head(), GL_STATIC_DRAW); + glCheck(); + int offs_x=ds2s_offs_x+0.5f; //this needs to be scaled diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 698ddb4a7..3b8057e23 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -109,6 +109,7 @@ struct gl_ctx #ifndef GLES GLuint vao; #endif + GLuint tr_poly_params; } vbo; PipelineShader *getShader(int programId) { @@ -239,7 +240,6 @@ struct Pixel { \n\ mediump vec4 color; \n\ mediump float depth; \n\ int seq_num; \n\ - uint blend_stencil; \n\ uint next; \n\ }; \n\ #define EOL 0xFFFFFFFFu \n\ @@ -272,6 +272,94 @@ void setFragDepth(void) \n\ highp float w = 100000.0 * gl_FragCoord.w; \n\ gl_FragDepth = 1.0 - log2(1.0 + w) / 34.0; \n\ } \n\ +struct PolyParam { \n\ + int first; \n\ + int count; \n\ + int texid; \n\ + int tsp; \n\ + int tcw; \n\ + int pcw; \n\ + int isp; \n\ + float zvZ; \n\ + int tileclip; \n\ + int tsp1; \n\ + int tcw1; \n\ + int texid1; \n\ +}; \n\ +layout (binding = 1, std430) readonly buffer TrPolyParamBuffer { \n\ + PolyParam tr_poly_params[]; \n\ +}; \n\ + \n\ +int getSrcBlendFunc(const in PolyParam pp, bool area1) \n\ +{ \n\ + return ((area1 ? pp.tsp1 : pp.tsp) >> 29) & 7; \n\ +} \n\ +\n\ +int getDstBlendFunc(const in PolyParam pp, bool area1) \n\ +{ \n\ + return ((area1 ? pp.tsp1 : pp.tsp) >> 26) & 7; \n\ +} \n\ +\n\ +bool getSrcSelect(const in PolyParam pp, bool area1) \n\ +{ \n\ + return (((area1 ? pp.tsp1 : pp.tsp) >> 25) & 1) != 0; \n\ +} \n\ +\n\ +bool getDstSelect(const in PolyParam pp, bool area1) \n\ +{ \n\ + return (((area1 ? pp.tsp1 : pp.tsp) >> 24) & 1) != 0; \n\ +} \n\ +\n\ +int getFogControl(const in PolyParam pp, bool area1) \n\ +{ \n\ + return ((area1 ? 
pp.tsp1 : pp.tsp) >> 22) & 3; \n\ +} \n\ +\n\ +bool getUseAlpha(const in PolyParam pp, bool area1) \n\ +{ \n\ + return (((area1 ? pp.tsp1 : pp.tsp) >> 20) & 1) != 0; \n\ +} \n\ +\n\ +bool getIgnoreTexAlpha(const in PolyParam pp, bool area1) \n\ +{ \n\ + return (((area1 ? pp.tsp1 : pp.tsp) >> 19) & 1) != 0; \n\ +} \n\ +\n\ +int getShadingInstruction(const in PolyParam pp, bool area1) \n\ +{ \n\ + return ((area1 ? pp.tsp1 : pp.tsp) >> 6) & 3; \n\ +} \n\ +\n\ +int getDepthFunc(const in PolyParam pp) \n\ +{ \n\ + return (pp.isp >> 29) & 7; \n\ +} \n\ +\n\ +bool getDepthMask(const in PolyParam pp) \n\ +{ \n\ + return ((pp.isp >> 26) & 1) != 1; \n\ +} \n\ +\n\ +bool getShadowEnable(const in PolyParam pp) \n\ +{ \n\ + return ((pp.pcw >> 7) & 1) != 0; \n\ +} \n\ +\n\ +int getPolyNumber(const in Pixel pixel) \n\ +{ \n\ + return pixel.seq_num & 0x3FFFFFFF; \n\ +} \n\ +\n\ +bool isShadowed(const in Pixel pixel) \n\ +{ \n\ + return (pixel.seq_num & 0x80000000) == 0x80000000; \n\ +} \n\ +\n\ +bool isTwoVolumes(const in PolyParam pp) \n\ +{ \n\ + return pp.tsp1 != 0xFFFFFFFF || pp.tcw1 != 0xFFFFFFFF; \n\ +} \n\ + \n\ " void SetupModvolVBO(); From 6436587c738e3cbd58f07764e85c2a09a96b4b11 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 30 Jun 2018 12:48:30 +0200 Subject: [PATCH 42/65] Don't add vertices at end of strip if not merging strips together. 
--- core/hw/pvr/ta_vtx.cpp | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index c313da6a3..fdfffb1aa 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -900,13 +900,6 @@ public: { CurrentPP->count=vdrc.idx.used() - CurrentPP->first; - int vbase=vdrc.verts.used(); - - *vdrc.idx.Append()=vbase-1; - *vdrc.idx.Append()=vbase; - - if (CurrentPP->count&1) - *vdrc.idx.Append()=vbase; #if STRIPS_AS_PPARAMS if (CurrentPPlist==&vdrc.global_param_tr) { @@ -916,7 +909,20 @@ public: d_pp->first=vdrc.idx.used(); d_pp->count=0; } + else + { #endif + int vbase=vdrc.verts.used(); + + *vdrc.idx.Append()=vbase-1; + *vdrc.idx.Append()=vbase; + + if (CurrentPP->count&1) + *vdrc.idx.Append()=vbase; +#if STRIPS_AS_PPARAMS + } +#endif + } From fac54519c078788441f449f785d1808f0b8c843f Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 30 Jun 2018 13:38:40 +0200 Subject: [PATCH 43/65] Fix autosort mode per render pass. Remove hacky Always depth on autosorted TR polys. 
--- core/rend/gles/gldraw.cpp | 11 ++++++++--- core/rend/gles/gles.cpp | 2 -- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index df34a3d9c..be7ff4824 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -773,7 +773,10 @@ void DrawStrips(GLuint output_fbo) glActiveTexture(GL_TEXTURE0); //Alpha blended - DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); // 3 because pass 2 is no more + if (current_pass.autosort) + DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); // 3 because pass 2 is no more + else + DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); // 3 because pass 2 is no more glCheck(); // Translucent modifier volumes @@ -790,7 +793,10 @@ void DrawStrips(GLuint output_fbo) glActiveTexture(GL_TEXTURE0); glcache.Enable(GL_DEPTH_TEST); - DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 0); + if (current_pass.autosort) + DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 0); + else + DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 0); // // PASS 3c: Render a-buffer to temporary texture @@ -832,7 +838,6 @@ void DrawStrips(GLuint output_fbo) glActiveTexture(GL_TEXTURE0); glBindSampler(0, 0); glBindTexture(GL_TEXTURE_2D, opaqueTexId); - renderABuffer(previous_pass.autosort); SetupMainVBO(); } diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 89b9a0ec2..07593a889 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -275,8 +275,6 @@ void main() \n\ #elif pp_DepthFunc == 6 // Less or equal \n\ if (gl_FragDepth > frontDepth) \n\ discard; \n\ - #elif pp_DepthFunc == 7 // Always \n\ - gl_FragDepth = 0; // Set depth 
to 0 so that it's drawn in front of everything else \n\ #endif \n\ #endif \n\ \n\ From 8e309b01aac0564005a6e22b00c04fc28a927fe8 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sat, 30 Jun 2018 16:33:46 +0200 Subject: [PATCH 44/65] Delete const and in qualifiers as they cause GL compile errors --- core/rend/gles/abuffer.cpp | 6 +++--- core/rend/gles/gles.h | 28 ++++++++++++++-------------- 2 files changed, 17 insertions(+), 17 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index 811466de5..2eaa3b695 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -97,8 +97,8 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ \n\ for (int i = 0; i < num_frag; i++) \n\ { \n\ - const Pixel pixel = pixel_list[i]; \n\ - const PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\ + Pixel pixel = pixel_list[i]; \n\ + PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\ #if DEPTH_SORTED != 1 \n\ const float frag_depth = pixel.depth; \n\ switch (getDepthFunc(pp)) \n\ @@ -273,7 +273,7 @@ void main(void) \n\ discard; \n\ int list_len = 0; \n\ while (idx != EOL) { \n\ - const PolyParam pp = tr_poly_params[getPolyNumber(pixels[idx])]; \n\ + PolyParam pp = tr_poly_params[getPolyNumber(pixels[idx])]; \n\ if (getShadowEnable(pp)) \n\ { \n\ #if MV_MODE == MV_XOR \n\ diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 3b8057e23..40bf9cd0a 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -290,72 +290,72 @@ layout (binding = 1, std430) readonly buffer TrPolyParamBuffer { \n\ PolyParam tr_poly_params[]; \n\ }; \n\ \n\ -int getSrcBlendFunc(const in PolyParam pp, bool area1) \n\ +int getSrcBlendFunc(PolyParam pp, bool area1) \n\ { \n\ return ((area1 ? pp.tsp1 : pp.tsp) >> 29) & 7; \n\ } \n\ \n\ -int getDstBlendFunc(const in PolyParam pp, bool area1) \n\ +int getDstBlendFunc(PolyParam pp, bool area1) \n\ { \n\ return ((area1 ? 
pp.tsp1 : pp.tsp) >> 26) & 7; \n\ } \n\ \n\ -bool getSrcSelect(const in PolyParam pp, bool area1) \n\ +bool getSrcSelect(PolyParam pp, bool area1) \n\ { \n\ return (((area1 ? pp.tsp1 : pp.tsp) >> 25) & 1) != 0; \n\ } \n\ \n\ -bool getDstSelect(const in PolyParam pp, bool area1) \n\ +bool getDstSelect(PolyParam pp, bool area1) \n\ { \n\ return (((area1 ? pp.tsp1 : pp.tsp) >> 24) & 1) != 0; \n\ } \n\ \n\ -int getFogControl(const in PolyParam pp, bool area1) \n\ +int getFogControl(PolyParam pp, bool area1) \n\ { \n\ return ((area1 ? pp.tsp1 : pp.tsp) >> 22) & 3; \n\ } \n\ \n\ -bool getUseAlpha(const in PolyParam pp, bool area1) \n\ +bool getUseAlpha(PolyParam pp, bool area1) \n\ { \n\ return (((area1 ? pp.tsp1 : pp.tsp) >> 20) & 1) != 0; \n\ } \n\ \n\ -bool getIgnoreTexAlpha(const in PolyParam pp, bool area1) \n\ +bool getIgnoreTexAlpha(PolyParam pp, bool area1) \n\ { \n\ return (((area1 ? pp.tsp1 : pp.tsp) >> 19) & 1) != 0; \n\ } \n\ \n\ -int getShadingInstruction(const in PolyParam pp, bool area1) \n\ +int getShadingInstruction(PolyParam pp, bool area1) \n\ { \n\ return ((area1 ? 
pp.tsp1 : pp.tsp) >> 6) & 3; \n\ } \n\ \n\ -int getDepthFunc(const in PolyParam pp) \n\ +int getDepthFunc(PolyParam pp) \n\ { \n\ return (pp.isp >> 29) & 7; \n\ } \n\ \n\ -bool getDepthMask(const in PolyParam pp) \n\ +bool getDepthMask(PolyParam pp) \n\ { \n\ return ((pp.isp >> 26) & 1) != 1; \n\ } \n\ \n\ -bool getShadowEnable(const in PolyParam pp) \n\ +bool getShadowEnable(PolyParam pp) \n\ { \n\ return ((pp.pcw >> 7) & 1) != 0; \n\ } \n\ \n\ -int getPolyNumber(const in Pixel pixel) \n\ +int getPolyNumber(Pixel pixel) \n\ { \n\ return pixel.seq_num & 0x3FFFFFFF; \n\ } \n\ \n\ -bool isShadowed(const in Pixel pixel) \n\ +bool isShadowed(Pixel pixel) \n\ { \n\ return (pixel.seq_num & 0x80000000) == 0x80000000; \n\ } \n\ \n\ -bool isTwoVolumes(const in PolyParam pp) \n\ +bool isTwoVolumes(PolyParam pp) \n\ { \n\ return pp.tsp1 != 0xFFFFFFFF || pp.tcw1 != 0xFFFFFFFF; \n\ } \n\ From f08ea640122eb5e1d8afe298a5b4d34a4087495b Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 1 Jul 2018 10:07:41 +0200 Subject: [PATCH 45/65] Added back const qualifiers. Workaround for GLSL compile error with NVidia drivers 396. 
--- core/rend/gles/abuffer.cpp | 17 +++++++++-------- core/rend/gles/gles.h | 28 ++++++++++++++-------------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index 2eaa3b695..ddf5f4e8e 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -57,7 +57,7 @@ void bubbleSort(int array_size) { \n\ // poly number only \n\ if (getPolyNumber(pixel_list[j]) > getPolyNumber(pixel_list[j + 1])) { \n\ #endif \n\ - Pixel p = pixel_list[j + 1]; \n\ + const Pixel p = pixel_list[j + 1]; \n\ pixel_list[j + 1] = pixel_list[j]; \n\ pixel_list[j] = p; \n\ } \n\ @@ -69,7 +69,7 @@ void bubbleSort(int array_size) { \n\ // Insertion sort used to sort fragments \n\ void insertionSort(int array_size) { \n\ for (int i = 1; i < array_size; i++) { \n\ - Pixel p = pixel_list[i]; \n\ + const Pixel p = pixel_list[i]; \n\ int j = i - 1; \n\ #if DEPTH_SORTED == 1 \n\ for (; j >= 0 && (pixel_list[j].depth < p.depth || (pixel_list[j].depth == p.depth && getPolyNumber(pixel_list[j]) > getPolyNumber(p))); j--) { \n\ @@ -97,8 +97,8 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ \n\ for (int i = 0; i < num_frag; i++) \n\ { \n\ - Pixel pixel = pixel_list[i]; \n\ - PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\ + const Pixel pixel = pixel_list[i]; \n\ + const PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\ #if DEPTH_SORTED != 1 \n\ const float frag_depth = pixel.depth; \n\ switch (getDepthFunc(pp)) \n\ @@ -214,7 +214,7 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ dstCoef = vec4(1.0 - dstColor.a); \n\ break; \n\ } \n\ - vec4 result = clamp(dstColor * dstCoef + srcColor * srcCoef, 0.0, 1.0); \n\ + const vec4 result = clamp(dstColor * dstCoef + srcColor * srcCoef, 0.0, 1.0); \n\ if (getDstSelect(pp, area1)) \n\ secondaryBuffer = result; \n\ else \n\ @@ -273,7 +273,8 @@ void main(void) \n\ discard; \n\ int list_len = 0; \n\ while (idx != EOL) { \n\ - PolyParam pp = 
tr_poly_params[getPolyNumber(pixels[idx])]; \n\ + const Pixel pixel = pixels[idx]; \n\ + const PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\ if (getShadowEnable(pp)) \n\ { \n\ #if MV_MODE == MV_XOR \n\ @@ -283,11 +284,11 @@ void main(void) \n\ if (gl_FragDepth <= pixels[idx].depth) \n\ atomicOr(pixels[idx].seq_num, 0x40000000); \n\ #elif MV_MODE == MV_INCLUSION \n\ - uint prev_val = atomicAnd(pixels[idx].seq_num, 0xBFFFFFFF); \n\ + int prev_val = atomicAnd(pixels[idx].seq_num, 0xBFFFFFFF); \n\ if ((prev_val & 0xC0000000) == 0x40000000) \n\ pixels[idx].seq_num = bitfieldInsert(pixels[idx].seq_num, 1, 31, 1); \n\ #elif MV_MODE == MV_EXCLUSION \n\ - uint prev_val = atomicAnd(pixels[idx].seq_num, 0x3FFFFFFF); \n\ + int prev_val = atomicAnd(pixels[idx].seq_num, 0x3FFFFFFF); \n\ if ((prev_val & 0xC0000000) == 0x80000000) \n\ pixels[idx].seq_num = bitfieldInsert(pixels[idx].seq_num, 1, 31, 1); \n\ #endif \n\ diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 40bf9cd0a..bd15614d6 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -290,72 +290,72 @@ layout (binding = 1, std430) readonly buffer TrPolyParamBuffer { \n\ PolyParam tr_poly_params[]; \n\ }; \n\ \n\ -int getSrcBlendFunc(PolyParam pp, bool area1) \n\ +int getSrcBlendFunc(const PolyParam pp, bool area1) \n\ { \n\ return ((area1 ? pp.tsp1 : pp.tsp) >> 29) & 7; \n\ } \n\ \n\ -int getDstBlendFunc(PolyParam pp, bool area1) \n\ +int getDstBlendFunc(const PolyParam pp, bool area1) \n\ { \n\ return ((area1 ? pp.tsp1 : pp.tsp) >> 26) & 7; \n\ } \n\ \n\ -bool getSrcSelect(PolyParam pp, bool area1) \n\ +bool getSrcSelect(const PolyParam pp, bool area1) \n\ { \n\ return (((area1 ? pp.tsp1 : pp.tsp) >> 25) & 1) != 0; \n\ } \n\ \n\ -bool getDstSelect(PolyParam pp, bool area1) \n\ +bool getDstSelect(const PolyParam pp, bool area1) \n\ { \n\ return (((area1 ? 
pp.tsp1 : pp.tsp) >> 24) & 1) != 0; \n\ } \n\ \n\ -int getFogControl(PolyParam pp, bool area1) \n\ +int getFogControl(const PolyParam pp, bool area1) \n\ { \n\ return ((area1 ? pp.tsp1 : pp.tsp) >> 22) & 3; \n\ } \n\ \n\ -bool getUseAlpha(PolyParam pp, bool area1) \n\ +bool getUseAlpha(const PolyParam pp, bool area1) \n\ { \n\ return (((area1 ? pp.tsp1 : pp.tsp) >> 20) & 1) != 0; \n\ } \n\ \n\ -bool getIgnoreTexAlpha(PolyParam pp, bool area1) \n\ +bool getIgnoreTexAlpha(const PolyParam pp, bool area1) \n\ { \n\ return (((area1 ? pp.tsp1 : pp.tsp) >> 19) & 1) != 0; \n\ } \n\ \n\ -int getShadingInstruction(PolyParam pp, bool area1) \n\ +int getShadingInstruction(const PolyParam pp, bool area1) \n\ { \n\ return ((area1 ? pp.tsp1 : pp.tsp) >> 6) & 3; \n\ } \n\ \n\ -int getDepthFunc(PolyParam pp) \n\ +int getDepthFunc(const PolyParam pp) \n\ { \n\ return (pp.isp >> 29) & 7; \n\ } \n\ \n\ -bool getDepthMask(PolyParam pp) \n\ +bool getDepthMask(const PolyParam pp) \n\ { \n\ return ((pp.isp >> 26) & 1) != 1; \n\ } \n\ \n\ -bool getShadowEnable(PolyParam pp) \n\ +bool getShadowEnable(const PolyParam pp) \n\ { \n\ return ((pp.pcw >> 7) & 1) != 0; \n\ } \n\ \n\ -int getPolyNumber(Pixel pixel) \n\ +int getPolyNumber(const Pixel pixel) \n\ { \n\ return pixel.seq_num & 0x3FFFFFFF; \n\ } \n\ \n\ -bool isShadowed(Pixel pixel) \n\ +bool isShadowed(const Pixel pixel) \n\ { \n\ return (pixel.seq_num & 0x80000000) == 0x80000000; \n\ } \n\ \n\ -bool isTwoVolumes(PolyParam pp) \n\ +bool isTwoVolumes(const PolyParam pp) \n\ { \n\ return pp.tsp1 != 0xFFFFFFFF || pp.tcw1 != 0xFFFFFFFF; \n\ } \n\ From eea20d9942d7e481d994161c1e1f05eda0fe6dbc Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 1 Jul 2018 21:18:21 +0200 Subject: [PATCH 46/65] Remove unneeded shader uniforms and params --- core/rend/gles/gldraw.cpp | 5 +---- core/rend/gles/gles.cpp | 7 ------- core/rend/gles/gles.h | 10 ---------- 3 files changed, 1 insertion(+), 21 deletions(-) diff --git a/core/rend/gles/gldraw.cpp 
b/core/rend/gles/gldraw.cpp index a98312f5c..9344d81a0 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -255,6 +255,7 @@ template ShaderUniforms.tsp1.SrcInstr = 1; ShaderUniforms.tsp1.DstInstr = 0; } + ShaderUniforms.Set(CurrentShader); SetTileClip(gp->tileclip,true); @@ -264,10 +265,6 @@ template glcache.StencilFunc(GL_ALWAYS,stencil,stencil); - ShaderUniforms.stencil = stencil; - ShaderUniforms.depth_mask = gp->isp.ZWriteDis == 0; - ShaderUniforms.Set(CurrentShader); - if (CurrentShader->pp_Texture) { for (int i = 0; i < 2; i++) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 7f4fdcef3..ff361d278 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -102,7 +102,6 @@ INTERPOLATION " vary " lowp vec4 vtx_offs; \n\ INTERPOLATION " vary " lowp vec4 vtx_base1; \n\ INTERPOLATION " vary " lowp vec4 vtx_offs1; \n\ " vary " mediump vec2 vtx_uv1; \n\ - " vary " mediump float vtx_z; \n\ void main() \n\ { \n\ vtx_base=in_base; \n\ @@ -112,7 +111,6 @@ void main() \n\ vtx_offs1 = in_offs1; \n\ vtx_uv1 = in_uv1; \n\ vec4 vpos=in_pos; \n\ - vtx_z = vpos.z; \n\ vpos.w=1.0/vpos.z; \n" #ifndef GLES "\ @@ -235,8 +233,6 @@ layout(binding = 5) uniform sampler2D fog_table; \n\ uniform int pp_Number; \n\ uniform usampler2D shadow_stencil; \n\ uniform sampler2D DepthTex; \n\ -uniform uint pp_Stencil; \n\ -uniform bool depth_mask; \n\ \n\ uniform ivec2 blend_mode[2]; \n\ #if pp_TwoVolumes == 1 \n\ @@ -253,7 +249,6 @@ INTERPOLATION " vary " lowp vec4 vtx_offs; \n\ INTERPOLATION " vary " lowp vec4 vtx_base1; \n\ INTERPOLATION " vary " lowp vec4 vtx_offs1; \n\ " vary " mediump vec2 vtx_uv1; \n\ - " vary " mediump float vtx_z; \n\ lowp float fog_mode2(highp float w) \n\ { \n\ highp float z = clamp(w * sp_FOG_DENSITY, 1.0, 255.9999); \n\ @@ -985,14 +980,12 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe glUniform1i(gu, 3); // GL_TEXTURE3 s->pp_Number = glGetUniformLocation(s->program, 
"pp_Number"); - s->pp_Stencil = glGetUniformLocation(s->program, "pp_Stencil"); s->blend_mode = glGetUniformLocation(s->program, "blend_mode"); s->use_alpha = glGetUniformLocation(s->program, "use_alpha"); s->ignore_tex_alpha = glGetUniformLocation(s->program, "ignore_tex_alpha"); s->shading_instr = glGetUniformLocation(s->program, "shading_instr"); s->fog_control = glGetUniformLocation(s->program, "fog_control"); - s->depth_mask = glGetUniformLocation(s->program, "depth_mask"); return glIsProgram(s->program)==GL_TRUE; } diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index e8855b18b..ceb6f71e7 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -60,13 +60,11 @@ struct PipelineShader GLuint sp_FOG_COL_RAM,sp_FOG_COL_VERT,sp_FOG_DENSITY; GLuint shade_scale_factor; GLuint pp_Number; - GLuint pp_Stencil; GLuint blend_mode; GLuint use_alpha; GLuint ignore_tex_alpha; GLuint shading_instr; GLuint fog_control; - GLuint depth_mask; // u32 cp_AlphaTest; s32 pp_ClipTestMode; @@ -158,12 +156,10 @@ struct ShaderUniforms_t float ps_FOG_COL_RAM[3]; float ps_FOG_COL_VERT[3]; int poly_number; - u32 stencil; TSP tsp0; TSP tsp1; TCW tcw0; TCW tcw1; - bool depth_mask; void setUniformArray(GLuint location, int v0, int v1) { @@ -213,12 +209,6 @@ struct ShaderUniforms_t if (s->pp_Number != -1) glUniform1i(s->pp_Number, poly_number); - - if (s->pp_Stencil != -1) - glUniform1ui(s->pp_Stencil, stencil); - - if (s->depth_mask != -1) - glUniform1i(s->depth_mask, depth_mask); } }; From 5d71987193c5152d6b56ae2e18accc3027d9f4ec Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 1 Jul 2018 22:03:00 +0200 Subject: [PATCH 47/65] Use correct data types for a-buffer pointers texture. Might help non-NVidia drivers. 
--- core/rend/gles/abuffer.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index ddf5f4e8e..c173bc0f9 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -319,8 +319,7 @@ void initABuffer() glBindTexture(GL_TEXTURE_2D, pixels_pointers); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - //Uses GL_R32F instead of GL_R32I that is not working in R257.15 - glTexImage2D(GL_TEXTURE_2D, 0, GL_R32F, g_imageWidth, g_imageHeight, 0, GL_RED, GL_FLOAT, 0); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, g_imageWidth, g_imageHeight, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, 0); glBindImageTexture(4, pixels_pointers, 0, false, 0, GL_READ_WRITE, GL_R32UI); glCheck(); } From 9fdfd7045836729f88ae511e40edbdb126f33174 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 1 Jul 2018 22:51:58 +0200 Subject: [PATCH 48/65] Might help non-NVidia GPU. --- core/rend/gles/gles.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index ceb6f71e7..9240a8a93 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -226,7 +226,7 @@ extern GLuint depthSaveTexId; #define SHADER_HEADER "#version 430 \n\ \n\ -layout(size1x32, binding = 4) uniform coherent restrict uimage2D abufferPointerImg; \n\ +layout(r32ui, binding = 4) uniform coherent restrict uimage2D abufferPointerImg; \n\ struct Pixel { \n\ mediump vec4 color; \n\ mediump float depth; \n\ From 292ff84e2253d77efaff906151dde786709eab39 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 3 Jul 2018 17:07:48 +0200 Subject: [PATCH 49/65] Improve performance of translucent modifier volumes. Same optimization as opaque modvols: use triangles instead of screen quad. Makes NBA 2K2 and NFL 2K2 playable. 
--- core/rend/gles/abuffer.cpp | 3 +-- core/rend/gles/gldraw.cpp | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index c173bc0f9..da396dd3f 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -506,8 +506,7 @@ void DrawTranslucentModVols(int first, int count) ShaderUniforms.Set(shader); glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - DrawQuad(); - SetupModvolVBO(); + glDrawArrays(GL_TRIANGLES, param.first * 3, param.count * 3); glCheck(); } } } diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index a2a993ed7..ddc0bbbc9 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -770,7 +770,6 @@ void DrawStrips(GLuint output_fbo) // // PASS 3: Render TR to a-buffers // - SetupMainVBO(); glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); glcache.Disable(GL_DEPTH_TEST); From c535e980992a12b40ca9634f382635a820ad5f90 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 3 Jul 2018 20:59:31 +0200 Subject: [PATCH 50/65] Fix previous merge --- core/rend/gles/gles.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 87fc40816..084d2d2ff 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -299,7 +299,18 @@ extern struct ShaderUniforms_t float fog_den_float; float ps_FOG_COL_RAM[3]; float ps_FOG_COL_VERT[3]; + int poly_number; float trilinear_alpha; + TSP tsp0; + TSP tsp1; + TCW tcw0; + TCW tcw1; + + void setUniformArray(GLuint location, int v0, int v1) + { + int array[] = { v0, v1 }; + glUniform1iv(location, 2, array); + } void Set(PipelineShader* s) { @@ -321,6 +332,29 @@ extern struct ShaderUniforms_t if (s->sp_FOG_COL_VERT!=-1) glUniform3fv( s->sp_FOG_COL_VERT, 1, ps_FOG_COL_VERT); + if (s->shade_scale_factor != -1) + glUniform1f(s->shade_scale_factor, FPU_SHAD_SCALE.scale_factor / 256.f); + + if (s->blend_mode != -1) { + u32 blend_mode[] = { 
tsp0.SrcInstr, tsp0.DstInstr, tsp1.SrcInstr, tsp1.DstInstr }; + glUniform2iv(s->blend_mode, 2, (GLint *)blend_mode); + } + + if (s->use_alpha != -1) + setUniformArray(s->use_alpha, tsp0.UseAlpha, tsp1.UseAlpha); + + if (s->ignore_tex_alpha != -1) + setUniformArray(s->ignore_tex_alpha, tsp0.IgnoreTexA, tsp1.IgnoreTexA); + + if (s->shading_instr != -1) + setUniformArray(s->shading_instr, tsp0.ShadInstr, tsp1.ShadInstr); + + if (s->fog_control != -1) + setUniformArray(s->fog_control, tsp0.FogCtrl, tsp1.FogCtrl); + + if (s->pp_Number != -1) + glUniform1i(s->pp_Number, poly_number); + if (s->trilinear_alpha != -1) glUniform1f(s->trilinear_alpha, trilinear_alpha); } From 4e5006fbfef24518f3a4a00f2d22912085b2feca Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 3 Jul 2018 21:05:11 +0200 Subject: [PATCH 51/65] Get rid of GLES #defines --- core/rend/gles/gldraw.cpp | 4 - core/rend/gles/gles.cpp | 486 +++++++++++++------------------------- core/rend/gles/gles.h | 37 --- 3 files changed, 164 insertions(+), 363 deletions(-) diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index f0743ea8d..f1f8f9fb8 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -472,9 +472,7 @@ void SetMVS_Mode(ModifierVolumeMode mv_mode, ISP_Modvol ispc) void SetupMainVBO() { -#ifndef GLES glBindVertexArray(gl.vbo.vao); -#endif glBindBuffer(GL_ARRAY_BUFFER, gl.vbo.geometry); glCheck(); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gl.vbo.idxs); glCheck(); @@ -504,9 +502,7 @@ void SetupMainVBO() void SetupModvolVBO() { -#ifndef GLES glBindVertexArray(gl.vbo.vao); -#endif glBindBuffer(GL_ARRAY_BUFFER, gl.vbo.modvols); glCheck(); diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index c0be6424c..7d1dc773b 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -15,12 +15,10 @@ int fbdev = -1; #endif -#ifndef GLES #if HOST_OS != OS_DARWIN #include #pragma comment(lib,"Opengl32.lib") #endif -#endif /* GL|ES 2 @@ -61,21 +59,14 @@ Tile clip float 
fb_scale_x,fb_scale_y; float scale_x, scale_y; -#ifndef GLES #define attr "in" #define vary "out" -#else -#define attr "attribute" -#define vary "varying" -#endif //Fragment and vertex shaders code //pretty much 1:1 copy of the d3d ones for now const char* VertexShaderSource = -#ifndef GLES - "#version 140 \n" -#endif "\ +#version 140 \n\ #define pp_Gouraud %d \n\ \n\ #if pp_Gouraud == 0 \n\ @@ -111,19 +102,12 @@ void main() \n\ vtx_offs1 = in_offs1; \n\ vtx_uv1 = in_uv1; \n\ vec4 vpos=in_pos; \n\ - vpos.w=1.0/vpos.z; \n" -#ifndef GLES - "\ + vpos.w=1.0/vpos.z; \n\ if (vpos.w < 0.0) { \n\ gl_Position = vec4(0.0, 0.0, 0.0, vpos.w); \n\ return; \n\ } \n\ - vpos.z = vpos.w; \n" -#else - "\ - vpos.z=depth_scale.x+depth_scale.y*vpos.w; \n" -#endif - "\ + vpos.z = vpos.w; \n\ vpos.xy=vpos.xy*scale.xy-scale.zw; \n\ vpos.xy*=vpos.w; \n\ gl_Position = vpos; \n\ @@ -178,14 +162,9 @@ lowp float fog_mode2(highp float invW) \n\ } \n\ */ -#ifndef GLES #define FRAGCOL "FragColor" #define TEXLOOKUP "texture" #define vary "in" -#else -#define FRAGCOL "gl_FragColor" -#define TEXLOOKUP "texture2D" -#endif const char* PixelPipelineShader = SHADER_HEADER @@ -204,14 +183,11 @@ const char* PixelPipelineShader = SHADER_HEADER #define pp_BumpMap %d \n\ #define PASS %d \n\ #define PI 3.1415926 \n\ - \n" -#ifndef GLES - "\ - #if PASS <= 1 \n\ - out vec4 FragColor; \n\ - #endif \n" -#endif -"\ + \n\ +#if PASS <= 1 \n\ +out vec4 FragColor; \n\ +#endif \n\ + \n\ #if pp_TwoVolumes == 1 \n\ #define IF(x) if (x) \n\ #else \n\ @@ -225,7 +201,6 @@ const char* PixelPipelineShader = SHADER_HEADER #endif \n\ \n\ /* Shader program params*/ \n\ -/* gles has no alpha test stage, so its emulated on the shader */ \n\ uniform lowp float cp_AlphaTestValue; \n\ uniform lowp vec4 pp_ClipTest; \n\ uniform lowp vec3 sp_FOG_COL_RAM,sp_FOG_COL_VERT; \n\ @@ -497,11 +472,10 @@ void main() \n\ }"; const char* OSD_Shader = -#ifndef GLES - "#version 140 \n" - "out vec4 FragColor; \n" -#endif " \ +#version 140 \n\ +out 
vec4 FragColor; \n\ + \n\ " vary " lowp vec4 vtx_base; \n\ " vary " mediump vec2 vtx_uv; \n\ /* Vertex input*/ \n\ @@ -521,304 +495,187 @@ int screen_height; GLuint fogTextureId; #if (HOST_OS != OS_DARWIN) && !defined(TARGET_NACL32) -#if defined(GLES) && !defined(USE_SDL) - // Create a basic GLES context - bool gl_init(void* wind, void* disp) + +#if HOST_OS == OS_WINDOWS + #define WGL_DRAW_TO_WINDOW_ARB 0x2001 + #define WGL_ACCELERATION_ARB 0x2003 + #define WGL_SWAP_METHOD_ARB 0x2007 + #define WGL_SUPPORT_OPENGL_ARB 0x2010 + #define WGL_DOUBLE_BUFFER_ARB 0x2011 + #define WGL_PIXEL_TYPE_ARB 0x2013 + #define WGL_COLOR_BITS_ARB 0x2014 + #define WGL_DEPTH_BITS_ARB 0x2022 + #define WGL_STENCIL_BITS_ARB 0x2023 + #define WGL_FULL_ACCELERATION_ARB 0x2027 + #define WGL_SWAP_EXCHANGE_ARB 0x2028 + #define WGL_TYPE_RGBA_ARB 0x202B + #define WGL_CONTEXT_MAJOR_VERSION_ARB 0x2091 + #define WGL_CONTEXT_MINOR_VERSION_ARB 0x2092 + #define WGL_CONTEXT_FLAGS_ARB 0x2094 + + #define WGL_CONTEXT_PROFILE_MASK_ARB 0x9126 + #define WGL_CONTEXT_MAJOR_VERSION_ARB 0x2091 + #define WGL_CONTEXT_MINOR_VERSION_ARB 0x2092 + #define WGL_CONTEXT_LAYER_PLANE_ARB 0x2093 + #define WGL_CONTEXT_FLAGS_ARB 0x2094 + #define WGL_CONTEXT_DEBUG_BIT_ARB 0x0001 + #define WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB 0x0002 + #define ERROR_INVALID_VERSION_ARB 0x2095 + #define WGL_CONTEXT_CORE_PROFILE_BIT_ARB 0x00000001 + + typedef BOOL (WINAPI * PFNWGLCHOOSEPIXELFORMATARBPROC) (HDC hdc, const int *piAttribIList, const FLOAT *pfAttribFList, UINT nMaxFormats, + int *piFormats, UINT *nNumFormats); + typedef HGLRC (WINAPI * PFNWGLCREATECONTEXTATTRIBSARBPROC) (HDC hDC, HGLRC hShareContext, const int *attribList); + typedef BOOL (WINAPI * PFNWGLSWAPINTERVALEXTPROC) (int interval); + + PFNWGLCHOOSEPIXELFORMATARBPROC wglChoosePixelFormatARB; + PFNWGLCREATECONTEXTATTRIBSARBPROC wglCreateContextAttribsARB; + PFNWGLSWAPINTERVALEXTPROC wglSwapIntervalEXT; + + + HDC ourWindowHandleToDeviceContext; + bool gl_init(void* hwnd, void* 
hdc) { - #if !defined(_ANDROID) - gl.setup.native_wind=(EGLNativeWindowType)wind; - gl.setup.native_disp=(EGLNativeDisplayType)disp; - - //try to get a display - gl.setup.display = eglGetDisplay(gl.setup.native_disp); - - //if failed, get the default display (this will not happen in win32) - if(gl.setup.display == EGL_NO_DISPLAY) - gl.setup.display = eglGetDisplay((EGLNativeDisplayType) EGL_DEFAULT_DISPLAY); - - - // Initialise EGL - EGLint maj, min; - if (!eglInitialize(gl.setup.display, &maj, &min)) + PIXELFORMATDESCRIPTOR pfd = { - printf("EGL Error: eglInitialize failed\n"); - return false; + sizeof(PIXELFORMATDESCRIPTOR), + 1, + PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER, //Flags + PFD_TYPE_RGBA, //The kind of framebuffer. RGBA or palette. + 32, //Colordepth of the framebuffer. + 0, 0, 0, 0, 0, 0, + 0, + 0, + 0, + 0, 0, 0, 0, + 24, //Number of bits for the depthbuffer + 8, //Number of bits for the stencilbuffer + 0, //Number of Aux buffers in the framebuffer. + PFD_MAIN_PLANE, + 0, + 0, 0, 0 + }; + + /*HDC*/ ourWindowHandleToDeviceContext = (HDC)hdc;//GetDC((HWND)hwnd); + + int letWindowsChooseThisPixelFormat; + letWindowsChooseThisPixelFormat = ChoosePixelFormat(ourWindowHandleToDeviceContext, &pfd); + SetPixelFormat(ourWindowHandleToDeviceContext,letWindowsChooseThisPixelFormat, &pfd); + + HGLRC ourOpenGLRenderingContext = wglCreateContext(ourWindowHandleToDeviceContext); + wglMakeCurrent (ourWindowHandleToDeviceContext, ourOpenGLRenderingContext); + + bool rv = true; + + if (rv) { + + wglChoosePixelFormatARB = (PFNWGLCHOOSEPIXELFORMATARBPROC)wglGetProcAddress("wglChoosePixelFormatARB"); + if(!wglChoosePixelFormatARB) + { + return false; + } + + wglCreateContextAttribsARB = (PFNWGLCREATECONTEXTATTRIBSARBPROC)wglGetProcAddress("wglCreateContextAttribsARB"); + if(!wglCreateContextAttribsARB) + { + return false; + } + + wglSwapIntervalEXT = (PFNWGLSWAPINTERVALEXTPROC)wglGetProcAddress("wglSwapIntervalEXT"); + if(!wglSwapIntervalEXT) + { + return 
false; + } + + int attribs[] = + { + WGL_CONTEXT_MAJOR_VERSION_ARB, 3, + WGL_CONTEXT_MINOR_VERSION_ARB, 1, + WGL_CONTEXT_FLAGS_ARB, WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB, + WGL_CONTEXT_PROFILE_MASK_ARB, WGL_CONTEXT_CORE_PROFILE_BIT_ARB, + 0 + }; + + HGLRC m_hrc = wglCreateContextAttribsARB(ourWindowHandleToDeviceContext,0, attribs); + + if (m_hrc) + wglMakeCurrent(ourWindowHandleToDeviceContext,m_hrc); + else + rv = false; + + wglDeleteContext(ourOpenGLRenderingContext); } - printf("Info: EGL version %d.%d\n",maj,min); - - - - EGLint pi32ConfigAttribs[] = { EGL_SURFACE_TYPE, EGL_WINDOW_BIT, EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT , EGL_DEPTH_SIZE, 24, EGL_STENCIL_SIZE, 8, EGL_NONE }; - EGLint pi32ContextAttribs[] = { EGL_CONTEXT_CLIENT_VERSION, 2 , EGL_NONE }; - - int num_config; - - EGLConfig config; - if (!eglChooseConfig(gl.setup.display, pi32ConfigAttribs, &config, 1, &num_config) || (num_config != 1)) - { - printf("EGL Error: eglChooseConfig failed\n"); - return false; + if (rv) { + rv = gl3wInit() != -1 && gl3wIsSupported(3, 1); } - gl.setup.surface = eglCreateWindowSurface(gl.setup.display, config, (EGLNativeWindowType)wind, NULL); + RECT r; + GetClientRect((HWND)hwnd, &r); + screen_width = r.right - r.left; + screen_height = r.bottom - r.top; - if (eglCheck()) - return false; - - eglBindAPI(EGL_OPENGL_ES_API); - if (eglCheck()) - return false; - - gl.setup.context = eglCreateContext(gl.setup.display, config, NULL, pi32ContextAttribs); - - if (eglCheck()) - return false; - - #endif - - eglMakeCurrent(gl.setup.display, gl.setup.surface, gl.setup.surface, gl.setup.context); - - if (eglCheck()) - return false; - - EGLint w,h; - eglQuerySurface(gl.setup.display, gl.setup.surface, EGL_WIDTH, &w); - eglQuerySurface(gl.setup.display, gl.setup.surface, EGL_HEIGHT, &h); - - screen_width=w; - screen_height=h; - - printf("EGL config: %08X, %08X, %08X %dx%d\n",gl.setup.context,gl.setup.display,gl.setup.surface,w,h); - return true; + return rv; } - - void 
egl_stealcntx() - { - gl.setup.context=eglGetCurrentContext(); - gl.setup.display=eglGetCurrentDisplay(); - gl.setup.surface=eglGetCurrentSurface(EGL_DRAW); - } - - //swap buffers + #include void gl_swap() { - #ifdef TARGET_PANDORA0 - if (fbdev >= 0) - { - int arg = 0; - ioctl(fbdev,FBIO_WAITFORVSYNC,&arg); - } - #endif - eglSwapBuffers(gl.setup.display, gl.setup.surface); - } - - //destroy the gles context and free resources - void gl_term() - { - #if HOST_OS==OS_WINDOWS - ReleaseDC((HWND)gl.setup.native_wind,(HDC)gl.setup.native_disp); - #endif - #ifdef TARGET_PANDORA - eglMakeCurrent( gl.setup.display, NULL, NULL, EGL_NO_CONTEXT ); - if (gl.setup.context) - eglDestroyContext(gl.setup.display, gl.setup.context); - if (gl.setup.surface) - eglDestroySurface(gl.setup.display, gl.setup.surface); - if (gl.setup.display) - eglTerminate(gl.setup.display); - if (fbdev>=0) - close( fbdev ); - - fbdev=-1; - gl.setup.context=0; - gl.setup.surface=0; - gl.setup.display=0; - #endif + wglSwapLayerBuffers(ourWindowHandleToDeviceContext,WGL_SWAP_MAIN_PLANE); + //SwapBuffers(ourWindowHandleToDeviceContext); } #else + #if defined(SUPPORT_X11) + //! 
windows && X11 + //let's assume glx for now - #if HOST_OS == OS_WINDOWS - #define WGL_DRAW_TO_WINDOW_ARB 0x2001 - #define WGL_ACCELERATION_ARB 0x2003 - #define WGL_SWAP_METHOD_ARB 0x2007 - #define WGL_SUPPORT_OPENGL_ARB 0x2010 - #define WGL_DOUBLE_BUFFER_ARB 0x2011 - #define WGL_PIXEL_TYPE_ARB 0x2013 - #define WGL_COLOR_BITS_ARB 0x2014 - #define WGL_DEPTH_BITS_ARB 0x2022 - #define WGL_STENCIL_BITS_ARB 0x2023 - #define WGL_FULL_ACCELERATION_ARB 0x2027 - #define WGL_SWAP_EXCHANGE_ARB 0x2028 - #define WGL_TYPE_RGBA_ARB 0x202B - #define WGL_CONTEXT_MAJOR_VERSION_ARB 0x2091 - #define WGL_CONTEXT_MINOR_VERSION_ARB 0x2092 - #define WGL_CONTEXT_FLAGS_ARB 0x2094 - - #define WGL_CONTEXT_PROFILE_MASK_ARB 0x9126 - #define WGL_CONTEXT_MAJOR_VERSION_ARB 0x2091 - #define WGL_CONTEXT_MINOR_VERSION_ARB 0x2092 - #define WGL_CONTEXT_LAYER_PLANE_ARB 0x2093 - #define WGL_CONTEXT_FLAGS_ARB 0x2094 - #define WGL_CONTEXT_DEBUG_BIT_ARB 0x0001 - #define WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB 0x0002 - #define ERROR_INVALID_VERSION_ARB 0x2095 - #define WGL_CONTEXT_CORE_PROFILE_BIT_ARB 0x00000001 - - typedef BOOL (WINAPI * PFNWGLCHOOSEPIXELFORMATARBPROC) (HDC hdc, const int *piAttribIList, const FLOAT *pfAttribFList, UINT nMaxFormats, - int *piFormats, UINT *nNumFormats); - typedef HGLRC (WINAPI * PFNWGLCREATECONTEXTATTRIBSARBPROC) (HDC hDC, HGLRC hShareContext, const int *attribList); - typedef BOOL (WINAPI * PFNWGLSWAPINTERVALEXTPROC) (int interval); - - PFNWGLCHOOSEPIXELFORMATARBPROC wglChoosePixelFormatARB; - PFNWGLCREATECONTEXTATTRIBSARBPROC wglCreateContextAttribsARB; - PFNWGLSWAPINTERVALEXTPROC wglSwapIntervalEXT; + #include + #include + #include + #include - HDC ourWindowHandleToDeviceContext; - bool gl_init(void* hwnd, void* hdc) + bool gl_init(void* wind, void* disp) { - PIXELFORMATDESCRIPTOR pfd = - { - sizeof(PIXELFORMATDESCRIPTOR), - 1, - PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER, //Flags - PFD_TYPE_RGBA, //The kind of framebuffer. RGBA or palette. 
- 32, //Colordepth of the framebuffer. - 0, 0, 0, 0, 0, 0, - 0, - 0, - 0, - 0, 0, 0, 0, - 24, //Number of bits for the depthbuffer - 8, //Number of bits for the stencilbuffer - 0, //Number of Aux buffers in the framebuffer. - PFD_MAIN_PLANE, - 0, - 0, 0, 0 - }; + extern void* x11_glc; - /*HDC*/ ourWindowHandleToDeviceContext = (HDC)hdc;//GetDC((HWND)hwnd); + glXMakeCurrent((Display*)libPvr_GetRenderSurface(), + (GLXDrawable)libPvr_GetRenderTarget(), + (GLXContext)x11_glc); - int letWindowsChooseThisPixelFormat; - letWindowsChooseThisPixelFormat = ChoosePixelFormat(ourWindowHandleToDeviceContext, &pfd); - SetPixelFormat(ourWindowHandleToDeviceContext,letWindowsChooseThisPixelFormat, &pfd); - - HGLRC ourOpenGLRenderingContext = wglCreateContext(ourWindowHandleToDeviceContext); - wglMakeCurrent (ourWindowHandleToDeviceContext, ourOpenGLRenderingContext); - - bool rv = true; - - if (rv) { - - wglChoosePixelFormatARB = (PFNWGLCHOOSEPIXELFORMATARBPROC)wglGetProcAddress("wglChoosePixelFormatARB"); - if(!wglChoosePixelFormatARB) - { - return false; - } - - wglCreateContextAttribsARB = (PFNWGLCREATECONTEXTATTRIBSARBPROC)wglGetProcAddress("wglCreateContextAttribsARB"); - if(!wglCreateContextAttribsARB) - { - return false; - } - - wglSwapIntervalEXT = (PFNWGLSWAPINTERVALEXTPROC)wglGetProcAddress("wglSwapIntervalEXT"); - if(!wglSwapIntervalEXT) - { - return false; - } - - int attribs[] = - { - WGL_CONTEXT_MAJOR_VERSION_ARB, 3, - WGL_CONTEXT_MINOR_VERSION_ARB, 1, - WGL_CONTEXT_FLAGS_ARB, WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB, - WGL_CONTEXT_PROFILE_MASK_ARB, WGL_CONTEXT_CORE_PROFILE_BIT_ARB, - 0 - }; - - HGLRC m_hrc = wglCreateContextAttribsARB(ourWindowHandleToDeviceContext,0, attribs); - - if (m_hrc) - wglMakeCurrent(ourWindowHandleToDeviceContext,m_hrc); - else - rv = false; - - wglDeleteContext(ourOpenGLRenderingContext); - } - - if (rv) { - rv = gl3wInit() != -1 && gl3wIsSupported(3, 1); - } - - RECT r; - GetClientRect((HWND)hwnd, &r); - screen_width = r.right - r.left; - 
screen_height = r.bottom - r.top; - - return rv; + screen_width = 640; + screen_height = 480; + return gl3wInit() != -1 && gl3wIsSupported(3, 1); } - #include + void gl_swap() { - wglSwapLayerBuffers(ourWindowHandleToDeviceContext,WGL_SWAP_MAIN_PLANE); - //SwapBuffers(ourWindowHandleToDeviceContext); - } - #else - #if defined(SUPPORT_X11) - //! windows && X11 - //let's assume glx for now + glXSwapBuffers((Display*)libPvr_GetRenderSurface(), (GLXDrawable)libPvr_GetRenderTarget()); - #include - #include - #include - #include + Window win; + int temp; + unsigned int tempu, new_w, new_h; + XGetGeometry((Display*)libPvr_GetRenderSurface(), (GLXDrawable)libPvr_GetRenderTarget(), + &win, &temp, &temp, &new_w, &new_h,&tempu,&tempu); - - bool gl_init(void* wind, void* disp) - { - extern void* x11_glc; - - glXMakeCurrent((Display*)libPvr_GetRenderSurface(), - (GLXDrawable)libPvr_GetRenderTarget(), - (GLXContext)x11_glc); - - screen_width = 640; - screen_height = 480; - return gl3wInit() != -1 && gl3wIsSupported(3, 1); + //if resized, clear up the draw buffers, to avoid out-of-draw-area junk data + if (new_w != screen_width || new_h != screen_height) { + screen_width = new_w; + screen_height = new_h; } - void gl_swap() - { + #if 0 + //handy to debug really stupid render-not-working issues ... + + glcache.ClearColor( 0, 0.5, 1, 1 ); + glClear( GL_COLOR_BUFFER_BIT ); glXSwapBuffers((Display*)libPvr_GetRenderSurface(), (GLXDrawable)libPvr_GetRenderTarget()); - Window win; - int temp; - unsigned int tempu, new_w, new_h; - XGetGeometry((Display*)libPvr_GetRenderSurface(), (GLXDrawable)libPvr_GetRenderTarget(), - &win, &temp, &temp, &new_w, &new_h,&tempu,&tempu); - //if resized, clear up the draw buffers, to avoid out-of-draw-area junk data - if (new_w != screen_width || new_h != screen_height) { - screen_width = new_w; - screen_height = new_h; - } - - #if 0 - //handy to debug really stupid render-not-working issues ... 
- - glcache.ClearColor( 0, 0.5, 1, 1 ); - glClear( GL_COLOR_BUFFER_BIT ); - glXSwapBuffers((Display*)libPvr_GetRenderSurface(), (GLXDrawable)libPvr_GetRenderTarget()); - - - glcache.ClearColor ( 1, 0.5, 0, 1 ); - glClear ( GL_COLOR_BUFFER_BIT ); - glXSwapBuffers((Display*)libPvr_GetRenderSurface(), (GLXDrawable)libPvr_GetRenderTarget()); - #endif - } - #endif + glcache.ClearColor ( 1, 0.5, 0, 1 ); + glClear ( GL_COLOR_BUFFER_BIT ); + glXSwapBuffers((Display*)libPvr_GetRenderSurface(), (GLXDrawable)libPvr_GetRenderTarget()); + #endif + } #endif #endif @@ -873,9 +730,7 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) glBindAttribLocation(program, VERTEX_COL_OFFS1_ARRAY, "in_offs1"); glBindAttribLocation(program, VERTEX_UV1_ARRAY, "in_uv1"); -#ifndef GLES glBindFragDataLocation(program, 0, "FragColor"); -#endif glLinkProgram(program); @@ -1012,12 +867,10 @@ GLuint osd_font; bool gl_create_resources() { -#ifndef GLES //create vao //This is really not "proper", vaos are supposed to be defined once //i keep updating the same one to make the es2 code work in 3.1 context glGenVertexArrays(1, &gl.vbo.vao); -#endif //create vbos glGenBuffers(1, &gl.vbo.geometry); @@ -1143,14 +996,6 @@ bool gles_init() if (!gl_create_resources()) return false; -#if defined(GLES) && HOST_OS != OS_DARWIN && !defined(TARGET_NACL32) - #ifdef TARGET_PANDORA - fbdev=open("/dev/fb0", O_RDONLY); - #else - eglSwapInterval(gl.setup.display,1); - #endif -#endif - // glEnable(GL_DEBUG_OUTPUT); // glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); // glDebugMessageCallback(gl_DebugOutput, NULL); @@ -1162,9 +1007,6 @@ bool gles_init() glClear(GL_COLOR_BUFFER_BIT); gl_swap(); -#ifdef GLES - glHint(GL_GENERATE_MIPMAP_HINT, GL_FASTEST); -#endif initABuffer(); return true; diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 084d2d2ff..a5b6d556a 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -2,35 +2,11 @@ #include "rend/rend.h" #include -#ifdef GLES -#if 
defined(TARGET_IPHONE) //apple-specific ogles2 headers -//#include -#include -#include -#else -#if !defined(TARGET_NACL32) -#include -#endif -#include -#include -#endif - -#ifndef GL_NV_draw_path -//IMGTEC GLES emulation -#pragma comment(lib,"libEGL.lib") -#pragma comment(lib,"libGLESv2.lib") -#else /* NV gles emulation*/ -#pragma comment(lib,"libGLES20.lib") -#endif - -#else #if HOST_OS == OS_DARWIN #include #else #include #endif -#endif - #define glCheck() do { if (unlikely(settings.validate.OpenGlChecks)) { verify(glGetError()==GL_NO_ERROR); } } while(0) #define eglCheck() false @@ -80,17 +56,6 @@ struct PipelineShader struct gl_ctx { -#if defined(GLES) && HOST_OS != OS_DARWIN && !defined(TARGET_NACL32) - struct - { - EGLNativeWindowType native_wind; - EGLNativeDisplayType native_disp; - EGLDisplay display; - EGLSurface surface; - EGLContext context; - } setup; -#endif - struct { GLuint program; @@ -107,9 +72,7 @@ struct gl_ctx struct { GLuint geometry,modvols,idxs,idxs2; -#ifndef GLES GLuint vao; -#endif GLuint tr_poly_params; } vbo; From ad1963262fca74c6feeb7424015ff6c38431696e Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Thu, 5 Jul 2018 22:33:48 +0200 Subject: [PATCH 52/65] Fix translucent shadows (Xtreme Sports) and remove undeeded code --- core/rend/gles/abuffer.cpp | 56 ++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 27 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index da396dd3f..8f8ea9014 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -265,39 +265,35 @@ void main(void) \n\ setFragDepth(); \n\ #endif \n\ ivec2 coords = ivec2(gl_FragCoord.xy); \n\ - if (all(greaterThanEqual(coords, ivec2(0))) && all(lessThan(coords, imageSize(abufferPointerImg)))) \n\ + \n\ + uint idx = imageLoad(abufferPointerImg, coords).x; \n\ + int list_len = 0; \n\ + while (idx != EOL) \n\ { \n\ - \n\ - uint idx = imageLoad(abufferPointerImg, coords).x; \n\ - if (idx >= pixels.length()) // 
FIXME Shouldn't be necessary \n\ - discard; \n\ - int list_len = 0; \n\ - while (idx != EOL) { \n\ - const Pixel pixel = pixels[idx]; \n\ - const PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\ - if (getShadowEnable(pp)) \n\ - { \n\ + const Pixel pixel = pixels[idx]; \n\ + const PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\ + if (getShadowEnable(pp)) \n\ + { \n\ #if MV_MODE == MV_XOR \n\ - if (gl_FragDepth <= pixels[idx].depth) \n\ - atomicXor(pixels[idx].seq_num, 0x40000000); \n\ + if (gl_FragDepth <= pixels[idx].depth) \n\ + atomicXor(pixels[idx].seq_num, 0x40000000); \n\ #elif MV_MODE == MV_OR \n\ - if (gl_FragDepth <= pixels[idx].depth) \n\ - atomicOr(pixels[idx].seq_num, 0x40000000); \n\ + if (gl_FragDepth <= pixels[idx].depth) \n\ + atomicOr(pixels[idx].seq_num, 0x40000000); \n\ #elif MV_MODE == MV_INCLUSION \n\ - int prev_val = atomicAnd(pixels[idx].seq_num, 0xBFFFFFFF); \n\ - if ((prev_val & 0xC0000000) == 0x40000000) \n\ - pixels[idx].seq_num = bitfieldInsert(pixels[idx].seq_num, 1, 31, 1); \n\ + int prev_val = atomicAnd(pixels[idx].seq_num, 0xBFFFFFFF); \n\ + if ((prev_val & 0xC0000000) == 0x40000000) \n\ + pixels[idx].seq_num = bitfieldInsert(pixels[idx].seq_num, 1, 31, 1); \n\ #elif MV_MODE == MV_EXCLUSION \n\ - int prev_val = atomicAnd(pixels[idx].seq_num, 0x3FFFFFFF); \n\ - if ((prev_val & 0xC0000000) == 0x80000000) \n\ - pixels[idx].seq_num = bitfieldInsert(pixels[idx].seq_num, 1, 31, 1); \n\ + int prev_val = atomicAnd(pixels[idx].seq_num, 0x3FFFFFFF); \n\ + if ((prev_val & 0xC0000000) == 0x80000000) \n\ + pixels[idx].seq_num = bitfieldInsert(pixels[idx].seq_num, 1, 31, 1); \n\ #endif \n\ - } \n\ - idx = pixels[idx].next; \n\ - list_len++; \n\ - if (list_len >= MAX_PIXELS_PER_FRAGMENT) \n\ - break; \n\ } \n\ + idx = pixels[idx].next; \n\ + list_len++; \n\ + if (list_len >= MAX_PIXELS_PER_FRAGMENT) \n\ + break; \n\ } \n\ \n\ discard; \n\ @@ -473,6 +469,8 @@ void DrawTranslucentModVols(int first, int count) 
glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT); + int mod_base = -1; + for (u32 cmv = 0; cmv < count; cmv++) { ModifierVolumeParam& param = params[cmv]; @@ -484,6 +482,9 @@ void DrawTranslucentModVols(int first, int count) verify(param.first >= 0 && param.first + param.count <= pvrrc.modtrig.used()); + if (mod_base == -1) + mod_base = param.first; + PipelineShader *shader; if (!param.isp.VolumeLast && mv_mode > 0) shader = &g_abuffer_tr_modvol_shaders[Or]; // OR'ing (open volume or quad) @@ -506,7 +507,8 @@ void DrawTranslucentModVols(int first, int count) ShaderUniforms.Set(shader); glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); - glDrawArrays(GL_TRIANGLES, param.first * 3, param.count * 3); glCheck(); + glDrawArrays(GL_TRIANGLES, mod_base * 3, (param.first + param.count - mod_base) * 3); glCheck(); + mod_base = -1; } } } From cfe163b2fdfdfc5f3615a6b59718371c2476b338 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Mon, 9 Jul 2018 15:08:16 +0200 Subject: [PATCH 53/65] gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 6ec10c49d..54dc59647 100644 --- a/.gitignore +++ b/.gitignore @@ -48,5 +48,6 @@ Workdir/lib*ant.properties shell/linux/.map shell/linux/nosym-reicast.elf shell/linux/reicast.elf +shell/linux/reicast_naomi.elf reicast-ios.xccheckout From cb738943255720376ff800fe138820089cca079c Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 11 Jul 2018 11:37:34 +0200 Subject: [PATCH 54/65] Fix dumpTexture --- core/rend/gles/gltex.cpp | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index 5fedfdb77..b9d1eafb8 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -117,7 +117,6 @@ static void dumpRtTexture(u32 name, u32 w, u32 h) { static void dumpTexture(int texID, int w, int h, GLuint textype) { - // Dump char sname[256]; sprintf(sname, "texdump/%d.png", 
texID); FILE *fp = fopen(sname, "wb"); @@ -127,10 +126,13 @@ static void dumpTexture(int texID, int w, int h, GLuint textype) u16 *src = (u16 *)temp_tex_buffer; png_bytepp rows = (png_bytepp)malloc(h * sizeof(png_bytep)); - for (int y = 0; y < h; y++) { + for (int y = 0; y < h; y++) + { rows[y] = (png_bytep)malloc(w * 4); // 32-bit per pixel u8 *dst = (u8 *)rows[y]; - if (textype == GL_UNSIGNED_SHORT_4_4_4_4) + switch (textype) + { + case GL_UNSIGNED_SHORT_4_4_4_4: for (int x = 0; x < w; x++) { *dst++ = ((*src >> 12) & 0xF) << 4; @@ -139,16 +141,18 @@ static void dumpTexture(int texID, int w, int h, GLuint textype) *dst++ = (*src & 0xF) << 4; src++; } - else if (textype == GL_UNSIGNED_SHORT_5_6_5) + break; + case GL_UNSIGNED_SHORT_5_6_5: for (int x = 0; x < w; x++) { *dst++ = ((*src >> 11) & 0x1F) << 3; - *dst++ = ((*src >> 5) & 0x3F) << 3; + *dst++ = ((*src >> 5) & 0x3F) << 2; *dst++ = (*src & 0x1F) << 3; *dst++ = 255; src++; } - if (textype == GL_UNSIGNED_SHORT_5_5_5_1) + break; + case GL_UNSIGNED_SHORT_5_5_5_1: for (int x = 0; x < w; x++) { *dst++ = ((*src >> 11) & 0x1F) << 3; @@ -157,6 +161,23 @@ static void dumpTexture(int texID, int w, int h, GLuint textype) *dst++ = (*src & 1) ? 255 : 0; src++; } + break; + case GL_UNSIGNED_INT_8_8_8_8: + for (int x = 0; x < w; x++) + { + *dst++ = ((u8 *)src)[3]; + *dst++ = ((u8 *)src)[2]; + *dst++ = ((u8 *)src)[1]; + *dst++ = ((u8 *)src)[0]; + src += 2; + } + break; + default: + printf("dumpTexture: unsupported picture format %x\n", textype); + free(rows[0]); + free(rows); + return; + } } png_structp png_ptr = png_create_write_struct(PNG_LIBPNG_VER_STRING, NULL, NULL, NULL); From 21eac7d6b0f985ea596bb37952c7c54ed6759ea6 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 11 Jul 2018 11:45:57 +0200 Subject: [PATCH 55/65] Final fragment shader performance improvement Use an index list of pixels instead of copying all data into a local array. Uses less memory, which makes it faster. 
Also, build the index and insert-sort at the same time. Could benefit from sorting polygons back-to-front before rendering. Use #defs for modifier volume bitwise ops. --- core/rend/gles/abuffer.cpp | 96 ++++++++++++++------------------------ core/rend/gles/gles.cpp | 6 +-- core/rend/gles/gles.h | 9 ++-- 3 files changed, 45 insertions(+), 66 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index 8f8ea9014..0aae8d398 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -32,64 +32,42 @@ uniform highp float shade_scale_factor; \n\ \n\ out vec4 FragColor; \n\ \n\ -Pixel pixel_list[MAX_PIXELS_PER_FRAGMENT]; \n\ +uint pixel_list[MAX_PIXELS_PER_FRAGMENT]; \n\ \n\ -int fillFragmentArray(ivec2 coords) { \n\ + \n\ +int fillAndSortFragmentArray(ivec2 coords) \n\ +{ \n\ // Load fragments into a local memory array for sorting \n\ uint idx = imageLoad(abufferPointerImg, coords).x; \n\ - int i = 0; \n\ - for (; idx != EOL && i < MAX_PIXELS_PER_FRAGMENT; i++) { \n\ - pixel_list[i] = pixels[idx]; \n\ - idx = pixel_list[i].next; \n\ - } \n\ - return i; \n\ -} \n\ - \n\ -// Bubble sort used to sort fragments \n\ -void bubbleSort(int array_size) { \n\ - for (int i = array_size - 2; i >= 0; i--) { \n\ - for (int j = 0; j <= i; j++) { \n\ + int count = 0; \n\ + for (; idx != EOL && count < MAX_PIXELS_PER_FRAGMENT; count++) \n\ + { \n\ + const Pixel p = pixels[idx]; \n\ + int j = count - 1; \n\ + Pixel jp = pixels[pixel_list[j]]; \n\ #if DEPTH_SORTED == 1 \n\ - // depth then poly number \n\ - if (pixel_list[j].depth < pixel_list[j + 1].depth \n\ - || (pixel_list[j].depth == pixel_list[j + 1].depth && getPolyNumber(pixel_list[j]) > getPolyNumber(pixel_list[j + 1]))) { \n\ + while (j >= 0 \n\ + && (jp.depth < p.depth \n\ + || (jp.depth == p.depth && getPolyNumber(jp) > getPolyNumber(p)))) \n\ #else \n\ - // poly number only \n\ - if (getPolyNumber(pixel_list[j]) > getPolyNumber(pixel_list[j + 1])) { \n\ -#endif \n\ - const Pixel p = 
pixel_list[j + 1]; \n\ - pixel_list[j + 1] = pixel_list[j]; \n\ - pixel_list[j] = p; \n\ - } \n\ - } \n\ - } \n\ -} \n\ - \n\ - \n\ -// Insertion sort used to sort fragments \n\ -void insertionSort(int array_size) { \n\ - for (int i = 1; i < array_size; i++) { \n\ - const Pixel p = pixel_list[i]; \n\ - int j = i - 1; \n\ -#if DEPTH_SORTED == 1 \n\ - for (; j >= 0 && (pixel_list[j].depth < p.depth || (pixel_list[j].depth == p.depth && getPolyNumber(pixel_list[j]) > getPolyNumber(p))); j--) { \n\ -#else \n\ - for (; j >= 0 && getPolyNumber(pixel_list[j]) > getPolyNumber(p); j--) { \n\ + while (j >= 0 && getPolyNumber(jp) > getPolyNumber(p)) \n\ #endif \n\ + { \n\ pixel_list[j + 1] = pixel_list[j]; \n\ + j--; \n\ + jp = pixels[pixel_list[j]]; \n\ } \n\ - pixel_list[j + 1] = p; \n\ + pixel_list[j + 1] = idx; \n\ + idx = p.next; \n\ } \n\ + return count; \n\ } \n\ \n\ // Blend fragments back-to-front \n\ vec4 resolveAlphaBlend(ivec2 coords) { \n\ \n\ - // Copy fragments in local array \n\ - int num_frag = fillFragmentArray(coords); \n\ - \n\ - // Sort fragments in local memory array \n\ - bubbleSort(num_frag); \n\ + // Copy and sort fragments into a local array \n\ + int num_frag = fillAndSortFragmentArray(coords); \n\ \n\ vec4 finalColor = texture(tex, gl_FragCoord.xy / textureSize(tex, 0)); \n\ vec4 secondaryBuffer = vec4(0.0); // Secondary accumulation buffer \n\ @@ -97,7 +75,7 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ \n\ for (int i = 0; i < num_frag; i++) \n\ { \n\ - const Pixel pixel = pixel_list[i]; \n\ + const Pixel pixel = pixels[pixel_list[i]]; \n\ const PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\ #if DEPTH_SORTED != 1 \n\ const float frag_depth = pixel.depth; \n\ @@ -138,7 +116,7 @@ vec4 resolveAlphaBlend(ivec2 coords) { \n\ #endif \n\ bool area1 = false; \n\ bool shadowed = false; \n\ - if (getShadowEnable(pp) && isShadowed(pixel)) \n\ + if (isShadowed(pixel)) \n\ { \n\ if (isTwoVolumes(pp)) \n\ area1 = true; \n\ @@ -268,32 +246,30 @@ void 
main(void) \n\ \n\ uint idx = imageLoad(abufferPointerImg, coords).x; \n\ int list_len = 0; \n\ - while (idx != EOL) \n\ + while (idx != EOL && list_len < MAX_PIXELS_PER_FRAGMENT) \n\ { \n\ const Pixel pixel = pixels[idx]; \n\ const PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\ if (getShadowEnable(pp)) \n\ { \n\ #if MV_MODE == MV_XOR \n\ - if (gl_FragDepth <= pixels[idx].depth) \n\ - atomicXor(pixels[idx].seq_num, 0x40000000); \n\ + if (gl_FragDepth <= pixel.depth) \n\ + atomicXor(pixels[idx].seq_num, SHADOW_STENCIL); \n\ #elif MV_MODE == MV_OR \n\ - if (gl_FragDepth <= pixels[idx].depth) \n\ - atomicOr(pixels[idx].seq_num, 0x40000000); \n\ + if (gl_FragDepth <= pixel.depth) \n\ + atomicOr(pixels[idx].seq_num, SHADOW_STENCIL); \n\ #elif MV_MODE == MV_INCLUSION \n\ - int prev_val = atomicAnd(pixels[idx].seq_num, 0xBFFFFFFF); \n\ - if ((prev_val & 0xC0000000) == 0x40000000) \n\ - pixels[idx].seq_num = bitfieldInsert(pixels[idx].seq_num, 1, 31, 1); \n\ + int prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL)); \n\ + if ((prev_val & (SHADOW_STENCIL|SHADOW_ACC)) == SHADOW_STENCIL) \n\ + pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1, 31, 1); \n\ #elif MV_MODE == MV_EXCLUSION \n\ - int prev_val = atomicAnd(pixels[idx].seq_num, 0x3FFFFFFF); \n\ - if ((prev_val & 0xC0000000) == 0x80000000) \n\ - pixels[idx].seq_num = bitfieldInsert(pixels[idx].seq_num, 1, 31, 1); \n\ + int prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL|SHADOW_ACC)); \n\ + if ((prev_val & (SHADOW_STENCIL|SHADOW_ACC)) == SHADOW_ACC) \n\ + pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1, 31, 1); \n\ #endif \n\ } \n\ - idx = pixels[idx].next; \n\ + idx = pixel.next; \n\ list_len++; \n\ - if (list_len >= MAX_PIXELS_PER_FRAGMENT) \n\ - break; \n\ } \n\ \n\ discard; \n\ diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index f00b9fc0f..b0670ca28 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -408,11 +408,11 @@ void main() \n\ 
discard; \n\ break; \n\ case SRC_ALPHA: \n\ - if (color.rgb == vec3(0.0) || color.a == 0.0) \n\ + if (color.a == 0.0 || color.rgb == vec3(0.0)) \n\ discard; \n\ break; \n\ case INVERSE_SRC_ALPHA: \n\ - if (color.rgb == vec3(0.0) || color.a == 1.0) \n\ + if (color.a == 1.0 || color.rgb == vec3(0.0)) \n\ discard; \n\ break; \n\ } \n\ @@ -1413,7 +1413,7 @@ bool ProcessFrame(TA_context* ctx) CollectCleanup(); if (ctx->rend.Overrun) - printf("TA context overrun\n"); + printf("ERROR: TA context overrun\n"); return !ctx->rend.Overrun; } diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index a5b6d556a..83f9816df 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -126,8 +126,8 @@ extern GLuint depthSaveTexId; \n\ layout(r32ui, binding = 4) uniform coherent restrict uimage2D abufferPointerImg; \n\ struct Pixel { \n\ - mediump vec4 color; \n\ - mediump float depth; \n\ + highp vec4 color; \n\ + highp float depth; \n\ int seq_num; \n\ uint next; \n\ }; \n\ @@ -239,9 +239,12 @@ int getPolyNumber(const Pixel pixel) \n\ return pixel.seq_num & 0x3FFFFFFF; \n\ } \n\ \n\ +#define SHADOW_STENCIL 0x40000000 \n\ +#define SHADOW_ACC 0x80000000 \n\ +\n\ bool isShadowed(const Pixel pixel) \n\ { \n\ - return (pixel.seq_num & 0x80000000) == 0x80000000; \n\ + return (pixel.seq_num & SHADOW_ACC) == SHADOW_ACC; \n\ } \n\ \n\ bool isTwoVolumes(const PolyParam pp) \n\ From 03a74ccdb7a7e4b6ade65ee7e47e049cbeb27eef Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 11 Jul 2018 12:03:07 +0200 Subject: [PATCH 56/65] Tentative fix for Mesa 18.2 driver error "opaque variables cannot be operands of the ?: operator" --- core/rend/gles/gles.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 83f9816df..48d874f00 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -179,44 +179,54 @@ layout (binding = 1, std430) readonly buffer TrPolyParamBuffer { \n\ PolyParam 
tr_poly_params[]; \n\ }; \n\ \n\ +#define GET_TSP_FOR_AREA int tsp; if (area1) tsp = pp.tsp1; else tsp = pp.tsp; \n\ + \n\ int getSrcBlendFunc(const PolyParam pp, bool area1) \n\ { \n\ - return ((area1 ? pp.tsp1 : pp.tsp) >> 29) & 7; \n\ + GET_TSP_FOR_AREA \n\ + return (tsp >> 29) & 7; \n\ } \n\ \n\ int getDstBlendFunc(const PolyParam pp, bool area1) \n\ { \n\ - return ((area1 ? pp.tsp1 : pp.tsp) >> 26) & 7; \n\ + GET_TSP_FOR_AREA \n\ + return (tsp >> 26) & 7; \n\ } \n\ \n\ bool getSrcSelect(const PolyParam pp, bool area1) \n\ { \n\ - return (((area1 ? pp.tsp1 : pp.tsp) >> 25) & 1) != 0; \n\ + GET_TSP_FOR_AREA \n\ + return ((tsp >> 25) & 1) != 0; \n\ } \n\ \n\ bool getDstSelect(const PolyParam pp, bool area1) \n\ { \n\ - return (((area1 ? pp.tsp1 : pp.tsp) >> 24) & 1) != 0; \n\ + GET_TSP_FOR_AREA \n\ + return ((tsp >> 24) & 1) != 0; \n\ } \n\ \n\ int getFogControl(const PolyParam pp, bool area1) \n\ { \n\ - return ((area1 ? pp.tsp1 : pp.tsp) >> 22) & 3; \n\ + GET_TSP_FOR_AREA \n\ + return (tsp >> 22) & 3; \n\ } \n\ \n\ bool getUseAlpha(const PolyParam pp, bool area1) \n\ { \n\ - return (((area1 ? pp.tsp1 : pp.tsp) >> 20) & 1) != 0; \n\ + GET_TSP_FOR_AREA \n\ + return ((tsp >> 20) & 1) != 0; \n\ } \n\ \n\ bool getIgnoreTexAlpha(const PolyParam pp, bool area1) \n\ { \n\ - return (((area1 ? pp.tsp1 : pp.tsp) >> 19) & 1) != 0; \n\ + GET_TSP_FOR_AREA \n\ + return ((tsp >> 19) & 1) != 0; \n\ } \n\ \n\ int getShadingInstruction(const PolyParam pp, bool area1) \n\ { \n\ - return ((area1 ? pp.tsp1 : pp.tsp) >> 6) & 3; \n\ + GET_TSP_FOR_AREA \n\ + return (tsp >> 6) & 3; \n\ } \n\ \n\ int getDepthFunc(const PolyParam pp) \n\ From b7f0b8a94421df290d6032f54d30decbf23f86fc Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 11 Jul 2018 17:53:37 +0200 Subject: [PATCH 57/65] Another potential Mesa driver fix. 
Got rid of deprecated OpenGL #defs --- core/rend/gles/gles.cpp | 120 +++++++++++----------------------------- 1 file changed, 33 insertions(+), 87 deletions(-) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index b0670ca28..5f3a76d11 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -59,11 +59,8 @@ Tile clip float fb_scale_x,fb_scale_y; float scale_x, scale_y; -#define attr "in" -#define vary "out" - //Fragment and vertex shaders code -//pretty much 1:1 copy of the d3d ones for now + const char* VertexShaderSource = "\ #version 140 \n\ @@ -79,20 +76,20 @@ const char* VertexShaderSource = uniform highp vec4 scale; \n\ uniform highp vec4 depth_scale; \n\ /* Vertex input */ \n\ -" attr " highp vec4 in_pos; \n\ -" attr " lowp vec4 in_base; \n\ -" attr " lowp vec4 in_offs; \n\ -" attr " mediump vec2 in_uv; \n\ -" attr " lowp vec4 in_base1; \n\ -" attr " lowp vec4 in_offs1; \n\ -" attr " mediump vec2 in_uv1; \n\ +in highp vec4 in_pos; \n\ +in lowp vec4 in_base; \n\ +in lowp vec4 in_offs; \n\ +in mediump vec2 in_uv; \n\ +in lowp vec4 in_base1; \n\ +in lowp vec4 in_offs1; \n\ +in mediump vec2 in_uv1; \n\ /* output */ \n\ -INTERPOLATION " vary " lowp vec4 vtx_base; \n\ -INTERPOLATION " vary " lowp vec4 vtx_offs; \n\ - " vary " mediump vec2 vtx_uv; \n\ -INTERPOLATION " vary " lowp vec4 vtx_base1; \n\ -INTERPOLATION " vary " lowp vec4 vtx_offs1; \n\ - " vary " mediump vec2 vtx_uv1; \n\ +INTERPOLATION out lowp vec4 vtx_base; \n\ +INTERPOLATION out lowp vec4 vtx_offs; \n\ + out mediump vec2 vtx_uv; \n\ +INTERPOLATION out lowp vec4 vtx_base1; \n\ +INTERPOLATION out lowp vec4 vtx_offs1; \n\ + out mediump vec2 vtx_uv1; \n\ void main() \n\ { \n\ vtx_base=in_base; \n\ @@ -113,60 +110,6 @@ void main() \n\ gl_Position = vpos; \n\ }"; -/* - -cp_AlphaTest 0 1 2 2 -pp_ClipTestMode -1 0 1 3 6 -pp_UseAlpha 0 1 2 12 -pp_Texture 1 - pp_IgnoreTexA 0 1 2 2 - pp_ShadInstr 0 1 2 3 4 8 - pp_Offset 0 1 2 16 - pp_FogCtrl 0 1 2 3 4 64 -pp_Texture 0 - pp_FogCtrl 0 
2 3 4 4 - -pp_Texture: off -> 12*4=48 shaders -pp_Texture: on -> 12*64=768 shaders -Total: 816 shaders - -highp float fdecp(highp float flt,out highp float e) \n\ -{ \n\ - highp float lg2=log2(flt); //ie , 2.5 \n\ - highp float frc=fract(lg2); //ie , 0.5 \n\ - e=lg2-frc; //ie , 2.5-0.5=2 (exp) \n\ - return pow(2.0,frc); //2^0.5 (manitsa) \n\ -} \n\ -lowp float fog_mode2(highp float invW) \n\ -{ \n\ - highp float foginvW=invW; \n\ - foginvW=clamp(foginvW,1.0,255.0); \n\ - \n\ - highp float fogexp; //0 ... 7 \n\ - highp float fogman=fdecp(foginvW, fogexp); //[1,2) mantissa bits. that is 1.m \n\ - \n\ - highp float fogman_hi=fogman*16.0-16.0; //[16,32) -16 -> [0,16) \n\ - highp float fogman_idx=floor(fogman_hi); //[0,15] \n\ - highp float fogman_blend=fract(fogman_hi); //[0,1) -- can also be fogman_idx-fogman_idx ! \n\ - highp float fog_idx_fr=fogexp*16.0+fogman_idx; //[0,127] \n\ - \n\ - highp float fog_idx_pixel_fr=fog_idx_fr+0.5; \n\ - highp float fog_idx_pixel_n=fog_idx_pixel_fr/128.0;//normalise to [0.5/128,127.5/128) coordinates \n\ - \n\ - //fog is 128x1 texure \n\ - lowp vec2 fog_coefs=texture2D(fog_table,vec2(fog_idx_pixel_n)).rg; \n\ - \n\ - lowp float fog_coef=mix(fog_coefs.r,fog_coefs.g,fogman_blend); \n\ - \n\ - return fog_coef; \n\ -} \n\ -*/ - -#define FRAGCOL "FragColor" -#define TEXLOOKUP "texture" -#define vary "in" - - const char* PixelPipelineShader = SHADER_HEADER "\ #define cp_AlphaTest %d \n\ @@ -222,19 +165,19 @@ uniform int fog_control[2]; \n\ #endif \n\ \n\ /* Vertex input*/ \n\ -INTERPOLATION " vary " lowp vec4 vtx_base; \n\ -INTERPOLATION " vary " lowp vec4 vtx_offs; \n\ - " vary " mediump vec2 vtx_uv; \n\ -INTERPOLATION " vary " lowp vec4 vtx_base1; \n\ -INTERPOLATION " vary " lowp vec4 vtx_offs1; \n\ - " vary " mediump vec2 vtx_uv1; \n\ +INTERPOLATION in lowp vec4 vtx_base; \n\ +INTERPOLATION in lowp vec4 vtx_offs; \n\ + in mediump vec2 vtx_uv; \n\ +INTERPOLATION in lowp vec4 vtx_base1; \n\ +INTERPOLATION in lowp vec4 vtx_offs1; \n\ + in 
mediump vec2 vtx_uv1; \n\ lowp float fog_mode2(highp float w) \n\ { \n\ highp float z = clamp(w * sp_FOG_DENSITY, 1.0, 255.9999); \n\ float exp = floor(log2(z)); \n\ highp float m = z * 16.0 / pow(2.0, exp) - 16.0; \n\ float idx = floor(m) + exp * 16.0 + 0.5; \n\ - vec4 fog_coef = " TEXLOOKUP "(fog_table, vec2(idx / 128.0, 0.75 - (m - floor(m)) / 2.0)); \n\ + vec4 fog_coef = texture(fog_table, vec2(idx / 128.0, 0.75 - (m - floor(m)) / 2.0)); \n\ return fog_coef.a; \n\ } \n\ void main() \n\ @@ -317,8 +260,11 @@ void main() \n\ #endif\n\ #if pp_Texture==1 \n\ { \n\ - lowp vec4 texcol=" TEXLOOKUP "(area1 ? tex1 : tex0, uv); \n\ - \n\ + highp vec4 texcol; \n\ + if (area1) \n\ + texcol = texture(tex1, uv); \n\ + else \n\ + texcol = texture(tex0, uv); \n\ #if pp_BumpMap == 1 \n\ float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0; \n\ float r = 2.0 * PI * (texcol.g * 15.0 * 16.0 + texcol.b * 15.0) / 255.0; \n\ @@ -390,8 +336,8 @@ void main() \n\ \n\ //color.rgb=vec3(gl_FragCoord.w * sp_FOG_DENSITY / 128.0); \n\ \n\ - #if PASS == 1 \n" - FRAGCOL " = color; \n\ + #if PASS == 1 \n\ + FragColor = color; \n\ #elif PASS > 1 \n\ // Discard as many pixels as possible \n\ switch (cur_blend_mode.y) // DST \n\ @@ -476,15 +422,15 @@ const char* OSD_Shader = #version 140 \n\ out vec4 FragColor; \n\ \n\ -" vary " lowp vec4 vtx_base; \n\ -" vary " mediump vec2 vtx_uv; \n\ +in lowp vec4 vtx_base; \n\ +in mediump vec2 vtx_uv; \n\ /* Vertex input*/ \n\ uniform sampler2D tex; \n\ void main() \n\ { \n\ - mediump vec2 uv=vtx_uv; \n\ - uv.y=1.0-uv.y; \n\ - " FRAGCOL "=vtx_base*" TEXLOOKUP "(tex,uv.st); \n\n\ + mediump vec2 uv = vtx_uv; \n\ + uv.y = 1.0 - uv.y; \n\ + FragColor = vtx_base * texture(tex, uv.st); \n\n\ }"; GLCache glcache; From 5f6597fb29813645cf5b9b924d1029fea15c20fd Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 11 Jul 2018 18:10:22 +0200 Subject: [PATCH 58/65] depth_scale is no longer used. 
More clean-up --- core/rend/gles/gles.cpp | 154 ---------------------------------------- core/rend/gles/gles.h | 10 +-- 2 files changed, 3 insertions(+), 161 deletions(-) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 5f3a76d11..fe95022f2 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -74,7 +74,6 @@ const char* VertexShaderSource = \n\ /* Vertex constants*/ \n\ uniform highp vec4 scale; \n\ -uniform highp vec4 depth_scale; \n\ /* Vertex input */ \n\ in highp vec4 in_pos; \n\ in lowp vec4 in_base; \n\ @@ -760,7 +759,6 @@ bool CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipe //get the uniform locations s->scale = glGetUniformLocation(s->program, "scale"); - s->depth_scale = glGetUniformLocation(s->program, "depth_scale"); s->pp_ClipTest = glGetUniformLocation(s->program, "pp_ClipTest"); @@ -829,12 +827,10 @@ bool gl_create_resources() gl.modvol_shader.program=gl_CompileAndLink(vshader, ModifierVolumeShader); gl.modvol_shader.scale = glGetUniformLocation(gl.modvol_shader.program, "scale"); - gl.modvol_shader.depth_scale = glGetUniformLocation(gl.modvol_shader.program, "depth_scale"); gl.OSD_SHADER.program=gl_CompileAndLink(vshader, OSD_Shader); gl.OSD_SHADER.scale=glGetUniformLocation(gl.OSD_SHADER.program, "scale"); - gl.OSD_SHADER.depth_scale=glGetUniformLocation(gl.OSD_SHADER.program, "depth_scale"); glUniform1i(glGetUniformLocation(gl.OSD_SHADER.program, "tex"),0); //bind osd texture to slot 0 //#define PRECOMPILE_SHADERS @@ -871,8 +867,6 @@ bool gl_init(void* wind, void* disp); //swap buffers void gl_swap(); -//destroy the gles context and free resources -void gl_term(); GLuint gl_CompileShader(const char* shader,GLuint type); @@ -1081,12 +1075,6 @@ static void DrawButton(float* xy, u32 state) osd_count+=4; } -static void ClearBG() -{ - -} - - void DrawButton2(float* xy, bool state) { DrawButton(xy,state?0:255); } static void DrawCenteredText(float yy, float scale, int transparency, const 
char* text) @@ -1378,90 +1366,6 @@ bool RenderFrame() //if (FrameCount&7) return; - //Setup the matrix - - //TODO: Make this dynamic - float vtx_min_fZ=0.f; //pvrrc.fZ_min; - float vtx_max_fZ=pvrrc.fZ_max; -//printf("Zmin %g Zmax %g\n", pvrrc.fZ_min, pvrrc.fZ_max); - //sanitise the values, now with NaN detection (for omap) - //0x49800000 is 1024*1024. Using integer math to avoid issues w/ infs and nans - if ((s32&)vtx_max_fZ<0 || (u32&)vtx_max_fZ>0x49800000) - vtx_max_fZ=10*1024; - - - //add some extra range to avoid clipping border cases - vtx_min_fZ*=0.98f; - vtx_max_fZ*=1.001f; - - //calculate a projection so that it matches the pvr x,y setup, and - //a) Z is linearly scaled between 0 ... 1 - //b) W is passed though for proper perspective calculations - - /* - PowerVR coords: - fx, fy (pixel coordinates) - fz=1/w - - (as a note, fx=x*fz;fy=y*fz) - - Clip space - -Wc .. Wc, xyz - x: left-right, y: bottom-top - NDC space - -1 .. 1, xyz - Window space: - translated NDC (viewport, glDepth) - - Attributes: - //this needs to be cleared up, been some time since I wrote my rasteriser and i'm starting - //to forget/mixup stuff - vaX -> VS output - iaX=vaX*W -> value to be interpolated - iaX',W' -> interpolated values - paX=iaX'/W' -> Per pixel interpolated value for attribute - - - Proper mappings: - Output from shader: - W=1/fz - x=fx*W -> maps to fx after perspective divide - y=fy*W -> fy -//- - z=-W for min, W for max. Needs to be linear. 
- - - - umodified W, perfect mapping: - Z mapping: - pz=z/W - pz=z/(1/fz) - pz=z*fz - z=zt_s+zt_o - pz=(zt_s+zt_o)*fz - pz=zt_s*fz+zt_o*fz - zt_s=scale - zt_s=2/(max_fz-min_fz) - zt_o*fz=-min_fz-1 - zt_o=(-min_fz-1)/fz == (-min_fz-1)*W - - - x=fx/(fx_range/2)-1 //0 to max -> -1 to 1 - y=fy/(-fy_range/2)+1 //0 to max -> 1 to -1 - z=-min_fz*W + (zt_s-1) //0 to +inf -> -1 to 1 - - o=a*z+c - 1=a*z_max+c - -1=a*z_min+c - - c=-a*z_min-1 - 1=a*z_max-a*z_min-1 - 2=a*(z_max-z_min) - a=2/(z_max-z_min) - */ - - //float B=2/(min_invW-max_invW); - //float A=-B*max_invW+vnear; - //these should be adjusted based on the current PVR scaling etc params float dc_width=640; float dc_height=480; @@ -1512,30 +1416,6 @@ bool RenderFrame() dc_width *= scale_x; dc_height *= scale_y; - /* - - float vnear=0; - float vfar =1; - - float max_invW=1/vtx_min_fZ; - float min_invW=1/vtx_max_fZ; - - float B=vfar/(min_invW-max_invW); - float A=-B*max_invW+vnear; - - - GLfloat dmatrix[16] = - { - (2.f/dc_width) ,0 ,-(640/dc_width) ,0 , - 0 ,-(2.f/dc_height) ,(480/dc_height) ,0 , - 0 ,0 ,A ,B , - 0 ,0 ,1 ,0 - }; - - glUniformMatrix4fv(gl.matrix, 1, GL_FALSE, dmatrix); - - */ - /* Handle Dc to screen scaling */ @@ -1548,12 +1428,6 @@ bool RenderFrame() ShaderUniforms.scale_coefs[2]=1-2*ds2s_offs_x/(screen_width); ShaderUniforms.scale_coefs[3]=(is_rtt?1:-1); - - ShaderUniforms.depth_coefs[0]=2/(vtx_max_fZ-vtx_min_fZ); - ShaderUniforms.depth_coefs[1]=-vtx_min_fZ-1; - ShaderUniforms.depth_coefs[2]=0; - ShaderUniforms.depth_coefs[3]=0; - //printf("scale: %f, %f, %f, %f\n",ShaderUniforms.scale_coefs[0],ShaderUniforms.scale_coefs[1],ShaderUniforms.scale_coefs[2],ShaderUniforms.scale_coefs[3]); if (!is_rtt) @@ -1586,14 +1460,12 @@ bool RenderFrame() glcache.UseProgram(gl.modvol_shader.program); glUniform4fv( gl.modvol_shader.scale, 1, ShaderUniforms.scale_coefs); - glUniform4fv( gl.modvol_shader.depth_scale, 1, ShaderUniforms.depth_coefs); GLfloat td[4]={0.5,0,0,0}; glcache.UseProgram(gl.OSD_SHADER.program); 
glUniform4fv( gl.OSD_SHADER.scale, 1, ShaderUniforms.scale_coefs); - glUniform4fv( gl.OSD_SHADER.depth_scale, 1, td); ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f; @@ -1747,32 +1619,6 @@ bool RenderFrame() extern u16 kcode[4]; -/* -bool rend_single_frame() -{ - //wait render start only if no frame pending - _pvrrc = DequeueRender(); - - while (!_pvrrc) - { - rs.Wait(); - _pvrrc = DequeueRender(); - } - - bool do_swp=false; - //if (kcode[0]&(1<<9)) - { - - - //clear up & free data .. - tactx_Recycle(_pvrrc); - _pvrrc=0; - - return do_swp; -} -*/ - - void rend_set_fb_scale(float x,float y) { fb_scale_x=x; diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 48d874f00..5cecf448d 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -31,7 +31,7 @@ struct PipelineShader { GLuint program; - GLuint scale,depth_scale; + GLuint scale; GLuint pp_ClipTest,cp_AlphaTestValue; GLuint sp_FOG_COL_RAM,sp_FOG_COL_VERT,sp_FOG_DENSITY; GLuint shade_scale_factor; @@ -60,13 +60,13 @@ struct gl_ctx { GLuint program; - GLuint scale,depth_scale; + GLuint scale; } modvol_shader; std::map shaders; struct { - GLuint program,scale,depth_scale; + GLuint program,scale; } OSD_SHADER; struct @@ -271,7 +271,6 @@ extern struct ShaderUniforms_t { float PT_ALPHA; float scale_coefs[4]; - float depth_coefs[4]; float fog_den_float; float ps_FOG_COL_RAM[3]; float ps_FOG_COL_VERT[3]; @@ -296,9 +295,6 @@ extern struct ShaderUniforms_t if (s->scale!=-1) glUniform4fv( s->scale, 1, scale_coefs); - if (s->depth_scale!=-1) - glUniform4fv( s->depth_scale, 1, depth_coefs); - if (s->sp_FOG_DENSITY!=-1) glUniform1f( s->sp_FOG_DENSITY,fog_den_float); From 1c5bfe7869de0e1892b2367bff7a0efa3d60d0be Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Mon, 16 Jul 2018 14:26:56 +0200 Subject: [PATCH 59/65] Create OpenGL 4.3 core profile (was 3.1) --- core/linux-dist/x11.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/core/linux-dist/x11.cpp b/core/linux-dist/x11.cpp 
index a2e20848e..24fbf2d60 100644 --- a/core/linux-dist/x11.cpp +++ b/core/linux-dist/x11.cpp @@ -300,9 +300,11 @@ void x11_window_create() verify(glXCreateContextAttribsARB != 0); int context_attribs[] = { - GLX_CONTEXT_MAJOR_VERSION_ARB, 3, - GLX_CONTEXT_MINOR_VERSION_ARB, 1, + GLX_CONTEXT_MAJOR_VERSION_ARB, 4, + GLX_CONTEXT_MINOR_VERSION_ARB, 3, +#ifndef RELEASE GLX_CONTEXT_FLAGS_ARB, GLX_CONTEXT_DEBUG_BIT_ARB, +#endif GLX_CONTEXT_PROFILE_MASK_ARB, GLX_CONTEXT_CORE_PROFILE_BIT_ARB, None }; @@ -312,7 +314,7 @@ void x11_window_create() if (!x11_glc) { - die("Failed to create GL3.1 context\n"); + die("Failed to create OpenGL 4.3 context\n"); } #endif From 4bd23f2c8e2ac9e15a78cd6a21c31328c6492829 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 24 Jul 2018 10:09:22 +0200 Subject: [PATCH 60/65] tentative fix for constant overflow GLSL error on Intel HD --- core/rend/gles/abuffer.cpp | 8 ++++---- core/rend/gles/gles.cpp | 2 +- core/rend/gles/gles.h | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp index 0aae8d398..f86209391 100644 --- a/core/rend/gles/abuffer.cpp +++ b/core/rend/gles/abuffer.cpp @@ -259,13 +259,13 @@ void main(void) \n\ if (gl_FragDepth <= pixel.depth) \n\ atomicOr(pixels[idx].seq_num, SHADOW_STENCIL); \n\ #elif MV_MODE == MV_INCLUSION \n\ - int prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL)); \n\ + uint prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL)); \n\ if ((prev_val & (SHADOW_STENCIL|SHADOW_ACC)) == SHADOW_STENCIL) \n\ - pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1, 31, 1); \n\ + pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1u, 31, 1); \n\ #elif MV_MODE == MV_EXCLUSION \n\ - int prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL|SHADOW_ACC)); \n\ + uint prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL|SHADOW_ACC)); \n\ if ((prev_val & (SHADOW_STENCIL|SHADOW_ACC)) == SHADOW_ACC) \n\ - 
pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1, 31, 1); \n\ + pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1u, 31, 1); \n\ #endif \n\ } \n\ idx = pixel.next; \n\ diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 68f107a0a..88e41a128 100755 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -398,7 +398,7 @@ void main() \n\ Pixel pixel; \n\ pixel.color = color; \n\ pixel.depth = gl_FragDepth; \n\ - pixel.seq_num = pp_Number; \n\ + pixel.seq_num = uint(pp_Number); \n\ pixel.next = imageAtomicExchange(abufferPointerImg, coords, idx); \n\ pixels[idx] = pixel; \n\ \n\ diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 5cecf448d..b58f8b493 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -128,7 +128,7 @@ layout(r32ui, binding = 4) uniform coherent restrict uimage2D abufferPointerImg; struct Pixel { \n\ highp vec4 color; \n\ highp float depth; \n\ - int seq_num; \n\ + uint seq_num; \n\ uint next; \n\ }; \n\ #define EOL 0xFFFFFFFFu \n\ @@ -244,13 +244,13 @@ bool getShadowEnable(const PolyParam pp) \n\ return ((pp.pcw >> 7) & 1) != 0; \n\ } \n\ \n\ -int getPolyNumber(const Pixel pixel) \n\ +uint getPolyNumber(const Pixel pixel) \n\ { \n\ - return pixel.seq_num & 0x3FFFFFFF; \n\ + return pixel.seq_num & 0x3FFFFFFFu; \n\ } \n\ \n\ -#define SHADOW_STENCIL 0x40000000 \n\ -#define SHADOW_ACC 0x80000000 \n\ +#define SHADOW_STENCIL 0x40000000u \n\ +#define SHADOW_ACC 0x80000000u \n\ \n\ bool isShadowed(const Pixel pixel) \n\ { \n\ @@ -259,7 +259,7 @@ bool isShadowed(const Pixel pixel) \n\ \n\ bool isTwoVolumes(const PolyParam pp) \n\ { \n\ - return pp.tsp1 != 0xFFFFFFFF || pp.tcw1 != 0xFFFFFFFF; \n\ + return pp.tsp1 != -1 || pp.tcw1 != -1; \n\ } \n\ \n\ " From 2157fd7cdaf11cc27481d3a6ccf8d4357a98c9aa Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Wed, 1 Aug 2018 21:20:37 +0200 Subject: [PATCH 61/65] Add abuffer.cpp to windows build --- shell/reicast.vcxproj | 1 + 1 file changed, 1 insertion(+) diff --git 
a/shell/reicast.vcxproj b/shell/reicast.vcxproj index 77352b637..e37e54161 100644 --- a/shell/reicast.vcxproj +++ b/shell/reicast.vcxproj @@ -192,6 +192,7 @@ + From 2ed88970edcf2dbf8d7a06af5e170cf111b7281b Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 26 Aug 2018 18:09:57 +0200 Subject: [PATCH 62/65] Fix compile error with last merge --- core/rend/gles/gldraw.cpp | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 02fe98511..28351aac2 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -876,14 +876,39 @@ void DrawFramebuffer(float w, float h) ShaderUniforms.trilinear_alpha = 1.0; - PipelineShader *shader = &gl.pogram_table[GetProgramID(0, 1, 1, 0, 1, 0, 0, 2, false, false)]; + int shaderId = GetProgramID(0, + 1, + 1, + 0, + 1, + 0, + 0, + 2, + false, + 0, + false, + false, + 1); + PipelineShader *shader = gl.getShader(shaderId); if (shader->program == -1) - CompilePipelineShader(shader); - else { - glcache.UseProgram(shader->program); - ShaderUniforms.Set(shader); + CurrentShader->cp_AlphaTest = 0; + CurrentShader->pp_ClipTestMode = 1; + CurrentShader->pp_Texture = 1; + CurrentShader->pp_UseAlpha = 0; + CurrentShader->pp_IgnoreTexA = 1; + CurrentShader->pp_ShadInstr = 0; + CurrentShader->pp_Offset = 0; + CurrentShader->pp_FogCtrl = 2; + CurrentShader->pp_TwoVolumes = false; + CurrentShader->pp_DepthFunc = 0; + CurrentShader->pp_Gouraud = false; + CurrentShader->pp_BumpMap = false; + CurrentShader->pass = 1; + CompilePipelineShader(shader); } + glcache.UseProgram(shader->program); + ShaderUniforms.Set(shader); glActiveTexture(GL_TEXTURE0); glcache.BindTexture(GL_TEXTURE_2D, fbTextureId); From 2a8f3d342700b0dcb6a1b8fe63232b96e1b78f06 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 2 Sep 2018 20:31:28 +0200 Subject: [PATCH 63/65] Direct framebuffer write fix --- core/rend/gles/gldraw.cpp | 30 ++++++++++++++---------------- 1 
file changed, 14 insertions(+), 16 deletions(-) diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index 5198af86f..18e2d8d2e 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -897,20 +897,20 @@ void DrawFramebuffer(float w, float h) PipelineShader *shader = gl.getShader(shaderId); if (shader->program == -1) { - CurrentShader->cp_AlphaTest = 0; - CurrentShader->pp_ClipTestMode = 1; - CurrentShader->pp_Texture = 1; - CurrentShader->pp_UseAlpha = 0; - CurrentShader->pp_IgnoreTexA = 1; - CurrentShader->pp_ShadInstr = 0; - CurrentShader->pp_Offset = 0; - CurrentShader->pp_FogCtrl = 2; - CurrentShader->pp_TwoVolumes = false; - CurrentShader->pp_DepthFunc = 0; - CurrentShader->pp_Gouraud = false; - CurrentShader->pp_BumpMap = false; - CurrentShader->fog_clamping = false; - CurrentShader->pass = 1; + shader->cp_AlphaTest = 0; + shader->pp_ClipTestMode = 0; + shader->pp_Texture = 1; + shader->pp_UseAlpha = 0; + shader->pp_IgnoreTexA = 1; + shader->pp_ShadInstr = 0; + shader->pp_Offset = 0; + shader->pp_FogCtrl = 2; + shader->pp_TwoVolumes = false; + shader->pp_DepthFunc = 0; + shader->pp_Gouraud = false; + shader->pp_BumpMap = false; + shader->fog_clamping = false; + shader->pass = 1; CompilePipelineShader(shader); } glcache.UseProgram(shader->program); @@ -919,9 +919,7 @@ void DrawFramebuffer(float w, float h) glActiveTexture(GL_TEXTURE0); glcache.BindTexture(GL_TEXTURE_2D, fbTextureId); -#ifndef GLES glBindVertexArray(gl.vbo.vao); -#endif // FIXME This make glDrawElements fails on OSX //glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); From 0a8eadb0b4bea2723f1a5a06dd96a3d52d289c03 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Sun, 2 Sep 2018 22:50:19 +0200 Subject: [PATCH 64/65] Fog color clamping must be done after shadowing and only when fog is on --- core/rend/gles/gles.cpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 
6305a866c..eccc7b4c6 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -325,14 +325,7 @@ void main() \n\ #if pp_Offset==1 && pp_BumpMap == 0 \n\ { \n\ color.rgb += offset.rgb; \n\ - color = fog_clamp(color); \n\ - #if pp_FogCtrl == 1 || pp_TwoVolumes == 1 // Per vertex \n\ - IF(cur_fog_control == 1) \n\ - color.rgb=mix(color.rgb, sp_FOG_COL_VERT.rgb, offset.a); \n\ - #endif\n\ } \n\ - #else \n\ - color = fog_clamp(color); \n\ #endif\n\ } \n\ #endif\n\ @@ -342,10 +335,18 @@ void main() \n\ color.rgb *= shade_scale_factor; \n\ #endif\n\ #if pp_FogCtrl==0 || pp_TwoVolumes == 1 // LUT \n\ - IF(cur_fog_control == 0) \n\ - { \n\ - color.rgb=mix(color.rgb,sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ - } \n\ + IF(cur_fog_control == 0) \n\ + { \n\ + color = fog_clamp(color); \n\ + color.rgb=mix(color.rgb,sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ + } \n\ + #endif\n\ + #if pp_Offset==1 && pp_BumpMap == 0 && (pp_FogCtrl == 1 || pp_TwoVolumes == 1) // Per vertex \n\ + IF(cur_fog_control == 1) \n\ + { \n\ + color = fog_clamp(color); \n\ + color.rgb=mix(color.rgb, sp_FOG_COL_VERT.rgb, offset.a); \n\ + } \n\ #endif\n\ color *= trilinear_alpha; \n\ \n\ From 2798770879e47691f3118bbda6c5aa9f58b66af2 Mon Sep 17 00:00:00 2001 From: Flyinghead Date: Tue, 2 Oct 2018 09:31:00 -0400 Subject: [PATCH 65/65] GLSL compile error with mesa driver: need explicit smooth qualifier --- core/rend/gles/gles.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index 682d8786d..c7a7b0d33 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -446,8 +446,8 @@ const char* OSD_Shader = #version 140 \n\ out vec4 FragColor; \n\ \n\ -in lowp vec4 vtx_base; \n\ -in mediump vec2 vtx_uv; \n\ +smooth in lowp vec4 vtx_base; \n\ + in mediump vec2 vtx_uv; \n\ /* Vertex input*/ \n\ uniform sampler2D tex; \n\ void main() \n\