diff --git a/.gitignore b/.gitignore index 4c3ecc507..a20d4a36c 100644 --- a/.gitignore +++ b/.gitignore @@ -48,6 +48,7 @@ reicast-ios.xccheckout shell/linux/.map shell/linux/nosym-reicast.elf shell/linux/reicast.elf +shell/linux/reicast_naomi.elf # Visual Studio generated diff --git a/core/hw/pvr/Renderer_if.cpp b/core/hw/pvr/Renderer_if.cpp index e8aef73f0..d4f082aae 100644 --- a/core/hw/pvr/Renderer_if.cpp +++ b/core/hw/pvr/Renderer_if.cpp @@ -103,7 +103,7 @@ void dump_frame(const char* file, TA_context* ctx, u8* vram, u8* vram_ref = NULL u32 bytes = ctx->tad.End() - ctx->tad.thd_root; - fwrite("TAFRAME3", 1, 8, fw); + fwrite("TAFRAME4", 1, 8, fw); fwrite(&ctx->rend.isRTT, 1, sizeof(ctx->rend.isRTT), fw); u32 zero = 0; @@ -168,10 +168,17 @@ TA_context* read_frame(const char* file, u8* vram_ref = NULL) { fread(id0, 1, 8, fw); - if (memcmp(id0, "TAFRAME3", 8) != 0) { + if (memcmp(id0, "TAFRAME", 7) != 0 || (id0[7] != '3' && id0[7] != '4')) { fclose(fw); return 0; } + int sizeofPolyParam = sizeof(PolyParam); + int sizeofVertex = sizeof(Vertex); + if (id0[7] == '3') + { + sizeofPolyParam -= 12; + sizeofVertex -= 16; + } TA_context* ctx = tactx_Alloc(); @@ -184,8 +191,10 @@ TA_context* read_frame(const char* file, u8* vram_ref = NULL) { fread(&ctx->rend.fb_X_CLIP.full, 1, sizeof(ctx->rend.fb_X_CLIP.full), fw); fread(&ctx->rend.fb_Y_CLIP.full, 1, sizeof(ctx->rend.fb_Y_CLIP.full), fw); - fread(ctx->rend.global_param_op.Append(), 1, sizeof(PolyParam), fw); - fread(ctx->rend.verts.Append(4), 1, 4 * sizeof(Vertex), fw); + fread(ctx->rend.global_param_op.Append(), 1, sizeofPolyParam, fw); + Vertex *vtx = ctx->rend.verts.Append(4); + for (int i = 0; i < 4; i++) + fread(vtx + i, 1, sizeofVertex, fw); fread(&t, 1, sizeof(t), fw); verify(t == VRAM_SIZE); diff --git a/core/hw/pvr/helper_classes.h b/core/hw/pvr/helper_classes.h index d52153d5b..2e1f01d63 100644 --- a/core/hw/pvr/helper_classes.h +++ b/core/hw/pvr/helper_classes.h @@ -8,6 +8,7 @@ struct List int size; bool* 
overrun; + const char *list_name; __forceinline int used() const { return size-avail; } __forceinline int bytes() const { return used()* sizeof(T); } @@ -17,6 +18,8 @@ struct List { *overrun |= true; Clear(); + if (list_name != NULL) + printf("List overrun for list %s\n", list_name); return daty; } @@ -45,7 +48,7 @@ struct List T* head() const { return daty-used(); } - void InitBytes(int maxbytes,bool* ovrn) + void InitBytes(int maxbytes,bool* ovrn, const char *name) { maxbytes-=maxbytes%sizeof(T); @@ -58,11 +61,12 @@ struct List overrun=ovrn; Clear(); + list_name = name; } - void Init(int maxsize,bool* ovrn) + void Init(int maxsize,bool* ovrn, const char *name) { - InitBytes(maxsize*sizeof(T),ovrn); + InitBytes(maxsize*sizeof(T),ovrn, name); } void Clear() @@ -76,4 +80,4 @@ struct List Clear(); free(daty); } -}; \ No newline at end of file +}; diff --git a/core/hw/pvr/pvr_mem.cpp b/core/hw/pvr/pvr_mem.cpp index 9b9a7c24e..0b7f578b7 100644 --- a/core/hw/pvr/pvr_mem.cpp +++ b/core/hw/pvr/pvr_mem.cpp @@ -37,9 +37,9 @@ void YUV_init() YUV_dest=TA_YUV_TEX_BASE&VRAM_MASK;//TODO : add the masking needed TA_YUV_TEX_CNT=0; - YUV_blockcount=(((TA_YUV_TEX_CTRL>>0)&0x3F)+1)*(((TA_YUV_TEX_CTRL>>8)&0x3F)+1); + YUV_blockcount = (TA_YUV_TEX_CTRL.yuv_u_size + 1) * (TA_YUV_TEX_CTRL.yuv_v_size + 1); - if ((TA_YUV_TEX_CTRL>>16 )&1) + if (TA_YUV_TEX_CTRL.yuv_tex != 0) { die ("YUV: Not supported configuration\n"); YUV_x_size=16; @@ -47,8 +47,8 @@ void YUV_init() } else // yesh!!! { - YUV_x_size=(((TA_YUV_TEX_CTRL>>0)&0x3F)+1)*16; - YUV_y_size=(((TA_YUV_TEX_CTRL>>8)&0x3F)+1)*16; + YUV_x_size = (TA_YUV_TEX_CTRL.yuv_u_size + 1) * 16; + YUV_y_size = (TA_YUV_TEX_CTRL.yuv_v_size + 1) * 16; } } @@ -164,7 +164,7 @@ void YUV_data(u32* data , u32 count) YUV_init(); } - u32 block_size=(TA_YUV_TEX_CTRL & (1<<24))==0?384:512; + u32 block_size = TA_YUV_TEX_CTRL.yuv_form == 0 ? 
384 : 512; verify(block_size==384); //no support for 512 diff --git a/core/hw/pvr/pvr_regs.h b/core/hw/pvr/pvr_regs.h index 93969c8bc..94125bfc4 100644 --- a/core/hw/pvr/pvr_regs.h +++ b/core/hw/pvr/pvr_regs.h @@ -370,7 +370,22 @@ union TA_GLOB_TILE_CLIP_type }; u32 full; }; - + +union TA_YUV_TEX_CTRL_type +{ + struct + { + u32 yuv_u_size : 6; + u32 reserved1 : 2; + u32 yuv_v_size : 6; + u32 reserved2 : 2; + u32 yuv_tex : 1; + u32 reserved3 : 7; + u32 yuv_form : 1; + u32 reserved4 : 7; + }; + u32 full; +}; // TA REGS #define TA_OL_BASE_addr 0x00000124 // RW Object list write start address @@ -483,7 +498,7 @@ union TA_GLOB_TILE_CLIP_type #define TA_ALLOC_CTRL PvrReg(TA_ALLOC_CTRL_addr,u32) // RW Object list control #define TA_LIST_INIT PvrReg(TA_LIST_INIT_addr,u32) // RW TA initialization #define TA_YUV_TEX_BASE PvrReg(TA_YUV_TEX_BASE_addr,u32) // RW YUV422 texture write start address -#define TA_YUV_TEX_CTRL PvrReg(TA_YUV_TEX_CTRL_addr,u32) // RW YUV converter control +#define TA_YUV_TEX_CTRL PvrReg(TA_YUV_TEX_CTRL_addr, TA_YUV_TEX_CTRL_type) // RW YUV converter control #define TA_YUV_TEX_CNT PvrReg(TA_YUV_TEX_CNT_addr,u32) // R YUV converter macro block counter value #define TA_LIST_CONT PvrReg(TA_LIST_CONT_addr,u32) // RW TA continuation processing diff --git a/core/hw/pvr/ta.h b/core/hw/pvr/ta.h index 29b7c6d39..2347dbf62 100644 --- a/core/hw/pvr/ta.h +++ b/core/hw/pvr/ta.h @@ -23,10 +23,10 @@ bool ta_parse_vdrc(TA_context* ctx); #define STRIPS_AS_PPARAMS 1 -#define TRIG_SORT 1 +#define TRIG_SORT 0 #if TRIG_SORT #undef STRIPS_AS_PPARAMS #define STRIPS_AS_PPARAMS 1 -#endif \ No newline at end of file +#endif diff --git a/core/hw/pvr/ta_ctx.h b/core/hw/pvr/ta_ctx.h index a99348c67..3ada93c89 100644 --- a/core/hw/pvr/ta_ctx.h +++ b/core/hw/pvr/ta_ctx.h @@ -17,6 +17,12 @@ struct Vertex u8 spc[4]; float u,v; + + // Two volumes format + u8 col1[4]; + u8 spc1[4]; + + float u1,v1; }; struct PolyParam @@ -35,6 +41,9 @@ struct PolyParam float zvZ; u32 tileclip; //float 
zMin,zMax; + TSP tsp1; + TCW tcw1; + u32 texid1; }; struct ModifierVolumeParam @@ -98,6 +107,7 @@ struct RenderPass { u32 mvo_count; u32 pt_count; u32 tr_count; + u32 mvo_tr_count; }; struct rend_context @@ -124,6 +134,7 @@ struct rend_context List idx; List modtrig; List global_param_mvo; + List global_param_mvo_tr; List global_param_op; List global_param_pt; @@ -139,6 +150,7 @@ struct rend_context global_param_tr.Clear(); modtrig.Clear(); global_param_mvo.Clear(); + global_param_mvo_tr.Clear(); render_passes.Clear(); Overrun=false; @@ -189,16 +201,23 @@ struct TA_context { tad.Reset((u8*)OS_aligned_malloc(32, 8*1024*1024)); - rend.verts.InitBytes(2*1024*1024,&rend.Overrun); //up to 2 MB of vtx data/frame = ~ 75k vtx/frame - rend.idx.Init(120*1024,&rend.Overrun); //up to 120K indexes ( idx have stripification overhead ) - rend.global_param_op.Init(4096,&rend.Overrun); - rend.global_param_pt.Init(4096,&rend.Overrun); - rend.global_param_mvo.Init(4096,&rend.Overrun); - rend.global_param_tr.Init(8192,&rend.Overrun); + rend.verts.InitBytes(4 * 1024 * 1024, &rend.Overrun, "verts"); //up to 4 mb of vtx data/frame = ~ 96k vtx/frame + rend.idx.Init(120 * 1024, &rend.Overrun, "idx"); //up to 120K indexes ( idx have stripification overhead ) + rend.global_param_op.Init(4096, &rend.Overrun, "global_param_op"); + rend.global_param_pt.Init(4096, &rend.Overrun, "global_param_pt"); + rend.global_param_mvo.Init(4096, &rend.Overrun, "global_param_mvo"); +#if STRIPS_AS_PPARAMS + // That makes a lot of polyparams but this is required for proper sorting... 
+ // Rez uses more than 8192 translucent polygons sometimes + rend.global_param_tr.Init(10240, &rend.Overrun, "global_param_tr"); +#else + rend.global_param_tr.Init(4096, &rend.Overrun, "global_param_tr"); +#endif + rend.global_param_mvo_tr.Init(4096, &rend.Overrun, "global_param_mvo_tr"); - rend.modtrig.Init(8192,&rend.Overrun); + rend.modtrig.Init(16384, &rend.Overrun, "modtrig"); - rend.render_passes.Init(sizeof(RenderPass) * 10, &rend.Overrun); // 10 render passes + rend.render_passes.Init(sizeof(RenderPass) * 10, &rend.Overrun, "render_passes"); // 10 render passes Reset(); } @@ -222,6 +241,7 @@ struct TA_context rend.global_param_tr.Free(); rend.modtrig.Free(); rend.global_param_mvo.Free(); + rend.global_param_mvo_tr.Free(); rend.render_passes.Free(); } }; diff --git a/core/hw/pvr/ta_vtx.cpp b/core/hw/pvr/ta_vtx.cpp index 3b15230c2..9dd68c289 100644 --- a/core/hw/pvr/ta_vtx.cpp +++ b/core/hw/pvr/ta_vtx.cpp @@ -85,6 +85,8 @@ List* CurrentPPlist; //TA state vars DECL_ALIGN(4) u8 FaceBaseColor[4]; DECL_ALIGN(4) u8 FaceOffsColor[4]; +DECL_ALIGN(4) u8 FaceBaseColor1[4]; +DECL_ALIGN(4) u8 FaceOffsColor1[4]; DECL_ALIGN(4) u32 SFaceBaseColor; DECL_ALIGN(4) u32 SFaceOffsColor; @@ -769,7 +771,7 @@ public: CurrentPP=&nullPP; CurrentPPlist=0; - if (ListType == ListType_Opaque_Modifier_Volume) + if (ListType == ListType_Opaque_Modifier_Volume || ListType == ListType_Translucent_Modifier_Volume) EndModVol(); } @@ -808,6 +810,9 @@ public: if (d_pp->pcw.Texture) { d_pp->texid = renderer->GetTexture(d_pp->tsp,d_pp->tcw); } + d_pp->tsp1.full = -1; + d_pp->tcw1.full = -1; + d_pp->texid1 = -1; } } @@ -860,6 +865,11 @@ public: TA_PolyParam3* pp=(TA_PolyParam3*)vpp; glob_param_bdc(pp); + + CurrentPP->tsp1.full = pp->tsp1.full; + CurrentPP->tcw1.full = pp->tcw1.full; + if (pp->pcw.Texture) + CurrentPP->texid1 = renderer->GetTexture(pp->tsp1, pp->tcw1); } __forceinline static void TACALL AppendPolyParam4A(void* vpp) @@ -867,13 +877,19 @@ public: TA_PolyParam4A* 
pp=(TA_PolyParam4A*)vpp; glob_param_bdc(pp); + + CurrentPP->tsp1.full = pp->tsp1.full; + CurrentPP->tcw1.full = pp->tcw1.full; + if (pp->pcw.Texture) + CurrentPP->texid1 = renderer->GetTexture(pp->tsp1, pp->tcw1); } __forceinline static void TACALL AppendPolyParam4B(void* vpp) { TA_PolyParam4B* pp=(TA_PolyParam4B*)vpp; - poly_float_color(FaceBaseColor,FaceColor0); + poly_float_color(FaceBaseColor, FaceColor0); + poly_float_color(FaceBaseColor1, FaceColor1); } //Poly Strip handling @@ -884,13 +900,6 @@ public: { CurrentPP->count=vdrc.idx.used() - CurrentPP->first; - int vbase=vdrc.verts.used(); - - *vdrc.idx.Append()=vbase-1; - *vdrc.idx.Append()=vbase; - - if (CurrentPP->count&1) - *vdrc.idx.Append()=vbase; #if STRIPS_AS_PPARAMS if (CurrentPPlist==&vdrc.global_param_tr) { @@ -900,7 +909,20 @@ public: d_pp->first=vdrc.idx.used(); d_pp->count=0; } + else + { #endif + int vbase=vdrc.verts.used(); + + *vdrc.idx.Append()=vbase-1; + *vdrc.idx.Append()=vbase; + + if (CurrentPP->count&1) + *vdrc.idx.Append()=vbase; +#if STRIPS_AS_PPARAMS + } +#endif + } @@ -941,6 +963,14 @@ public: cv->u = f16(vtx->u_name);\ cv->v = f16(vtx->v_name); + #define vert_uv1_32(u_name,v_name) \ + cv->u1 = (vtx->u_name);\ + cv->v1 = (vtx->v_name); + + #define vert_uv1_16(u_name,v_name) \ + cv->u1 = f16(vtx->u_name);\ + cv->v1 = f16(vtx->v_name); + //Color conversions #define vert_packed_color_(to,src) \ { \ @@ -984,6 +1014,20 @@ public: cv->spc[2] = FaceOffsColor[2]*satint/256; \ cv->spc[3] = FaceOffsColor[3]; } + #define vert_face_base_color1(baseint) \ + { u32 satint=float_to_satu8(vtx->baseint); \ + cv->col1[0] = FaceBaseColor1[0]*satint/256; \ + cv->col1[1] = FaceBaseColor1[1]*satint/256; \ + cv->col1[2] = FaceBaseColor1[2]*satint/256; \ + cv->col1[3] = FaceBaseColor1[3]; } + + #define vert_face_offs_color1(offsint) \ + { u32 satint=float_to_satu8(vtx->offsint); \ + cv->spc1[0] = FaceOffsColor1[0]*satint/256; \ + cv->spc1[1] = FaceOffsColor1[1]*satint/256; \ + cv->spc1[2] = 
FaceOffsColor1[2]*satint/256; \ + cv->spc1[3] = FaceOffsColor1[3]; } + //vert_float_color_(cv->spc,FaceOffsColor[3],FaceOffsColor[0]*satint/256,FaceOffsColor[1]*satint/256,FaceOffsColor[2]*satint/256); } @@ -1109,6 +1153,7 @@ public: vert_cvt_base; vert_packed_color(col,BaseCol0); + vert_packed_color(col1, BaseCol1); } //(Non-Textured, Intensity, with Two Volumes) @@ -1118,6 +1163,7 @@ public: vert_cvt_base; vert_face_base_color(BaseInt0); + vert_face_base_color1(BaseInt1); } //(Textured, Packed Color, with Two Volumes) @@ -1136,6 +1182,10 @@ public: { vert_res_base; + vert_packed_color(col1, BaseCol1); + vert_packed_color(spc1, OffsCol1); + + vert_uv1_32(u1, v1); } //(Textured, Packed Color, 16bit UV, with Two Volumes) @@ -1154,6 +1204,10 @@ public: { vert_res_base; + vert_packed_color(col1, BaseCol1); + vert_packed_color(spc1, OffsCol1); + + vert_uv1_16(u1, v1); } //(Textured, Intensity, with Two Volumes) @@ -1172,6 +1226,10 @@ public: { vert_res_base; + vert_face_base_color1(BaseInt1); + vert_face_offs_color1(OffsInt1); + + vert_uv1_32(u1,v1); } //(Textured, Intensity, 16bit UV, with Two Volumes) @@ -1190,6 +1248,10 @@ public: { vert_res_base; + vert_face_base_color1(BaseInt1); + vert_face_offs_color1(OffsInt1); + + vert_uv1_16(u1, v1); } //Sprites @@ -1217,6 +1279,9 @@ public: if (d_pp->pcw.Texture) { d_pp->texid = renderer->GetTexture(d_pp->tsp,d_pp->tcw); } + d_pp->tcw1.full = -1; + d_pp->tsp1.full = -1; + d_pp->texid1 = -1; SFaceBaseColor=spr->BaseCol; SFaceOffsColor=spr->OffsCol; @@ -1374,6 +1439,8 @@ public: List *list = NULL; if (CurrentList == ListType_Opaque_Modifier_Volume) list = &vdrc.global_param_mvo; + else if (CurrentList == ListType_Translucent_Modifier_Volume) + list = &vdrc.global_param_mvo_tr; else return; if (list->used() > 0) @@ -1390,6 +1457,8 @@ public: ModifierVolumeParam *p = NULL; if (CurrentList == ListType_Opaque_Modifier_Volume) p = vdrc.global_param_mvo.Append(); + else if (CurrentList == ListType_Translucent_Modifier_Volume) + p = 
vdrc.global_param_mvo_tr.Append(); else return; p->isp.full = param->isp.full; @@ -1399,7 +1468,7 @@ public: __forceinline static void AppendModVolVertexA(TA_ModVolA* mvv) { - if (CurrentList!=ListType_Opaque_Modifier_Volume) + if (CurrentList != ListType_Opaque_Modifier_Volume && CurrentList != ListType_Translucent_Modifier_Volume) return; lmr=vdrc.modtrig.Append(); @@ -1419,7 +1488,7 @@ public: __forceinline static void AppendModVolVertexB(TA_ModVolB* mvv) { - if (CurrentList!=ListType_Opaque_Modifier_Volume) + if (CurrentList != ListType_Opaque_Modifier_Volume && CurrentList != ListType_Translucent_Modifier_Volume) return; lmr->y2=mvv->y2; lmr->z2=mvv->z2; @@ -1486,6 +1555,7 @@ bool ta_parse_vdrc(TA_context* ctx) render_pass->mvo_count = vd_rc.global_param_mvo.used(); render_pass->pt_count = vd_rc.global_param_pt.used(); render_pass->tr_count = vd_rc.global_param_tr.used(); + render_pass->mvo_tr_count = vd_rc.global_param_mvo_tr.used(); render_pass->autosort = UsingAutoSort(pass); render_pass->z_clear = ClearZBeforePass(pass); } @@ -1637,6 +1707,9 @@ void FillBGP(TA_context* ctx) bgpp->isp.full=vri(strip_base); bgpp->tsp.full=vri(strip_base+4); bgpp->tcw.full=vri(strip_base+8); + bgpp->tcw1.full = -1; + bgpp->tsp1.full = -1; + bgpp->texid1 = -1; bgpp->count=4; bgpp->first=0; bgpp->tileclip=0;//disabled ! 
HA ~ diff --git a/core/linux-dist/x11.cpp b/core/linux-dist/x11.cpp index 4e44f761d..f745a3050 100644 --- a/core/linux-dist/x11.cpp +++ b/core/linux-dist/x11.cpp @@ -704,9 +704,11 @@ void x11_window_create() verify(glXCreateContextAttribsARB != 0); int context_attribs[] = { - GLX_CONTEXT_MAJOR_VERSION_ARB, 3, - GLX_CONTEXT_MINOR_VERSION_ARB, 1, + GLX_CONTEXT_MAJOR_VERSION_ARB, 4, + GLX_CONTEXT_MINOR_VERSION_ARB, 3, +#ifndef RELEASE GLX_CONTEXT_FLAGS_ARB, GLX_CONTEXT_DEBUG_BIT_ARB, +#endif GLX_CONTEXT_PROFILE_MASK_ARB, GLX_CONTEXT_CORE_PROFILE_BIT_ARB, None }; @@ -716,7 +718,7 @@ void x11_window_create() if (!x11_glc) { - die("Failed to create GL3.1 context\n"); + die("Failed to create OpenGL 4.3 context\n"); } #endif diff --git a/core/oslib/audiobackend_alsa.cpp b/core/oslib/audiobackend_alsa.cpp index 0f2a5599c..6764b54a0 100644 --- a/core/oslib/audiobackend_alsa.cpp +++ b/core/oslib/audiobackend_alsa.cpp @@ -7,6 +7,8 @@ static bool pcm_blocking = true; static snd_pcm_uframes_t buffer_size; static snd_pcm_uframes_t period_size; +#define MAX_LATENCY 100 + // We're making these functions static - there's no need to pollute the global namespace static void alsa_init() { @@ -89,7 +91,7 @@ static void alsa_init() } else printf("ALSA: period size set to %ld\n", period_size); - buffer_size = (44100 * 100 /* settings.omx.Audio_Latency */ / 1000 / period_size + 1) * period_size; + buffer_size = (44100 * MAX_LATENCY / 1000 / period_size + 1) * period_size; rc=snd_pcm_hw_params_set_buffer_size_near(handle, params, &buffer_size); if (rc < 0) { diff --git a/core/rend/TexCache.h b/core/rend/TexCache.h index d87481260..877c60d9b 100644 --- a/core/rend/TexCache.h +++ b/core/rend/TexCache.h @@ -161,11 +161,7 @@ struct pp_8888 { __forceinline static u32 packRGB(u8 R,u8 G,u8 B) { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ && defined(GLES) - return (R << 0) | (G << 8) | (B << 16) | 0xFF000000; -#else return (R << 24) | (G << 16) | (B << 8) | 0xFF; -#endif } }; diff --git 
a/core/rend/gles/abuffer.cpp b/core/rend/gles/abuffer.cpp new file mode 100644 index 000000000..f86209391 --- /dev/null +++ b/core/rend/gles/abuffer.cpp @@ -0,0 +1,545 @@ +/* + * abuffer.cpp + * + * Created on: May 26, 2018 + * Author: raph + */ +#include "glcache.h" + +GLuint pixels_buffer; +GLuint pixels_pointers; +GLuint atomic_buffer; +PipelineShader g_abuffer_final_shader; +PipelineShader g_abuffer_final_nosort_shader; +PipelineShader g_abuffer_clear_shader; +PipelineShader g_abuffer_tr_modvol_shaders[ModeCount]; +static GLuint g_quadBuffer = 0; +static GLuint g_quadVertexArray = 0; + +static int g_imageWidth = 0; +static int g_imageHeight = 0; + +GLuint pixel_buffer_size = 512 * 1024 * 1024; // Initial size 512 MB + +#define MAX_PIXELS_PER_FRAGMENT "32" + +static const char *final_shader_source = SHADER_HEADER "\ +#define DEPTH_SORTED %d \n\ +#define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT " \n\ + \n\ +layout(binding = 0) uniform sampler2D tex; \n\ +uniform highp float shade_scale_factor; \n\ + \n\ +out vec4 FragColor; \n\ + \n\ +uint pixel_list[MAX_PIXELS_PER_FRAGMENT]; \n\ + \n\ + \n\ +int fillAndSortFragmentArray(ivec2 coords) \n\ +{ \n\ + // Load fragments into a local memory array for sorting (insertion sort, at most MAX_PIXELS_PER_FRAGMENT entries) \n\ + uint idx = imageLoad(abufferPointerImg, coords).x; \n\ + int count = 0; \n\ + for (; idx != EOL && count < MAX_PIXELS_PER_FRAGMENT; count++) \n\ + { \n\ + const Pixel p = pixels[idx]; \n\ + int j = count - 1; \n\ + Pixel jp = p; if (j >= 0) jp = pixels[pixel_list[j]]; // j is -1 for the first fragment: never read pixel_list[-1] \n\ +#if DEPTH_SORTED == 1 \n\ + while (j >= 0 \n\ + && (jp.depth < p.depth \n\ + || (jp.depth == p.depth && getPolyNumber(jp) > getPolyNumber(p)))) \n\ +#else \n\ + while (j >= 0 && getPolyNumber(jp) > getPolyNumber(p)) \n\ +#endif \n\ + { \n\ + pixel_list[j + 1] = pixel_list[j]; \n\ + j--; \n\ + if (j >= 0) jp = pixels[pixel_list[j]]; // keep the previous jp once the start of the list is reached \n\ + } \n\ + pixel_list[j + 1] = idx; \n\ + idx = p.next; \n\ + } \n\ + return count; \n\ +} \n\ + \n\ +// Blend fragments back-to-front \n\ +vec4 resolveAlphaBlend(ivec2 coords) { 
\n\ + \n\ + // Copy and sort fragments into a local array \n\ + int num_frag = fillAndSortFragmentArray(coords); \n\ + \n\ + vec4 finalColor = texture(tex, gl_FragCoord.xy / textureSize(tex, 0)); \n\ + vec4 secondaryBuffer = vec4(0.0); // Secondary accumulation buffer \n\ + float depth = 1.0; \n\ + \n\ + for (int i = 0; i < num_frag; i++) \n\ + { \n\ + const Pixel pixel = pixels[pixel_list[i]]; \n\ + const PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\ +#if DEPTH_SORTED != 1 \n\ + const float frag_depth = pixel.depth; \n\ + switch (getDepthFunc(pp)) \n\ + { \n\ + case 0: // Never \n\ + continue; \n\ + case 1: // Greater \n\ + if (frag_depth <= depth) \n\ + continue; \n\ + break; \n\ + case 2: // Equal \n\ + if (frag_depth != depth) \n\ + continue; \n\ + break; \n\ + case 3: // Greater or equal \n\ + if (frag_depth < depth) \n\ + continue; \n\ + break; \n\ + case 4: // Less \n\ + if (frag_depth >= depth) \n\ + continue; \n\ + break; \n\ + case 5: // Not equal \n\ + if (frag_depth == depth) \n\ + continue; \n\ + break; \n\ + case 6: // Less or equal \n\ + if (frag_depth > depth) \n\ + continue; \n\ + break; \n\ + case 7: // Always \n\ + break; \n\ + } \n\ + \n\ + if (getDepthMask(pp)) \n\ + depth = frag_depth; \n\ +#endif \n\ + bool area1 = false; \n\ + bool shadowed = false; \n\ + if (isShadowed(pixel)) \n\ + { \n\ + if (isTwoVolumes(pp)) \n\ + area1 = true; \n\ + else \n\ + shadowed = true; \n\ + } \n\ + vec4 srcColor; \n\ + if (getSrcSelect(pp, area1)) \n\ + srcColor = secondaryBuffer; \n\ + else \n\ + { \n\ + srcColor = pixel.color; \n\ + if (shadowed) \n\ + srcColor.rgb *= shade_scale_factor; \n\ + } \n\ + vec4 dstColor = getDstSelect(pp, area1) ? 
secondaryBuffer : finalColor; \n\ + vec4 srcCoef; \n\ + vec4 dstCoef; \n\ + \n\ + int srcBlend = getSrcBlendFunc(pp, area1); \n\ + switch (srcBlend) \n\ + { \n\ + case ZERO: \n\ + srcCoef = vec4(0.0); \n\ + break; \n\ + case ONE: \n\ + srcCoef = vec4(1.0); \n\ + break; \n\ + case OTHER_COLOR: \n\ + srcCoef = dstColor; // the other color is the selected destination, same as the inverse case below \n\ + break; \n\ + case INVERSE_OTHER_COLOR: \n\ + srcCoef = vec4(1.0) - dstColor; \n\ + break; \n\ + case SRC_ALPHA: \n\ + srcCoef = vec4(srcColor.a); \n\ + break; \n\ + case INVERSE_SRC_ALPHA: \n\ + srcCoef = vec4(1.0 - srcColor.a); \n\ + break; \n\ + case DST_ALPHA: \n\ + srcCoef = vec4(dstColor.a); \n\ + break; \n\ + case INVERSE_DST_ALPHA: \n\ + srcCoef = vec4(1.0 - dstColor.a); \n\ + break; \n\ + } \n\ + int dstBlend = getDstBlendFunc(pp, area1); \n\ + switch (dstBlend) \n\ + { \n\ + case ZERO: \n\ + dstCoef = vec4(0.0); \n\ + break; \n\ + case ONE: \n\ + dstCoef = vec4(1.0); \n\ + break; \n\ + case OTHER_COLOR: \n\ + dstCoef = srcColor; \n\ + break; \n\ + case INVERSE_OTHER_COLOR: \n\ + dstCoef = vec4(1.0) - srcColor; \n\ + break; \n\ + case SRC_ALPHA: \n\ + dstCoef = vec4(srcColor.a); \n\ + break; \n\ + case INVERSE_SRC_ALPHA: \n\ + dstCoef = vec4(1.0 - srcColor.a); \n\ + break; \n\ + case DST_ALPHA: \n\ + dstCoef = vec4(dstColor.a); \n\ + break; \n\ + case INVERSE_DST_ALPHA: \n\ + dstCoef = vec4(1.0 - dstColor.a); \n\ + break; \n\ + } \n\ + const vec4 result = clamp(dstColor * dstCoef + srcColor * srcCoef, 0.0, 1.0); \n\ + if (getDstSelect(pp, area1)) \n\ + secondaryBuffer = result; \n\ + else \n\ + finalColor = result; \n\ + } \n\ + \n\ + return finalColor; \n\ + \n\ +} \n\ + \n\ +void main(void) \n\ +{ \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + // Compute and output final color for the frame buffer \n\ + // Visualize the number of layers in use \n\ + //FragColor = vec4(float(fillFragmentArray(coords)) / MAX_PIXELS_PER_FRAGMENT, 0, 0, 1); \n\ + FragColor = resolveAlphaBlend(coords); \n\ +} \n\ +"; + +static const char 
*clear_shader_source = SHADER_HEADER "\ + \n\ +void main(void) \n\ +{ \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + \n\ + // Reset pointers \n\ + imageStore(abufferPointerImg, coords, uvec4(EOL)); \n\ + \n\ + // Discard fragment so nothing is written to the framebuffer \n\ + discard; \n\ +} \n\ +"; + +static const char *tr_modvol_shader_source = SHADER_HEADER "\ +#define MV_MODE %d \n\ +#define MAX_PIXELS_PER_FRAGMENT " MAX_PIXELS_PER_FRAGMENT " \n\ + \n\ +// Must match ModifierVolumeMode enum values \n\ +#define MV_XOR 0 \n\ +#define MV_OR 1 \n\ +#define MV_INCLUSION 2 \n\ +#define MV_EXCLUSION 3 \n\ + \n\ +void main(void) \n\ +{ \n\ +#if MV_MODE == MV_XOR || MV_MODE == MV_OR \n\ + setFragDepth(); \n\ +#endif \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + \n\ + uint idx = imageLoad(abufferPointerImg, coords).x; \n\ + int list_len = 0; \n\ + while (idx != EOL && list_len < MAX_PIXELS_PER_FRAGMENT) \n\ + { \n\ + const Pixel pixel = pixels[idx]; \n\ + const PolyParam pp = tr_poly_params[getPolyNumber(pixel)]; \n\ + if (getShadowEnable(pp)) \n\ + { \n\ +#if MV_MODE == MV_XOR \n\ + if (gl_FragDepth <= pixel.depth) \n\ + atomicXor(pixels[idx].seq_num, SHADOW_STENCIL); \n\ +#elif MV_MODE == MV_OR \n\ + if (gl_FragDepth <= pixel.depth) \n\ + atomicOr(pixels[idx].seq_num, SHADOW_STENCIL); \n\ +#elif MV_MODE == MV_INCLUSION \n\ + uint prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL)); \n\ + if ((prev_val & (SHADOW_STENCIL|SHADOW_ACC)) == SHADOW_STENCIL) \n\ + pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1u, 31, 1); \n\ +#elif MV_MODE == MV_EXCLUSION \n\ + uint prev_val = atomicAnd(pixels[idx].seq_num, ~(SHADOW_STENCIL|SHADOW_ACC)); \n\ + if ((prev_val & (SHADOW_STENCIL|SHADOW_ACC)) == SHADOW_ACC) \n\ + pixels[idx].seq_num = bitfieldInsert(pixel.seq_num, 1u, 31, 1); \n\ +#endif \n\ + } \n\ + idx = pixel.next; \n\ + list_len++; \n\ + } \n\ + \n\ + discard; \n\ +} \n\ +"; + +void DrawQuad(); + +void initABuffer() +{ + g_imageWidth = screen_width; + 
g_imageHeight = screen_height; + + if (g_imageWidth > 0 && g_imageHeight > 0) + { + if (pixels_pointers == 0) + pixels_pointers = glcache.GenTexture(); + glActiveTexture(GL_TEXTURE4); + glBindTexture(GL_TEXTURE_2D, pixels_pointers); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_R32UI, g_imageWidth, g_imageHeight, 0, GL_RED_INTEGER, GL_UNSIGNED_INT, 0); + glBindImageTexture(4, pixels_pointers, 0, false, 0, GL_READ_WRITE, GL_R32UI); + glCheck(); + } + + if (pixels_buffer == 0 ) + { + // Create the buffer + glGenBuffers(1, &pixels_buffer); + // Bind it + glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer); + // Declare storage + glBufferData(GL_SHADER_STORAGE_BUFFER, pixel_buffer_size, NULL, GL_DYNAMIC_COPY); + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer); + glCheck(); + } + + if (atomic_buffer == 0 ) + { + // Create the buffer + glGenBuffers(1, &atomic_buffer); + // Bind it + glBindBuffer(GL_ATOMIC_COUNTER_BUFFER, atomic_buffer); + // Declare storage + glBufferData(GL_ATOMIC_COUNTER_BUFFER, 4, NULL, GL_DYNAMIC_COPY); + glBindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, atomic_buffer); + GLint zero = 0; + glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLint), &zero); /* zero the counter in the atomic counter buffer bound above, not the pixel storage buffer */ + glCheck(); + } + + if (g_abuffer_final_shader.program == 0) + { + char source[16384]; + sprintf(source, final_shader_source, 1); + CompilePipelineShader(&g_abuffer_final_shader, source); + } + if (g_abuffer_final_nosort_shader.program == 0) + { + char source[16384]; + sprintf(source, final_shader_source, 0); + CompilePipelineShader(&g_abuffer_final_nosort_shader, source); + } + if (g_abuffer_clear_shader.program == 0) + CompilePipelineShader(&g_abuffer_clear_shader, clear_shader_source); + if (g_abuffer_tr_modvol_shaders[0].program == 0) + { + char source[16384]; + for (int mode = 0; mode < ModeCount; mode++) + { + sprintf(source, tr_modvol_shader_source, 
mode); + CompilePipelineShader(&g_abuffer_tr_modvol_shaders[mode], source); + } + } + + if (g_quadVertexArray == 0) + glGenVertexArrays(1, &g_quadVertexArray); + if (g_quadBuffer == 0) + glGenBuffers(1, &g_quadBuffer); + + glCheck(); + + // Clear A-buffer pointers + glcache.UseProgram(g_abuffer_clear_shader.program); + ShaderUniforms.Set(&g_abuffer_clear_shader); + + DrawQuad(); + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + + glCheck(); +} + +void reshapeABuffer(int w, int h) +{ + if (w != g_imageWidth || h != g_imageHeight) { + if (pixels_pointers != 0) + { + glcache.DeleteTextures(1, &pixels_pointers); + pixels_pointers = 0; + } + + initABuffer(); + } +} + +void DrawQuad() +{ + glBindVertexArray(g_quadVertexArray); + + float xmin = (ShaderUniforms.scale_coefs[2] - 1) / ShaderUniforms.scale_coefs[0]; + float xmax = (ShaderUniforms.scale_coefs[2] + 1) / ShaderUniforms.scale_coefs[0]; + float ymin = (ShaderUniforms.scale_coefs[3] - 1) / ShaderUniforms.scale_coefs[1]; + float ymax = (ShaderUniforms.scale_coefs[3] + 1) / ShaderUniforms.scale_coefs[1]; + if (ymin > ymax) + { + float t = ymin; + ymin = ymax; + ymax = t; + } + struct Vertex vertices[] = { + { xmin, ymax, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 1 }, + { xmin, ymin, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 0, 0 }, + { xmax, ymax, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 1 }, + { xmax, ymin, 1, { 255, 255, 255, 255 }, { 0, 0, 0, 0 }, 1, 0 }, + }; + GLushort indices[] = { 0, 1, 2, 1, 3 }; + + glBindBuffer(GL_ARRAY_BUFFER, g_quadBuffer); glCheck(); + glBufferData(GL_ARRAY_BUFFER, sizeof(vertices), vertices, GL_STREAM_DRAW); glCheck(); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); glCheck(); + + glEnableVertexAttribArray(VERTEX_POS_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_POS_ARRAY, 3, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,x)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_BASE_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_COL_BASE_ARRAY, 4, 
GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,col)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_OFFS_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_COL_OFFS_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex,spc)); glCheck(); + + glEnableVertexAttribArray(VERTEX_UV_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_UV_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,u)); glCheck(); + + glDisableVertexAttribArray(VERTEX_UV1_ARRAY); + glDisableVertexAttribArray(VERTEX_COL_OFFS1_ARRAY); + glDisableVertexAttribArray(VERTEX_COL_BASE1_ARRAY); + + glDrawElements(GL_TRIANGLE_STRIP, 5, GL_UNSIGNED_SHORT, indices); glCheck(); +} + +void DrawTranslucentModVols(int first, int count) +{ + if (count == 0 || pvrrc.modtrig.used() == 0) + return; + SetupModvolVBO(); + + glActiveTexture(GL_TEXTURE2); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE3); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE1); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, 0); + + glcache.Disable(GL_DEPTH_TEST); + glcache.Disable(GL_STENCIL_TEST); + + glCheck(); + + ModifierVolumeParam* params = &pvrrc.global_param_mvo_tr.head()[first]; + + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT); + + int mod_base = -1; + + for (u32 cmv = 0; cmv < count; cmv++) + { + ModifierVolumeParam& param = params[cmv]; + + if (param.count == 0) + continue; + + u32 mv_mode = param.isp.DepthMode; + + verify(param.first >= 0 && param.first + param.count <= pvrrc.modtrig.used()); + + if (mod_base == -1) + mod_base = param.first; + + PipelineShader *shader; + if (!param.isp.VolumeLast && mv_mode > 0) + shader = &g_abuffer_tr_modvol_shaders[Or]; // OR'ing (open volume or quad) + else + shader = &g_abuffer_tr_modvol_shaders[Xor]; // XOR'ing (closed volume) + glcache.UseProgram(shader->program); + ShaderUniforms.Set(shader); + + 
SetCull(param.isp.CullMode); glCheck(); + + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + + glDrawArrays(GL_TRIANGLES, param.first * 3, param.count * 3); glCheck(); + + if (mv_mode == 1 || mv_mode == 2) + { + //Sum the area + shader = &g_abuffer_tr_modvol_shaders[mv_mode == 1 ? Inclusion : Exclusion]; + glcache.UseProgram(shader->program); + ShaderUniforms.Set(shader); + + glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT); + glDrawArrays(GL_TRIANGLES, mod_base * 3, (param.first + param.count - mod_base) * 3); glCheck(); + mod_base = -1; + } + } +} + +void checkOverflowAndReset() +{ + // Using atomic counter + GLuint max_pixel_index = 0; +// glGetBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0, sizeof(GLuint), &max_pixel_index); +//// printf("ABUFFER %d pixels used\n", max_pixel_index); +// if ((max_pixel_index + 1) * 32 - 1 >= pixel_buffer_size) +// { +// GLint64 size; +// glGetInteger64v(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &size); +// if (pixel_buffer_size == size) +// printf("A-buffer overflow: %d pixels. Buffer size already maxed out\n", max_pixel_index); +// else +// { +// pixel_buffer_size = (GLuint)min(2 * (GLint64)pixel_buffer_size, size); +// +// printf("A-buffer overflow: %d pixels. Resizing buffer to %d MB\n", max_pixel_index, pixel_buffer_size / 1024 / 1024); +// +// glBindBuffer(GL_SHADER_STORAGE_BUFFER, pixels_buffer); +// glBufferData(GL_SHADER_STORAGE_BUFFER, pixel_buffer_size, NULL, GL_DYNAMIC_COPY); +// glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, pixels_buffer); +// glCheck(); +// } +// } + // Reset counter + max_pixel_index = 0; + glBufferSubData(GL_ATOMIC_COUNTER_BUFFER, 0 , sizeof(GLuint), &max_pixel_index); +} + +void renderABuffer(bool sortFragments) +{ + // Render to output FBO + glcache.UseProgram(sortFragments ? 
g_abuffer_final_shader.program : g_abuffer_final_nosort_shader.program); + ShaderUniforms.Set(&g_abuffer_final_shader); + + glcache.Disable(GL_DEPTH_TEST); + glcache.Disable(GL_CULL_FACE); + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT | GL_BUFFER_UPDATE_BARRIER_BIT); + + DrawQuad(); + + glCheck(); + + // Clear A-buffer pointers + glcache.UseProgram(g_abuffer_clear_shader.program); + ShaderUniforms.Set(&g_abuffer_clear_shader); + + glMemoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT); + DrawQuad(); + + glActiveTexture(GL_TEXTURE0); + + glCheck(); +} diff --git a/core/rend/gles/glcache.h b/core/rend/gles/glcache.h index b72bd61fa..3d1abcad5 100644 --- a/core/rend/gles/glcache.h +++ b/core/rend/gles/glcache.h @@ -9,7 +9,7 @@ public: GLCache() { Reset(); } void BindTexture(GLenum target, GLuint texture) { - if (target == GL_TEXTURE_2D && texture != _texture) { + if ((target == GL_TEXTURE_2D && texture != _texture && !_disable_cache) || _disable_cache) { glBindTexture(target, texture); _texture = texture; } @@ -18,7 +18,7 @@ public: } void BlendFunc(GLenum sfactor, GLenum dfactor) { - if (sfactor != _src_blend_factor || dfactor != _dst_blend_factor) { + if (sfactor != _src_blend_factor || dfactor != _dst_blend_factor || _disable_cache) { _src_blend_factor = sfactor; _dst_blend_factor = dfactor; glBlendFunc(sfactor, dfactor); @@ -26,7 +26,7 @@ public: } void ClearColor(GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha) { - if (red != _clear_r || green != _clear_g || blue != _clear_b || alpha != _clear_a) { + if (red != _clear_r || green != _clear_g || blue != _clear_b || alpha != _clear_a || _disable_cache) { _clear_r = red; _clear_g = green; _clear_b = blue; @@ -36,7 +36,7 @@ public: } void CullFace(GLenum mode) { - if (mode != _cull_face) { + if (mode != _cull_face || _disable_cache) { _cull_face = mode; glCullFace(mode); } @@ -52,14 +52,14 @@ public: } void DepthFunc(GLenum func) { - if (func != _depth_func) { + if (func != _depth_func || 
_disable_cache) { _depth_func = func; glDepthFunc(func); } } void DepthMask(GLboolean flag) { - if (flag != _depth_mask) { + if (flag != _depth_mask || _disable_cache) { _depth_mask = flag; glDepthMask(flag); } @@ -74,14 +74,14 @@ public: } void UseProgram(GLuint program) { - if (program != _program) { + if (program != _program || _disable_cache) { _program = program; glUseProgram(program); } } void StencilFunc(GLenum func, GLint ref, GLuint mask) { - if (_stencil_func != func || _stencil_ref != ref || _stencil_fmask != mask) { + if (_stencil_func != func || _stencil_ref != ref || _stencil_fmask != mask || _disable_cache) { _stencil_func = func; _stencil_ref = ref; _stencil_fmask = mask; @@ -90,7 +90,7 @@ public: } void StencilOp(GLenum sfail, GLenum dpfail, GLenum dppass) { - if (_stencil_sfail != sfail ||_stencil_dpfail != dpfail || _stencil_dppass != dppass) { + if (_stencil_sfail != sfail ||_stencil_dpfail != dpfail || _stencil_dppass != dppass || _disable_cache) { _stencil_sfail = sfail; _stencil_dpfail = dpfail; _stencil_dppass = dppass; @@ -99,14 +99,14 @@ public: } void StencilMask(GLuint mask) { - if (_stencil_mask != mask) { + if (_stencil_mask != mask || _disable_cache) { _stencil_mask = mask; glStencilMask(mask); } } void TexParameteri(GLenum target, GLenum pname, GLint param) { - if (target == GL_TEXTURE_2D) + if (target == GL_TEXTURE_2D && !_disable_cache) { TextureParameters &cur_params = _texture_params[_texture]; switch (pname) { @@ -201,7 +201,7 @@ private: break; } if (pCap != NULL) { - if (*pCap == value) + if (*pCap == value && !_disable_cache) return; *pCap = value; } @@ -237,6 +237,7 @@ private: GLuint _texture_ids[TEXTURE_ID_CACHE_SIZE]; GLuint _texture_cache_size; std::map _texture_params; + bool _disable_cache = true; }; extern GLCache glcache; diff --git a/core/rend/gles/gldraw.cpp b/core/rend/gles/gldraw.cpp index c73675c10..ab8d0d44f 100644 --- a/core/rend/gles/gldraw.cpp +++ b/core/rend/gles/gldraw.cpp @@ -1,7 +1,6 @@ #include 
"glcache.h" #include "rend/rend.h" -#include /* Drawing and related state management @@ -21,23 +20,34 @@ const static u32 CullMode[]= GL_FRONT, //2 Cull if Negative Cull if ( |det| < 0 ) or ( |det| < fpu_cull_val ) GL_BACK, //3 Cull if Positive Cull if ( |det| > 0 ) or ( |det| < fpu_cull_val ) }; + +#define INVERT_DEPTH_FUNC const static u32 Zfunction[]= { GL_NEVER, //GL_NEVER, //0 Never +#ifndef INVERT_DEPTH_FUNC GL_LESS, //GL_LESS/*EQUAL*/, //1 Less GL_EQUAL, //GL_EQUAL, //2 Equal GL_LEQUAL, //GL_LEQUAL, //3 Less Or Equal GL_GREATER, //GL_GREATER/*EQUAL*/, //4 Greater GL_NOTEQUAL, //GL_NOTEQUAL, //5 Not Equal GL_GEQUAL, //GL_GEQUAL, //6 Greater Or Equal +#else + GL_GREATER, //1 Less + GL_EQUAL, //2 Equal + GL_GEQUAL, //3 Less Or Equal + GL_LESS, //4 Greater + GL_NOTEQUAL, //5 Not Equal + GL_LEQUAL, //6 Greater Or Equal +#endif GL_ALWAYS, //GL_ALWAYS, //7 Always }; /* 0 Zero (0, 0, 0, 0) 1 One (1, 1, 1, 1) -2 Dither Color (OR, OG, OB, OA) -3 Inverse Dither Color (1-OR, 1-OG, 1-OB, 1-OA) +2 Other Color (OR, OG, OB, OA) +3 Inverse Other Color (1-OR, 1-OG, 1-OB, 1-OA) 4 SRC Alpha (SA, SA, SA, SA) 5 Inverse SRC Alpha (1-SA, 1-SA, 1-SA, 1-SA) 6 DST Alpha (DA, DA, DA, DA) @@ -68,11 +78,15 @@ const static u32 SrcBlendGL[] = GL_ONE_MINUS_DST_ALPHA }; -extern int screen_width; -extern int screen_height; - PipelineShader* CurrentShader; u32 gcflip; +GLuint geom_fbo; +GLuint stencilTexId; +GLuint opaqueTexId; +GLuint depthTexId; +GLuint texSamplers[2]; +GLuint depth_fbo; +GLuint depthSaveTexId; s32 SetTileClip(u32 val, bool set) { @@ -149,17 +163,16 @@ void SetCull(u32 CulliMode) } } -static void SetTextureRepeatMode(GLuint dir, u32 clamp, u32 mirror) +static void SetTextureRepeatMode(int index, GLuint dir, u32 clamp, u32 mirror) { if (clamp) - glcache.TexParameteri(GL_TEXTURE_2D, dir, GL_CLAMP_TO_EDGE); + glSamplerParameteri(texSamplers[index], dir, GL_CLAMP_TO_EDGE); else - glcache.TexParameteri(GL_TEXTURE_2D, dir, mirror ? 
GL_MIRRORED_REPEAT : GL_REPEAT); + glSamplerParameteri(texSamplers[index], dir, mirror ? GL_MIRRORED_REPEAT : GL_REPEAT); } template -__forceinline - void SetGPState(const PolyParam* gp,u32 cflip=0) + void SetGPState(const PolyParam* gp, int pass, u32 cflip=0) { if (gp->pcw.Texture && gp->tsp.FilterMode > 1) { @@ -171,28 +184,109 @@ __forceinline else ShaderUniforms.trilinear_alpha = 1.0; - bool color_clamp = gp->tsp.ColorClamp && (pvrrc.fog_clamp_min != 0 || pvrrc.fog_clamp_max != 0xffffffff); + s32 clipping = SetTileClip(gp->tileclip, false); + int shaderId; - CurrentShader = &gl.pogram_table[ - GetProgramID(Type == ListType_Punch_Through ? 1 : 0, - SetTileClip(gp->tileclip, false) + 1, + if (pass == 0) + { + shaderId = GetProgramID(Type == ListType_Punch_Through ? 1 : 0, + clipping + 1, + Type == ListType_Punch_Through ? gp->pcw.Texture : 0, + 1, + gp->tsp.IgnoreTexA, + 0, + 0, + 2, + false, // TODO Can PT have two different textures for area 0 and 1 ?? + 0, + false, + false, + false, + pass); + CurrentShader = gl.getShader(shaderId); + if (CurrentShader->program == -1) { + CurrentShader->cp_AlphaTest = Type == ListType_Punch_Through ? 1 : 0; + CurrentShader->pp_ClipTestMode = clipping; + CurrentShader->pp_Texture = Type == ListType_Punch_Through ? 
gp->pcw.Texture : 0; + CurrentShader->pp_UseAlpha = 1; + CurrentShader->pp_IgnoreTexA = gp->tsp.IgnoreTexA; + CurrentShader->pp_ShadInstr = 0; + CurrentShader->pp_Offset = 0; + CurrentShader->pp_FogCtrl = 2; + CurrentShader->pp_TwoVolumes = false; + CurrentShader->pp_DepthFunc = 0; + CurrentShader->pp_Gouraud = false; + CurrentShader->pp_BumpMap = false; + CurrentShader->fog_clamping = false; + CurrentShader->pass = pass; + CompilePipelineShader(CurrentShader); + } + } + else + { + // Two volumes mode only supported for OP and PT + bool two_volumes_mode = (gp->tsp1.full != -1) && Type != ListType_Translucent; + bool color_clamp = gp->tsp.ColorClamp && (pvrrc.fog_clamp_min != 0 || pvrrc.fog_clamp_max != 0xffffffff); + + int depth_func = 0; + if (Type == ListType_Translucent) + { + if (SortingEnabled) + depth_func = 6; // GEQUAL + else + depth_func = gp->isp.DepthMode; + } + + shaderId = GetProgramID(Type == ListType_Punch_Through ? 1 : 0, + clipping + 1, gp->pcw.Texture, gp->tsp.UseAlpha, gp->tsp.IgnoreTexA, gp->tsp.ShadInstr, gp->pcw.Offset, gp->tsp.FogCtrl, + two_volumes_mode, + depth_func, gp->pcw.Gouraud, gp->tcw.PixelFmt == PixelBumpMap, - color_clamp)]; - - if (CurrentShader->program == -1) - CompilePipelineShader(CurrentShader); - else - { - glcache.UseProgram(CurrentShader->program); - ShaderUniforms.Set(CurrentShader); + color_clamp, + pass); + CurrentShader = gl.getShader(shaderId); + if (CurrentShader->program == -1) { + CurrentShader->cp_AlphaTest = Type == ListType_Punch_Through ? 
1 : 0; + CurrentShader->pp_ClipTestMode = clipping; + CurrentShader->pp_Texture = gp->pcw.Texture; + CurrentShader->pp_UseAlpha = gp->tsp.UseAlpha; + CurrentShader->pp_IgnoreTexA = gp->tsp.IgnoreTexA; + CurrentShader->pp_ShadInstr = gp->tsp.ShadInstr; + CurrentShader->pp_Offset = gp->pcw.Offset; + CurrentShader->pp_FogCtrl = gp->tsp.FogCtrl; + CurrentShader->pp_TwoVolumes = two_volumes_mode; + CurrentShader->pp_DepthFunc = depth_func; + CurrentShader->pp_Gouraud = gp->pcw.Gouraud; + CurrentShader->pp_BumpMap = gp->tcw.PixelFmt == 4; + CurrentShader->fog_clamping = color_clamp; + CurrentShader->pass = pass; + CompilePipelineShader(CurrentShader); + } } + + glcache.UseProgram(CurrentShader->program); + + ShaderUniforms.tsp0 = gp->tsp; + ShaderUniforms.tsp1 = gp->tsp1; + ShaderUniforms.tcw0 = gp->tcw; + ShaderUniforms.tcw1 = gp->tcw1; + + if (Type == ListType_Opaque || Type == ListType_Punch_Through) // TODO Can PT have a >0 and <1 alpha? + { + ShaderUniforms.tsp0.SrcInstr = 1; + ShaderUniforms.tsp0.DstInstr = 0; + ShaderUniforms.tsp1.SrcInstr = 1; + ShaderUniforms.tsp1.DstInstr = 0; + } + ShaderUniforms.Set(CurrentShader); + SetTileClip(gp->tileclip,true); //This bit control which pixels are affected @@ -201,33 +295,42 @@ __forceinline glcache.StencilFunc(GL_ALWAYS,stencil,stencil); - glcache.BindTexture(GL_TEXTURE_2D, gp->texid == -1 ? 
0 : gp->texid); - - SetTextureRepeatMode(GL_TEXTURE_WRAP_S, gp->tsp.ClampU, gp->tsp.FlipU); - SetTextureRepeatMode(GL_TEXTURE_WRAP_T, gp->tsp.ClampV, gp->tsp.FlipV); - - //set texture filter mode - if (gp->tsp.FilterMode == 0) + if (CurrentShader->pp_Texture) { - //disable filtering, mipmaps - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - } - else - { - //bilinear filtering - //PowerVR supports also trilinear via two passes, but we ignore that for now - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, (gp->tcw.MipMapped && settings.rend.UseMipmaps) ? GL_LINEAR_MIPMAP_NEAREST : GL_LINEAR); - glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); - } + for (int i = 0; i < 2; i++) + { + glActiveTexture(GL_TEXTURE0 + i); + GLuint texid = i == 0 ? gp->texid : gp->texid1; - if (Type==ListType_Translucent) - { - glcache.Enable(GL_BLEND); - glcache.BlendFunc(SrcBlendGL[gp->tsp.SrcInstr],DstBlendGL[gp->tsp.DstInstr]); + glBindTexture(GL_TEXTURE_2D, texid == -1 ? 0 : texid); + + if (texid != -1) + { + TSP tsp = i == 0 ? gp->tsp : gp->tsp1; + TCW tcw = i == 0 ? gp->tcw : gp->tcw1; + + glBindSampler(i, texSamplers[i]); + SetTextureRepeatMode(i, GL_TEXTURE_WRAP_S, tsp.ClampU, tsp.FlipU); + SetTextureRepeatMode(i, GL_TEXTURE_WRAP_T, tsp.ClampV, tsp.FlipV); + + //set texture filter mode + if (tsp.FilterMode == 0) + { + //disable filtering, mipmaps + glSamplerParameteri(texSamplers[i], GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glSamplerParameteri(texSamplers[i], GL_TEXTURE_MAG_FILTER, GL_NEAREST); + } + else + { + //bilinear filtering + //PowerVR supports also trilinear via two passes, but we ignore that for now + glSamplerParameteri(texSamplers[i], GL_TEXTURE_MIN_FILTER, (tcw.MipMapped && settings.rend.UseMipmaps) ? 
GL_LINEAR_MIPMAP_NEAREST : GL_LINEAR); + glSamplerParameteri(texSamplers[i], GL_TEXTURE_MAG_FILTER, GL_LINEAR); + } + } + } + glActiveTexture(GL_TEXTURE0); } - else - glcache.Disable(GL_BLEND); //set cull mode ! //cflip is required when exploding triangles for triangle sorting @@ -237,23 +340,22 @@ __forceinline //set Z mode, only if required if (Type == ListType_Punch_Through || (Type == ListType_Translucent && SortingEnabled)) { - glcache.DepthFunc(GL_GEQUAL); + glcache.DepthFunc(Zfunction[6]); // Greater or equal } else { glcache.DepthFunc(Zfunction[gp->isp.DepthMode]); } -#if TRIG_SORT - if (SortingEnabled) - glcache.DepthMask(GL_FALSE); - else -#endif + // Depth buffer is updated in pass 0 (and also in pass 1 for OP PT) + if (pass < 2) glcache.DepthMask(!gp->isp.ZWriteDis); + else + glcache.DepthMask(GL_FALSE); } template -void DrawList(const List& gply, int first, int count) +void DrawList(const List& gply, int first, int count, int pass) { PolyParam* params = &gply.head()[first]; @@ -262,18 +364,21 @@ void DrawList(const List& gply, int first, int count) return; //we want at least 1 PParam - - //set some 'global' modes for all primitives - - glcache.Enable(GL_STENCIL_TEST); - glcache.StencilFunc(GL_ALWAYS,0,0); - glcache.StencilOp(GL_KEEP,GL_KEEP,GL_REPLACE); - while(count-->0) { if (params->count>2) //this actually happens for some games. No idea why .. 
{ - SetGPState(params); + if (pass != 0) + { + // No need to draw this one + if (Type == ListType_Translucent && params->tsp.SrcInstr == 0 && params->tsp.DstInstr == 1) + { + params++; + continue; + } + } + ShaderUniforms.poly_number = params - gply.head(); + SetGPState(params, pass); glDrawElements(GL_TRIANGLE_STRIP, params->count, GL_UNSIGNED_SHORT, (GLvoid*)(2*params->first)); glCheck(); } @@ -281,542 +386,6 @@ void DrawList(const List& gply, int first, int count) } } -bool operator<(const PolyParam &left, const PolyParam &right) -{ -/* put any condition you want to sort on here */ - return left.zvZcount<2) - { - pp->zvZ=0; - } - else - { - u16* idx=idx_base+pp->first; - - Vertex* vtx=vtx_base+idx[0]; - Vertex* vtx_end=vtx_base + idx[pp->count-1]+1; - - u32 zv=0xFFFFFFFF; - while(vtx!=vtx_end) - { - zv=min(zv,(u32&)vtx->z); - vtx++; - } - - pp->zvZ=(f32&)zv; - } - pp++; - } - - std::stable_sort(pvrrc.global_param_tr.head() + first, pvrrc.global_param_tr.head() + first + count); -} - -Vertex* vtx_sort_base; - - -struct IndexTrig -{ - u16 id[3]; - u16 pid; - f32 z; -}; - - -struct SortTrigDrawParam -{ - PolyParam* ppid; - u16 first; - u16 count; -}; - -float min3(float v0,float v1,float v2) -{ - return min(min(v0,v1),v2); -} - -float max3(float v0,float v1,float v2) -{ - return max(max(v0,v1),v2); -} - - -float minZ(Vertex* v,u16* mod) -{ - return min(min(v[mod[0]].z,v[mod[1]].z),v[mod[2]].z); -} - -bool operator<(const IndexTrig &left, const IndexTrig &right) -{ - return left.zx-b->x; - float yd=a->y-b->y; - - return xd*xd+yd*yd; -} - -//was good idea, but not really working .. 
-bool Intersect(Vertex* a, Vertex* b) -{ - float a1=area_x2(a); - float a2=area_x2(b); - - float d = distance_apprx(a,b); - - return (a1+a1)>d; -} - -//root for quick-union -u16 rid(vector& v, u16 id) -{ - while(id!=v[id]) id=v[id]; - return id; -} - -struct TrigBounds -{ - float xs,xe; - float ys,ye; - float zs,ze; -}; - -//find 3d bounding box for triangle -TrigBounds bound(Vertex* v) -{ - TrigBounds rv = { min(min(v[0].x,v[1].x),v[2].x), max(max(v[0].x,v[1].x),v[2].x), - min(min(v[0].y,v[1].y),v[2].y), max(max(v[0].y,v[1].y),v[2].y), - min(min(v[0].z,v[1].z),v[2].z), max(max(v[0].z,v[1].z),v[2].z), - }; - - return rv; -} - -//bounding box 2d intersection -bool Intersect(TrigBounds& a, TrigBounds& b) -{ - return ( !(a.xeb.xe) && !(a.yeb.ye) /*&& !(a.zeb.ze)*/ ); -} - - -bool operator<(const IndexTrig &left, const IndexTrig &right) -{ - /* - TrigBounds l=bound(vtx_sort_base+left.id); - TrigBounds r=bound(vtx_sort_base+right.id); - - if (!Intersect(l,r)) - { - return true; - } - else - { - return (l.zs + l.ze) < (r.zs + r.ze); - }*/ - - return minZ(&vtx_sort_base[left.id])pcw.full&PCW_DRAW_MASK)==(pp1->pcw.full&PCW_DRAW_MASK) && pp0->isp.full==pp1->isp.full && pp0->tcw.full==pp1->tcw.full && pp0->tsp.full==pp1->tsp.full && pp0->tileclip==pp1->tileclip; -} - -static vector pidx_sort; - -void fill_id(u16* d, Vertex* v0, Vertex* v1, Vertex* v2, Vertex* vb) -{ - d[0]=v0-vb; - d[1]=v1-vb; - d[2]=v2-vb; -} - -void GenSorted(int first, int count) -{ - u32 tess_gen=0; - - pidx_sort.clear(); - - if (pvrrc.verts.used() == 0 || count <= 1) - return; - - Vertex* vtx_base=pvrrc.verts.head(); - u16* idx_base=pvrrc.idx.head(); - - PolyParam* pp_base = &pvrrc.global_param_tr.head()[first]; - PolyParam* pp = pp_base; - PolyParam* pp_end = pp + count; - - Vertex* vtx_arr=vtx_base+idx_base[pp->first]; - vtx_sort_base=vtx_base; - - static u32 vtx_cnt; - - int vtx_count=idx_base[pp_end[-1].first+pp_end[-1].count-1]-idx_base[pp->first]; - if (vtx_count>vtx_cnt) - vtx_cnt=vtx_count; - 
-#if PRINT_SORT_STATS - printf("TVTX: %d || %d\n",vtx_cnt,vtx_count); -#endif - - if (vtx_count<=0) - return; - - //make lists of all triangles, with their pid and vid - static vector lst; - - lst.resize(vtx_count*4); - - - int pfsti=0; - - while(pp!=pp_end) - { - u32 ppid=(pp-pp_base); - - if (pp->count>2) - { - u16* idx=idx_base+pp->first; - - Vertex* vtx=vtx_base+idx[0]; - Vertex* vtx_end=vtx_base + idx[pp->count-1]-1; - u32 flip=0; - while(vtx!=vtx_end) - { - Vertex* v0, * v1, * v2, * v3, * v4, * v5; - - if (flip) - { - v0=&vtx[1]; - v1=&vtx[0]; - v2=&vtx[2]; - } - else - { - v0=&vtx[0]; - v1=&vtx[1]; - v2=&vtx[2]; - } -#if 0 - if (settings.pvr.subdivide_transp) - { - u32 tess_x=(max3(v0->x,v1->x,v2->x)-min3(v0->x,v1->x,v2->x))/32; - u32 tess_y=(max3(v0->y,v1->y,v2->y)-min3(v0->y,v1->y,v2->y))/32; - - if (tess_x==1) tess_x=0; - if (tess_y==1) tess_y=0; - - //bool tess=(maxZ(v0,v1,v2)/minZ(v0,v1,v2))>=1.2; - - if (tess_x + tess_y) - { - v3=pvrrc.verts.Append(3); - v4=v3+1; - v5=v4+1; - - //xyz - for (int i=0;i<3;i++) - { - ((float*)&v3->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f; - ((float*)&v4->x)[i]=((float*)&v0->x)[i]*0.5f+((float*)&v1->x)[i]*0.5f; - ((float*)&v5->x)[i]=((float*)&v1->x)[i]*0.5f+((float*)&v2->x)[i]*0.5f; - } - - //*TODO* Make it perspective correct - - //uv - for (int i=0;i<2;i++) - { - ((float*)&v3->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f; - ((float*)&v4->u)[i]=((float*)&v0->u)[i]*0.5f+((float*)&v1->u)[i]*0.5f; - ((float*)&v5->u)[i]=((float*)&v1->u)[i]*0.5f+((float*)&v2->u)[i]*0.5f; - } - - //color - for (int i=0;i<4;i++) - { - v3->col[i]=v0->col[i]/2+v2->col[i]/2; - v4->col[i]=v0->col[i]/2+v1->col[i]/2; - v5->col[i]=v1->col[i]/2+v2->col[i]/2; - } - - fill_id(lst[pfsti].id,v0,v3,v4,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - fill_id(lst[pfsti].id,v2,v3,v5,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - 
fill_id(lst[pfsti].id,v3,v4,v5,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - fill_id(lst[pfsti].id,v5,v4,v1,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - - tess_gen+=3; - } - else - { - fill_id(lst[pfsti].id,v0,v1,v2,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - } - } - else -#endif - { - fill_id(lst[pfsti].id,v0,v1,v2,vtx_base); - lst[pfsti].pid= ppid ; - lst[pfsti].z = minZ(vtx_base,lst[pfsti].id); - pfsti++; - } - - flip ^= 1; - - vtx++; - } - } - pp++; - } - - u32 aused=pfsti; - - lst.resize(aused); - - //sort them -#if 1 - std::stable_sort(lst.begin(),lst.end()); - - //Merge pids/draw cmds if two different pids are actually equal - if (true) - { - for (u32 k=1;klst[k].pid) - { - //MOVE UP - for (int j=k;j>0 && lst[j].pid!=lst[j-1].pid && !Intersect(lst[j],lst[j-1]);j--) - { - swap(lst[j],lst[j-1]); - } - } - else - { - //move down - for (int j=k+1;j vidx_sort; - - vidx_sort.resize(aused*3); - - int idx=-1; - - for (u32 i=0; icount=stdp.first-last->first; - } - - pidx_sort.push_back(stdp); - idx=pid; - } - } - - SortTrigDrawParam* stdp=&pidx_sort[pidx_sort.size()-1]; - stdp->count=aused*3-stdp->first; - -#if PRINT_SORT_STATS - printf("Reassembled into %d from %d\n",pidx_sort.size(),pp_end-pp_base); -#endif - - //Upload to GPU if needed - if (pidx_sort.size()) - { - //Bind and upload sorted index buffer - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gl.vbo.idxs2); glCheck(); - glBufferData(GL_ELEMENT_ARRAY_BUFFER,vidx_sort.size()*2,&vidx_sort[0],GL_STREAM_DRAW); - - if (tess_gen) printf("Generated %.2fK Triangles !\n",tess_gen/1000.0); - } -} - -void DrawSorted(bool multipass) -{ - //if any drawing commands, draw them - if (pidx_sort.size()) - { - u32 count=pidx_sort.size(); - - { - //set some 'global' modes for all primitives - - glcache.Enable(GL_STENCIL_TEST); - glcache.StencilFunc(GL_ALWAYS,0,0); - 
glcache.StencilOp(GL_KEEP,GL_KEEP,GL_REPLACE); - - for (u32 p=0; p2) //this actually happens for some games. No idea why .. - { - SetGPState(params); - glDrawElements(GL_TRIANGLES, pidx_sort[p].count, GL_UNSIGNED_SHORT, (GLvoid*)(2*pidx_sort[p].first)); glCheck(); - -#if 0 - //Verify restriping -- only valid if no sort - int fs=pidx_sort[p].first; - - for (u32 j=0; j<(params->count-2); j++) - { - for (u32 k=0; k<3; k++) - { - verify(idx_base[params->first+j+k]==vidx_sort[fs++]); - } - } - - verify(fs==(pidx_sort[p].first+pidx_sort[p].count)); -#endif - } - params++; - } - - if (multipass && settings.rend.TranslucentPolygonDepthMask) - { - // Write to the depth buffer now. The next render pass might need it. (Cosmic Smash) - glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); - glcache.Disable(GL_BLEND); - - glcache.StencilMask(0); - - // We use the modifier volumes shader because it's fast. We don't need textures, etc. - glcache.UseProgram(gl.modvol_shader.program); - glUniform1f(gl.modvol_shader.sp_ShaderColor, 1.f); - - glcache.DepthFunc(GL_GEQUAL); - glcache.DepthMask(GL_TRUE); - - for (u32 p = 0; p < count; p++) - { - PolyParam* params = pidx_sort[p].ppid; - if (pidx_sort[p].count > 2 && !params->isp.ZWriteDis) { - // FIXME no clipping in modvol shader - //SetTileClip(gp->tileclip,true); - - SetCull(params->isp.CullMode ^ gcflip); - - glDrawElements(GL_TRIANGLES, pidx_sort[p].count, GL_UNSIGNED_SHORT, (GLvoid*)(2 * pidx_sort[p].first)); - } - } - glcache.StencilMask(0xFF); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); - } - } - // Re-bind the previous index buffer for subsequent render passes - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gl.vbo.idxs); - } -} - //All pixels are in area 0 by default. 
//If inside an 'in' volume, they are in area 1 //if inside an 'out' volume, they are in area 0 @@ -916,9 +485,7 @@ void SetMVS_Mode(ModifierVolumeMode mv_mode, ISP_Modvol ispc) void SetupMainVBO() { -#ifndef GLES glBindVertexArray(gl.vbo.vao); -#endif glBindBuffer(GL_ARRAY_BUFFER, gl.vbo.geometry); glCheck(); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, gl.vbo.idxs); glCheck(); @@ -935,13 +502,20 @@ void SetupMainVBO() glEnableVertexAttribArray(VERTEX_UV_ARRAY); glCheck(); glVertexAttribPointer(VERTEX_UV_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex,u)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_BASE1_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_COL_BASE1_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex, col1)); glCheck(); + + glEnableVertexAttribArray(VERTEX_COL_OFFS1_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_COL_OFFS1_ARRAY, 4, GL_UNSIGNED_BYTE, GL_TRUE, sizeof(Vertex), (void*)offsetof(Vertex, spc1)); glCheck(); + + glEnableVertexAttribArray(VERTEX_UV1_ARRAY); glCheck(); + glVertexAttribPointer(VERTEX_UV1_ARRAY, 2, GL_FLOAT, GL_FALSE, sizeof(Vertex), (void*)offsetof(Vertex, u1)); glCheck(); } void SetupModvolVBO() { -#ifndef GLES glBindVertexArray(gl.vbo.vao); -#endif glBindBuffer(GL_ARRAY_BUFFER, gl.vbo.modvols); glCheck(); @@ -952,6 +526,9 @@ void SetupModvolVBO() glDisableVertexAttribArray(VERTEX_UV_ARRAY); glDisableVertexAttribArray(VERTEX_COL_OFFS_ARRAY); glDisableVertexAttribArray(VERTEX_COL_BASE_ARRAY); + glDisableVertexAttribArray(VERTEX_UV1_ARRAY); + glDisableVertexAttribArray(VERTEX_COL_OFFS1_ARRAY); + glDisableVertexAttribArray(VERTEX_COL_BASE1_ARRAY); } void DrawModVols(int first, int count) { @@ -960,14 +537,10 @@ void DrawModVols(int first, int count) SetupModvolVBO(); - glcache.Enable(GL_BLEND); - glcache.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - glcache.UseProgram(gl.modvol_shader.program); - glUniform1f(gl.modvol_shader.sp_ShaderColor, 1 - FPU_SHAD_SCALE.scale_factor / 
256.f); glcache.DepthMask(GL_FALSE); - glcache.DepthFunc(GL_GREATER); + glcache.DepthFunc(Zfunction[4]); if(0) { @@ -1013,80 +586,289 @@ void DrawModVols(int first, int count) mod_base = -1; } } - //disable culling - SetCull(0); - //enable color writes - glColorMask(GL_TRUE,GL_TRUE,GL_TRUE,GL_TRUE); - - //black out any stencil with '1' - glcache.Enable(GL_BLEND); - glcache.BlendFunc(GL_SRC_ALPHA,GL_ONE_MINUS_SRC_ALPHA); - - glcache.Enable(GL_STENCIL_TEST); - glcache.StencilFunc(GL_EQUAL,0x81,0x81); //only pixels that are Modvol enabled, and in area 1 - - //clear the stencil result bit - glcache.StencilMask(0x3); //write to lsb - glcache.StencilOp(GL_ZERO,GL_ZERO,GL_ZERO); - - //don't do depth testing - glcache.Disable(GL_DEPTH_TEST); SetupMainVBO(); - glDrawArrays(GL_TRIANGLE_STRIP,0,4); - - //Draw and blend - //glDrawArrays(GL_TRIANGLES,pvrrc.modtrig.used(),2); - } //restore states glcache.Enable(GL_DEPTH_TEST); + glcache.DepthMask(GL_TRUE); } -void DrawStrips() +void renderABuffer(bool sortFragments); +void DrawTranslucentModVols(int first, int count); +void checkOverflowAndReset(); + +GLuint CreateColorFBOTexture() { + GLuint texId = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, texId); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, screen_width, screen_height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texId, 0); + glCheck(); + + return texId; +} + +void CreateTextures() +{ + stencilTexId = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, stencilTexId); glCheck(); + glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_STENCIL_INDEX); // OpenGL >= 4.3 + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + // Using 
glTexStorage2D instead of glTexImage2D to satisfy requirement GL_TEXTURE_IMMUTABLE_FORMAT=true, needed for glTextureView below + glTexStorage2D(GL_TEXTURE_2D, 1, GL_DEPTH32F_STENCIL8, screen_width, screen_height); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, stencilTexId, 0); glCheck(); + glCheck(); + + opaqueTexId = CreateColorFBOTexture(); + + depthTexId = glcache.GenTexture(); + glTextureView(depthTexId, GL_TEXTURE_2D, stencilTexId, GL_DEPTH32F_STENCIL8, 0, 1, 0, 1); + glCheck(); + glcache.BindTexture(GL_TEXTURE_2D, depthTexId); + glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glCheck(); +} + +void DrawStrips(GLuint output_fbo) +{ + checkOverflowAndReset(); + + if (geom_fbo == 0) + { + glGenFramebuffers(1, &geom_fbo); + glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); + + CreateTextures(); + + GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); + + verify(uStatus == GL_FRAMEBUFFER_COMPLETE); + } + else + { + glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); + if (stencilTexId == 0) + CreateTextures(); + } + if (texSamplers[0] == 0) + glGenSamplers(2, texSamplers); + + glcache.ClearColor(0, 0, 0, 0); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glcache.DepthMask(GL_TRUE); + glStencilMask(0xFF); + glClear(GL_COLOR_BUFFER_BIT | GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glCheck(); + SetupMainVBO(); //Draw the strips ! 
//We use sampler 0 glActiveTexture(GL_TEXTURE0); + glcache.Disable(GL_BLEND); + glProvokingVertex(GL_LAST_VERTEX_CONVENTION); RenderPass previous_pass = {0}; - for (int render_pass = 0; render_pass < pvrrc.render_passes.used(); render_pass++) { + int render_pass_count = pvrrc.render_passes.used(); + + for (int render_pass = 0; render_pass < render_pass_count; render_pass++) + { const RenderPass& current_pass = pvrrc.render_passes.head()[render_pass]; - //initial state - glcache.Enable(GL_DEPTH_TEST); - glcache.DepthMask(GL_TRUE); - - //Opaque - DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count); - - //Alpha tested - DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count); - - // Modifier volumes - if (settings.rend.ModifierVolumes) - DrawModVols(previous_pass.mvo_count, current_pass.mvo_count - previous_pass.mvo_count); - - //Alpha blended + // Check if we can skip this pass, in part or completely, in case nothing is drawn (Cosmic Smash) + bool skip_op_pt = true; + bool skip_tr = true; + for (int j = previous_pass.op_count; skip_op_pt && j < current_pass.op_count; j++) { - if (current_pass.autosort) - { -#if TRIG_SORT - GenSorted(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); - DrawSorted(render_pass < pvrrc.render_passes.used() - 1); -#else - SortPParams(previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); - DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); -#endif - } - else - DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count); + if (pvrrc.global_param_op.head()[j].count > 2) + skip_op_pt = false; } + for (int j = previous_pass.pt_count; skip_op_pt && j < current_pass.pt_count; j++) + { + if (pvrrc.global_param_pt.head()[j].count > 2) + skip_op_pt = false; + } + for (int j = previous_pass.tr_count; skip_tr && j < 
current_pass.tr_count; j++) + { + if (pvrrc.global_param_tr.head()[j].count > 2) + skip_tr = false; + } + if (skip_op_pt && skip_tr) + { + previous_pass = current_pass; + continue; + } + + if (!skip_op_pt) + { + // + // PASS 1: Geometry pass to update depth and stencil + // + if (render_pass > 0) + { + // Make a copy of the depth buffer that will be reused in pass 2 + if (depth_fbo == 0) + glGenFramebuffers(1, &depth_fbo); + glBindFramebuffer(GL_FRAMEBUFFER, depth_fbo); + if (depthSaveTexId == 0) + { + depthSaveTexId = glcache.GenTexture(); + glcache.BindTexture(GL_TEXTURE_2D, depthSaveTexId); + glcache.TexParameteri(GL_TEXTURE_2D, GL_DEPTH_STENCIL_TEXTURE_MODE, GL_DEPTH_COMPONENT); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glcache.TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH32F_STENCIL8, screen_width, screen_height, 0, GL_DEPTH_STENCIL, GL_FLOAT_32_UNSIGNED_INT_24_8_REV, NULL); glCheck(); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, depthSaveTexId, 0); glCheck(); + } + GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); + verify(uStatus == GL_FRAMEBUFFER_COMPLETE); + + glBindFramebuffer(GL_READ_FRAMEBUFFER, geom_fbo); + glBlitFramebuffer(0, 0, screen_width, screen_height, 0, 0, screen_width, screen_height, GL_DEPTH_BUFFER_BIT, GL_NEAREST); + glCheck(); + + glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); + } + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + glcache.Enable(GL_DEPTH_TEST); + glcache.DepthMask(GL_TRUE); + glcache.Enable(GL_STENCIL_TEST); + glcache.StencilOp(GL_KEEP, GL_KEEP, GL_REPLACE); + + DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 0); + DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count, 0); + + // Modifier volumes + if (settings.rend.ModifierVolumes) + DrawModVols(previous_pass.mvo_count, 
current_pass.mvo_count - previous_pass.mvo_count); + + // + // PASS 2: Render OP and PT to fbo + // + if (render_pass == 0) + { + glcache.DepthMask(GL_TRUE); + glClear(GL_DEPTH_BUFFER_BIT); + } + else + { + // Restore the depth buffer from the last render pass + // FIXME This is pretty slow apparently (CS) + glBindFramebuffer(GL_DRAW_FRAMEBUFFER, geom_fbo); + glBindFramebuffer(GL_READ_FRAMEBUFFER, depth_fbo); + glBlitFramebuffer(0, 0, screen_width, screen_height, 0, 0, screen_width, screen_height, GL_DEPTH_BUFFER_BIT, GL_NEAREST); + glCheck(); + glBindFramebuffer(GL_FRAMEBUFFER, geom_fbo); + } + + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + glcache.Disable(GL_STENCIL_TEST); + + // Bind stencil buffer for the fragment shader (shadowing) + glActiveTexture(GL_TEXTURE3); + glBindTexture(GL_TEXTURE_2D, stencilTexId); + glActiveTexture(GL_TEXTURE0); + glCheck(); + + //Opaque + DrawList(pvrrc.global_param_op, previous_pass.op_count, current_pass.op_count - previous_pass.op_count, 1); + + //Alpha tested + DrawList(pvrrc.global_param_pt, previous_pass.pt_count, current_pass.pt_count - previous_pass.pt_count, 1); + + // Unbind stencil + glActiveTexture(GL_TEXTURE3); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0); + } + + if (!skip_tr) + { + // + // PASS 3: Render TR to a-buffers + // + glColorMask(GL_FALSE, GL_FALSE, GL_FALSE, GL_FALSE); + glcache.Disable(GL_DEPTH_TEST); + + glActiveTexture(GL_TEXTURE2); + glBindTexture(GL_TEXTURE_2D, depthTexId); + glActiveTexture(GL_TEXTURE0); + + //Alpha blended + if (current_pass.autosort) + DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); // 3 because pass 2 is no more + else + DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 3); // 3 because pass 2 is no more + glCheck(); + + // Translucent modifier volumes + if (settings.rend.ModifierVolumes) + DrawTranslucentModVols(previous_pass.mvo_tr_count, 
current_pass.mvo_tr_count - previous_pass.mvo_tr_count); + + if (render_pass < render_pass_count - 1) + { + // + // PASS 3b: Geometry pass with TR to update the depth for the next TA render pass + // + // Unbind depth texture + glActiveTexture(GL_TEXTURE2); + glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0); + + glcache.Enable(GL_DEPTH_TEST); + if (current_pass.autosort) + DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 0); + else + DrawList(pvrrc.global_param_tr, previous_pass.tr_count, current_pass.tr_count - previous_pass.tr_count, 0); + + // + // PASS 3c: Render a-buffer to temporary texture + // + GLuint texId = CreateColorFBOTexture(); + + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + + glActiveTexture(GL_TEXTURE0); + glBindSampler(0, 0); + glBindTexture(GL_TEXTURE_2D, opaqueTexId); + + renderABuffer(current_pass.autosort); + SetupMainVBO(); + + glcache.DeleteTextures(1, &opaqueTexId); + opaqueTexId = texId; + + glCheck(); + } + } + + if (!skip_op_pt && render_pass < render_pass_count - 1) + { + // Clear the stencil from this pass + glStencilMask(0xFF); + glClear(GL_STENCIL_BUFFER_BIT); + } + previous_pass = current_pass; } + + // + // PASS 4: Render a-buffers to screen + // + glBindFramebuffer(GL_FRAMEBUFFER, output_fbo); glCheck(); + glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); + + glActiveTexture(GL_TEXTURE0); + glBindSampler(0, 0); + glBindTexture(GL_TEXTURE_2D, opaqueTexId); + renderABuffer(previous_pass.autosort); + SetupMainVBO(); } void DrawFramebuffer(float w, float h) @@ -1107,14 +889,41 @@ void DrawFramebuffer(float w, float h) ShaderUniforms.trilinear_alpha = 1.0; - PipelineShader *shader = &gl.pogram_table[GetProgramID(0, 1, 1, 0, 1, 0, 0, 2, false, false, false)]; + int shaderId = GetProgramID(0, + 1, + 1, + 0, + 1, + 0, + 0, + 2, + false, + 0, + false, + false, + false, + 1); + PipelineShader *shader = gl.getShader(shaderId); if (shader->program == -1) - 
CompilePipelineShader(shader); - else { - glcache.UseProgram(shader->program); - ShaderUniforms.Set(shader); + shader->cp_AlphaTest = 0; + shader->pp_ClipTestMode = 0; + shader->pp_Texture = 1; + shader->pp_UseAlpha = 0; + shader->pp_IgnoreTexA = 1; + shader->pp_ShadInstr = 0; + shader->pp_Offset = 0; + shader->pp_FogCtrl = 2; + shader->pp_TwoVolumes = false; + shader->pp_DepthFunc = 0; + shader->pp_Gouraud = false; + shader->pp_BumpMap = false; + shader->fog_clamping = false; + shader->pass = 1; + CompilePipelineShader(shader); } + glcache.UseProgram(shader->program); + ShaderUniforms.Set(shader); glActiveTexture(GL_TEXTURE0); glcache.BindTexture(GL_TEXTURE_2D, fbTextureId); diff --git a/core/rend/gles/gles.cpp b/core/rend/gles/gles.cpp index e280c0867..de8958bff 100644 --- a/core/rend/gles/gles.cpp +++ b/core/rend/gles/gles.cpp @@ -15,19 +15,10 @@ int fbdev = -1; #endif -#ifndef GLES #if HOST_OS != OS_DARWIN #include #pragma comment(lib,"Opengl32.lib") #endif -#else -#ifndef GL_RED -#define GL_RED 0x1903 -#endif -#ifndef GL_MAJOR_VERSION -#define GL_MAJOR_VERSION 0x821B -#endif -#endif /* GL|ES 2 @@ -69,125 +60,61 @@ float fb_scale_x,fb_scale_y; float scale_x, scale_y; //Fragment and vertex shaders code + const char* VertexShaderSource = "\ -%s \n\ -#define TARGET_GL %s \n\ +#version 140 \n\ #define pp_Gouraud %d \n\ \n\ -#define GLES2 0 \n\ -#define GLES3 1 \n\ -#define GL 2 \n\ - \n\ -#if TARGET_GL == GLES2 \n\ -#define in attribute \n\ -#define out varying \n\ -#endif \n\ - \n\ - \n\ -#if TARGET_GL != GLES2 \n\ #if pp_Gouraud == 0 \n\ #define INTERPOLATION flat \n\ #else \n\ #define INTERPOLATION smooth \n\ -#endif \n\ -#else \n\ -#define INTERPOLATION \n\ #endif \n\ \n\ /* Vertex constants*/ \n\ uniform highp vec4 scale; \n\ -uniform highp vec4 depth_scale; \n\ uniform highp float extra_depth_scale; \n\ /* Vertex input */ \n\ in highp vec4 in_pos; \n\ in lowp vec4 in_base; \n\ in lowp vec4 in_offs; \n\ in mediump vec2 in_uv; \n\ +in lowp vec4 in_base1; \n\ 
+in lowp vec4 in_offs1; \n\ +in mediump vec2 in_uv1; \n\ /* output */ \n\ INTERPOLATION out lowp vec4 vtx_base; \n\ INTERPOLATION out lowp vec4 vtx_offs; \n\ - out mediump vec2 vtx_uv; \n\ + out mediump vec2 vtx_uv; \n\ +INTERPOLATION out lowp vec4 vtx_base1; \n\ +INTERPOLATION out lowp vec4 vtx_offs1; \n\ + out mediump vec2 vtx_uv1; \n\ void main() \n\ { \n\ vtx_base=in_base; \n\ vtx_offs=in_offs; \n\ vtx_uv=in_uv; \n\ + vtx_base1 = in_base1; \n\ + vtx_offs1 = in_offs1; \n\ + vtx_uv1 = in_uv1; \n\ vec4 vpos=in_pos; \n\ -#if TARGET_GL == GL \n\ - if (isinf(vpos.z)) \n\ - vpos.w = 1.18e-38; \n\ + if (isinf(vpos.z)) \n\ + vpos.w = 1.18e-38; \n\ else \n\ -#endif \n\ vpos.w = extra_depth_scale / vpos.z; \n\ -#if TARGET_GL != GLES2 \n\ if (vpos.w < 0.0) { \n\ gl_Position = vec4(0.0, 0.0, 0.0, vpos.w); \n\ return; \n\ } \n\ vpos.z = vpos.w; \n\ -#else \n\ - vpos.z=depth_scale.x+depth_scale.y*vpos.w; \n\ -#endif \n\ vpos.xy=vpos.xy*scale.xy-scale.zw; \n\ vpos.xy*=vpos.w; \n\ gl_Position = vpos; \n\ }"; -/* - -cp_AlphaTest 0 1 2 2 -pp_ClipTestMode -1 0 1 3 6 -pp_UseAlpha 0 1 2 12 -pp_Texture 1 - pp_IgnoreTexA 0 1 2 2 - pp_ShadInstr 0 1 2 3 4 8 - pp_Offset 0 1 2 16 - pp_FogCtrl 0 1 2 3 4 64 -pp_Texture 0 - pp_FogCtrl 0 2 3 4 4 - -pp_Texture: off -> 12*4=48 shaders -pp_Texture: on -> 12*64=768 shaders -Total: 816 shaders - -highp float fdecp(highp float flt,out highp float e) \n\ -{ \n\ - highp float lg2=log2(flt); //ie , 2.5 \n\ - highp float frc=fract(lg2); //ie , 0.5 \n\ - e=lg2-frc; //ie , 2.5-0.5=2 (exp) \n\ - return pow(2.0,frc); //2^0.5 (manitsa) \n\ -} \n\ -lowp float fog_mode2(highp float invW) \n\ -{ \n\ - highp float foginvW=invW; \n\ - foginvW=clamp(foginvW,1.0,255.0); \n\ - \n\ - highp float fogexp; //0 ... 7 \n\ - highp float fogman=fdecp(foginvW, fogexp); //[1,2) mantissa bits. 
that is 1.m \n\ - \n\ - highp float fogman_hi=fogman*16.0-16.0; //[16,32) -16 -> [0,16) \n\ - highp float fogman_idx=floor(fogman_hi); //[0,15] \n\ - highp float fogman_blend=fract(fogman_hi); //[0,1) -- can also be fogman_idx-fogman_idx ! \n\ - highp float fog_idx_fr=fogexp*16.0+fogman_idx; //[0,127] \n\ - \n\ - highp float fog_idx_pixel_fr=fog_idx_fr+0.5; \n\ - highp float fog_idx_pixel_n=fog_idx_pixel_fr/128.0;//normalise to [0.5/128,127.5/128) coordinates \n\ - \n\ - //fog is 128x1 texure \n\ - lowp vec2 fog_coefs=texture2D(fog_table,vec2(fog_idx_pixel_n)).rg; \n\ - \n\ - lowp float fog_coef=mix(fog_coefs.r,fog_coefs.g,fogman_blend); \n\ - \n\ - return fog_coef; \n\ -} \n\ -*/ - -const char* PixelPipelineShader = +const char* PixelPipelineShader = SHADER_HEADER "\ -%s \n\ -#define TARGET_GL %s \n\ -\n\ #define cp_AlphaTest %d \n\ #define pp_ClipTestMode %d \n\ #define pp_UseAlpha %d \n\ @@ -196,64 +123,70 @@ const char* PixelPipelineShader = #define pp_ShadInstr %d \n\ #define pp_Offset %d \n\ #define pp_FogCtrl %d \n\ +#define pp_TwoVolumes %d \n\ +#define pp_DepthFunc %d \n\ #define pp_Gouraud %d \n\ #define pp_BumpMap %d \n\ #define FogClamping %d \n\ +#define PASS %d \n\ #define PI 3.1415926 \n\ -\n\ -#define GLES2 0 \n\ -#define GLES3 1 \n\ -#define GL 2 \n\ \n\ -#if TARGET_GL == GLES3 \n\ -out highp vec4 FragColor; \n\ -#define gl_FragColor FragColor \n\ -#define FOG_CHANNEL a \n\ -#elif TARGET_GL == GL \n\ -out highp vec4 FragColor; \n\ -#define gl_FragColor FragColor \n\ -#define FOG_CHANNEL r \n\ -#else \n\ -#define in varying \n\ -#define texture texture2D \n\ -#define FOG_CHANNEL a \n\ +#if PASS <= 1 \n\ +out vec4 FragColor; \n\ #endif \n\ \n\ +#if pp_TwoVolumes == 1 \n\ +#define IF(x) if (x) \n\ +#else \n\ +#define IF(x) \n\ +#endif \n\ \n\ -#if TARGET_GL != GLES2 \n\ #if pp_Gouraud == 0 \n\ #define INTERPOLATION flat \n\ #else \n\ #define INTERPOLATION smooth \n\ -#endif \n\ -#else \n\ -#define INTERPOLATION \n\ #endif \n\ \n\ /* Shader program 
params*/ \n\ -/* gles has no alpha test stage, so its emulated on the shader */ \n\ uniform lowp float cp_AlphaTestValue; \n\ uniform lowp vec4 pp_ClipTest; \n\ uniform lowp vec3 sp_FOG_COL_RAM,sp_FOG_COL_VERT; \n\ uniform highp float sp_FOG_DENSITY; \n\ -uniform sampler2D tex,fog_table; \n\ +uniform highp float shade_scale_factor; \n\ +uniform sampler2D tex0, tex1; \n\ +layout(binding = 5) uniform sampler2D fog_table; \n\ +uniform int pp_Number; \n\ +uniform usampler2D shadow_stencil; \n\ +uniform sampler2D DepthTex; \n\ uniform lowp float trilinear_alpha; \n\ uniform lowp vec4 fog_clamp_min; \n\ uniform lowp vec4 fog_clamp_max; \n\ + \n\ +uniform ivec2 blend_mode[2]; \n\ +#if pp_TwoVolumes == 1 \n\ +uniform bool use_alpha[2]; \n\ +uniform bool ignore_tex_alpha[2]; \n\ +uniform int shading_instr[2]; \n\ +uniform int fog_control[2]; \n\ +#endif \n\ + \n\ uniform highp float extra_depth_scale; \n\ /* Vertex input*/ \n\ INTERPOLATION in lowp vec4 vtx_base; \n\ INTERPOLATION in lowp vec4 vtx_offs; \n\ in mediump vec2 vtx_uv; \n\ +INTERPOLATION in lowp vec4 vtx_base1; \n\ +INTERPOLATION in lowp vec4 vtx_offs1; \n\ + in mediump vec2 vtx_uv1; \n\ \n\ lowp float fog_mode2(highp float w) \n\ { \n\ highp float z = clamp(w * extra_depth_scale * sp_FOG_DENSITY, 1.0, 255.9999); \n\ highp float exp = floor(log2(z)); \n\ highp float m = z * 16.0 / pow(2.0, exp) - 16.0; \n\ - lowp float idx = floor(m) + exp * 16.0 + 0.5; \n\ - highp vec4 fog_coef = texture(fog_table, vec2(idx / 128.0, 0.75 - (m - floor(m)) / 2.0)); \n\ - return fog_coef.FOG_CHANNEL; \n\ + float idx = floor(m) + exp * 16.0 + 0.5; \n\ + vec4 fog_coef = texture(fog_table, vec2(idx / 128.0, 0.75 - (m - floor(m)) / 2.0)); \n\ + return fog_coef.r; \n\ } \n\ \n\ highp vec4 fog_clamp(highp vec4 col) \n\ @@ -267,6 +200,34 @@ highp vec4 fog_clamp(highp vec4 col) \n\ \n\ void main() \n\ { \n\ + setFragDepth(); \n\ + \n\ + #if PASS == 3 \n\ + // Manual depth testing \n\ + highp float frontDepth = texture(DepthTex, 
gl_FragCoord.xy / textureSize(DepthTex, 0)).r; \n\ + #if pp_DepthFunc == 0 // Never \n\ + discard; \n\ + #elif pp_DepthFunc == 1 // Greater \n\ + if (gl_FragDepth <= frontDepth) \n\ + discard; \n\ + #elif pp_DepthFunc == 2 // Equal \n\ + if (gl_FragDepth != frontDepth) \n\ + discard; \n\ + #elif pp_DepthFunc == 3 // Greater or equal \n\ + if (gl_FragDepth < frontDepth) \n\ + discard; \n\ + #elif pp_DepthFunc == 4 // Less \n\ + if (gl_FragDepth >= frontDepth) \n\ + discard; \n\ + #elif pp_DepthFunc == 5 // Not equal \n\ + if (gl_FragDepth == frontDepth) \n\ + discard; \n\ + #elif pp_DepthFunc == 6 // Less or equal \n\ + if (gl_FragDepth > frontDepth) \n\ + discard; \n\ + #endif \n\ + #endif \n\ + \n\ // Clip outside the box \n\ #if pp_ClipTestMode==1 \n\ if (gl_FragCoord.x < pp_ClipTest.x || gl_FragCoord.x > pp_ClipTest.z \n\ @@ -280,48 +241,84 @@ void main() \n\ discard; \n\ #endif \n\ \n\ - lowp vec4 color=vtx_base; \n\ - #if pp_UseAlpha==0 \n\ - color.a=1.0; \n\ + highp vec4 color = vtx_base; \n\ + lowp vec4 offset = vtx_offs; \n\ + mediump vec2 uv = vtx_uv; \n\ + bool area1 = false; \n\ + ivec2 cur_blend_mode = blend_mode[0]; \n\ + \n\ + #if pp_TwoVolumes == 1 \n\ + bool cur_use_alpha = use_alpha[0]; \n\ + bool cur_ignore_tex_alpha = ignore_tex_alpha[0]; \n\ + int cur_shading_instr = shading_instr[0]; \n\ + int cur_fog_control = fog_control[0]; \n\ + #if PASS == 1 \n\ + uvec4 stencil = texture(shadow_stencil, gl_FragCoord.xy / textureSize(shadow_stencil, 0)); \n\ + if (stencil.r == 0x81u) { \n\ + color = vtx_base1; \n\ + offset = vtx_offs1; \n\ + uv = vtx_uv1; \n\ + area1 = true; \n\ + cur_blend_mode = blend_mode[1]; \n\ + cur_use_alpha = use_alpha[1]; \n\ + cur_ignore_tex_alpha = ignore_tex_alpha[1]; \n\ + cur_shading_instr = shading_instr[1]; \n\ + cur_fog_control = fog_control[1]; \n\ + } \n\ + #endif\n\ #endif\n\ - #if pp_FogCtrl==3 \n\ - color=vec4(sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ + \n\ + #if pp_UseAlpha==0 || pp_TwoVolumes == 1 \n\ + 
IF(!cur_use_alpha) \n\ + color.a=1.0; \n\ + #endif\n\ + #if pp_FogCtrl==3 || pp_TwoVolumes == 1 // LUT Mode 2 \n\ + IF(cur_fog_control == 3) \n\ + color=vec4(sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ #endif\n\ #if pp_Texture==1 \n\ { \n\ - lowp vec4 texcol=texture(tex, vtx_uv); \n\ - \n\ + highp vec4 texcol; \n\ + if (area1) \n\ + texcol = texture(tex1, uv); \n\ + else \n\ + texcol = texture(tex0, uv); \n\ #if pp_BumpMap == 1 \n\ highp float s = PI / 2.0 * (texcol.a * 15.0 * 16.0 + texcol.r * 15.0) / 255.0; \n\ highp float r = 2.0 * PI * (texcol.g * 15.0 * 16.0 + texcol.b * 15.0) / 255.0; \n\ texcol.a = clamp(vtx_offs.a + vtx_offs.r * sin(s) + vtx_offs.g * cos(s) * cos(r - 2.0 * PI * vtx_offs.b), 0.0, 1.0); \n\ texcol.rgb = vec3(1.0, 1.0, 1.0); \n\ #else\n\ - #if pp_IgnoreTexA==1 \n\ - texcol.a=1.0; \n\ + #if pp_IgnoreTexA==1 || pp_TwoVolumes == 1 \n\ + IF(cur_ignore_tex_alpha) \n\ + texcol.a=1.0; \n\ #endif\n\ \n\ #if cp_AlphaTest == 1 \n\ if (cp_AlphaTestValue>texcol.a) discard;\n\ #endif \n\ - #endif \n\ - #if pp_ShadInstr==0 \n\ + #endif\n\ + #if pp_ShadInstr==0 || pp_TwoVolumes == 1 // DECAL \n\ + IF(cur_shading_instr == 0) \n\ { \n\ color=texcol; \n\ } \n\ #endif\n\ - #if pp_ShadInstr==1 \n\ + #if pp_ShadInstr==1 || pp_TwoVolumes == 1 // MODULATE \n\ + IF(cur_shading_instr == 1) \n\ { \n\ color.rgb*=texcol.rgb; \n\ color.a=texcol.a; \n\ } \n\ #endif\n\ - #if pp_ShadInstr==2 \n\ + #if pp_ShadInstr==2 || pp_TwoVolumes == 1 // DECAL ALPHA \n\ + IF(cur_shading_instr == 2) \n\ { \n\ color.rgb=mix(color.rgb,texcol.rgb,texcol.a); \n\ } \n\ #endif\n\ - #if pp_ShadInstr==3 \n\ + #if pp_ShadInstr==3 || pp_TwoVolumes == 1 // MODULATE ALPHA \n\ + IF(cur_shading_instr == 3) \n\ { \n\ color*=texcol; \n\ } \n\ @@ -329,23 +326,30 @@ void main() \n\ \n\ #if pp_Offset==1 && pp_BumpMap == 0 \n\ { \n\ - color.rgb+=vtx_offs.rgb; \n\ + color.rgb += offset.rgb; \n\ } \n\ #endif\n\ } \n\ #endif\n\ + #if PASS == 1 && pp_TwoVolumes == 0 \n\ + uvec4 stencil = 
texture(shadow_stencil, gl_FragCoord.xy / textureSize(shadow_stencil, 0)); \n\ + if (stencil.r == 0x81u) \n\ + color.rgb *= shade_scale_factor; \n\ + #endif \n\ \n\ color = fog_clamp(color); \n\ \n\ - #if pp_FogCtrl == 0 \n\ - { \n\ - color.rgb=mix(color.rgb,sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ - } \n\ + #if pp_FogCtrl==0 || pp_TwoVolumes == 1 // LUT \n\ + IF(cur_fog_control == 0) \n\ + { \n\ + color.rgb=mix(color.rgb,sp_FOG_COL_RAM.rgb,fog_mode2(gl_FragCoord.w)); \n\ + } \n\ #endif\n\ - #if pp_FogCtrl == 1 && pp_Offset==1 && pp_BumpMap == 0 \n\ - { \n\ - color.rgb=mix(color.rgb,sp_FOG_COL_VERT.rgb,vtx_offs.a); \n\ - } \n\ + #if pp_Offset==1 && pp_BumpMap == 0 && (pp_FogCtrl == 1 || pp_TwoVolumes == 1) // Per vertex \n\ + IF(cur_fog_control == 1) \n\ + { \n\ + color.rgb=mix(color.rgb, sp_FOG_COL_VERT.rgb, offset.a); \n\ + } \n\ #endif\n\ \n\ color *= trilinear_alpha; \n\ @@ -353,71 +357,104 @@ void main() \n\ #if cp_AlphaTest == 1 \n\ color.a=1.0; \n\ #endif \n\ - //color.rgb=vec3(gl_FragCoord.w * sp_FOG_DENSITY / 128.0);\n\ -#if TARGET_GL != GLES2 \n\ - highp float w = gl_FragCoord.w * 100000.0; \n\ - gl_FragDepth = log2(1.0 + w) / 34.0; \n\ -#endif \n\ - gl_FragColor =color; \n\ + \n\ + //color.rgb=vec3(gl_FragCoord.w * sp_FOG_DENSITY / 128.0); \n\ + \n\ + #if PASS == 1 \n\ + FragColor = color; \n\ + #elif PASS > 1 \n\ + // Discard as many pixels as possible \n\ + switch (cur_blend_mode.y) // DST \n\ + { \n\ + case ONE: \n\ + switch (cur_blend_mode.x) // SRC \n\ + { \n\ + case ZERO: \n\ + discard; \n\ + case ONE: \n\ + case OTHER_COLOR: \n\ + case INVERSE_OTHER_COLOR: \n\ + if (color == vec4(0.0)) \n\ + discard; \n\ + break; \n\ + case SRC_ALPHA: \n\ + if (color.a == 0.0 || color.rgb == vec3(0.0)) \n\ + discard; \n\ + break; \n\ + case INVERSE_SRC_ALPHA: \n\ + if (color.a == 1.0 || color.rgb == vec3(0.0)) \n\ + discard; \n\ + break; \n\ + } \n\ + break; \n\ + case OTHER_COLOR: \n\ + if (cur_blend_mode.x == ZERO && color == vec4(1.0)) \n\ + discard; 
\n\ + break; \n\ + case INVERSE_OTHER_COLOR: \n\ + if (cur_blend_mode.x <= SRC_ALPHA && color == vec4(0.0)) \n\ + discard; \n\ + break; \n\ + case SRC_ALPHA: \n\ + if ((cur_blend_mode.x == ZERO || cur_blend_mode.x == INVERSE_SRC_ALPHA) && color.a == 1.0) \n\ + discard; \n\ + break; \n\ + case INVERSE_SRC_ALPHA: \n\ + switch (cur_blend_mode.x) // SRC \n\ + { \n\ + case ZERO: \n\ + case SRC_ALPHA: \n\ + if (color.a == 0.0) \n\ + discard; \n\ + break; \n\ + case ONE: \n\ + case OTHER_COLOR: \n\ + case INVERSE_OTHER_COLOR: \n\ + if (color == vec4(0.0)) \n\ + discard; \n\ + break; \n\ + } \n\ + break; \n\ + } \n\ + \n\ + ivec2 coords = ivec2(gl_FragCoord.xy); \n\ + uint idx = getNextPixelIndex(); \n\ + \n\ + Pixel pixel; \n\ + pixel.color = color; \n\ + pixel.depth = gl_FragDepth; \n\ + pixel.seq_num = uint(pp_Number); \n\ + pixel.next = imageAtomicExchange(abufferPointerImg, coords, idx); \n\ + pixels[idx] = pixel; \n\ + \n\ + discard; \n\ + \n\ + #endif \n\ }"; -const char* ModifierVolumeShader = -"\ -%s \n\ -#define TARGET_GL %s \n\ - \n\ -#define GLES2 0 \n\ -#define GLES3 1 \n\ -#define GL 2 \n\ - \n\ -#if TARGET_GL != GLES2 \n\ -out highp vec4 FragColor; \n\ -#define gl_FragColor FragColor \n\ -#endif \n\ - \n\ -uniform lowp float sp_ShaderColor; \n\ +const char* ModifierVolumeShader = SHADER_HEADER +" \ /* Vertex input*/ \n\ void main() \n\ { \n\ -#if TARGET_GL != GLES2 \n\ - highp float w = gl_FragCoord.w * 100000.0; \n\ - gl_FragDepth = log2(1.0 + w) / 34.0; \n\ -#endif \n\ - gl_FragColor=vec4(0.0, 0.0, 0.0, sp_ShaderColor); \n\ + setFragDepth(); \n\ + \n\ }"; const char* OSD_Shader = -"\ -%s \n\ -#define TARGET_GL %s \n\ +" \ +#version 140 \n\ +out vec4 FragColor; \n\ \n\ -#define GLES2 0 \n\ -#define GLES3 1 \n\ -#define GL 2 \n\ - \n\ -#if TARGET_GL != GLES2 \n\ -out highp vec4 FragColor; \n\ -#define gl_FragColor FragColor \n\ -#else \n\ -#define in varying \n\ -#define texture texture2D \n\ -#endif \n\ - \n\ -#if TARGET_GL != GLES2 \n\ -#define 
INTERPOLATION smooth \n\ -#else \n\ -#define INTERPOLATION \n\ -#endif \n\ - \n\ -INTERPOLATION in lowp vec4 vtx_base; \n\ -in mediump vec2 vtx_uv; \n\ +smooth in lowp vec4 vtx_base; \n\ + in mediump vec2 vtx_uv; \n\ /* Vertex input*/ \n\ uniform sampler2D tex; \n\ void main() \n\ { \n\ - mediump vec2 uv=vtx_uv; \n\ - uv.y=1.0-uv.y; \n\ - gl_FragColor = vtx_base*texture(tex,uv.st); \n\ + mediump vec2 uv = vtx_uv; \n\ + uv.y = 1.0 - uv.y; \n\ + FragColor = vtx_base * texture(tex, uv.st); \n\n\ }"; GLCache glcache; @@ -428,346 +465,192 @@ int screen_height; GLuint fogTextureId; #if (HOST_OS != OS_DARWIN) && !defined(TARGET_NACL32) -#if defined(GLES) && !defined(USE_SDL) - // Create a basic GLES context - bool gl_init(void* wind, void* disp) + +#if HOST_OS == OS_WINDOWS + #define WGL_DRAW_TO_WINDOW_ARB 0x2001 + #define WGL_ACCELERATION_ARB 0x2003 + #define WGL_SWAP_METHOD_ARB 0x2007 + #define WGL_SUPPORT_OPENGL_ARB 0x2010 + #define WGL_DOUBLE_BUFFER_ARB 0x2011 + #define WGL_PIXEL_TYPE_ARB 0x2013 + #define WGL_COLOR_BITS_ARB 0x2014 + #define WGL_DEPTH_BITS_ARB 0x2022 + #define WGL_STENCIL_BITS_ARB 0x2023 + #define WGL_FULL_ACCELERATION_ARB 0x2027 + #define WGL_SWAP_EXCHANGE_ARB 0x2028 + #define WGL_TYPE_RGBA_ARB 0x202B + #define WGL_CONTEXT_MAJOR_VERSION_ARB 0x2091 + #define WGL_CONTEXT_MINOR_VERSION_ARB 0x2092 + #define WGL_CONTEXT_FLAGS_ARB 0x2094 + + #define WGL_CONTEXT_PROFILE_MASK_ARB 0x9126 + #define WGL_CONTEXT_MAJOR_VERSION_ARB 0x2091 + #define WGL_CONTEXT_MINOR_VERSION_ARB 0x2092 + #define WGL_CONTEXT_LAYER_PLANE_ARB 0x2093 + #define WGL_CONTEXT_FLAGS_ARB 0x2094 + #define WGL_CONTEXT_DEBUG_BIT_ARB 0x0001 + #define WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB 0x0002 + #define ERROR_INVALID_VERSION_ARB 0x2095 + #define WGL_CONTEXT_CORE_PROFILE_BIT_ARB 0x00000001 + + typedef BOOL (WINAPI * PFNWGLCHOOSEPIXELFORMATARBPROC) (HDC hdc, const int *piAttribIList, const FLOAT *pfAttribFList, UINT nMaxFormats, + int *piFormats, UINT *nNumFormats); + typedef HGLRC (WINAPI * 
PFNWGLCREATECONTEXTATTRIBSARBPROC) (HDC hDC, HGLRC hShareContext, const int *attribList); + typedef BOOL (WINAPI * PFNWGLSWAPINTERVALEXTPROC) (int interval); + + PFNWGLCHOOSEPIXELFORMATARBPROC wglChoosePixelFormatARB; + PFNWGLCREATECONTEXTATTRIBSARBPROC wglCreateContextAttribsARB; + PFNWGLSWAPINTERVALEXTPROC wglSwapIntervalEXT; + + + HDC ourWindowHandleToDeviceContext; + bool gl_init(void* hwnd, void* hdc) { - #if !defined(_ANDROID) - gl.setup.native_wind=(EGLNativeWindowType)wind; - gl.setup.native_disp=(EGLNativeDisplayType)disp; - - //try to get a display - gl.setup.display = eglGetDisplay(gl.setup.native_disp); - - //if failed, get the default display (this will not happen in win32) - if(gl.setup.display == EGL_NO_DISPLAY) - gl.setup.display = eglGetDisplay((EGLNativeDisplayType) EGL_DEFAULT_DISPLAY); - - - // Initialise EGL - EGLint maj, min; - if (!eglInitialize(gl.setup.display, &maj, &min)) + PIXELFORMATDESCRIPTOR pfd = { - printf("EGL Error: eglInitialize failed\n"); - return false; + sizeof(PIXELFORMATDESCRIPTOR), + 1, + PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER, //Flags + PFD_TYPE_RGBA, //The kind of framebuffer. RGBA or palette. + 32, //Colordepth of the framebuffer. + 0, 0, 0, 0, 0, 0, + 0, + 0, + 0, + 0, 0, 0, 0, + 24, //Number of bits for the depthbuffer + 8, //Number of bits for the stencilbuffer + 0, //Number of Aux buffers in the framebuffer. 
+ PFD_MAIN_PLANE, + 0, + 0, 0, 0 + }; + + /*HDC*/ ourWindowHandleToDeviceContext = (HDC)hdc;//GetDC((HWND)hwnd); + + int letWindowsChooseThisPixelFormat; + letWindowsChooseThisPixelFormat = ChoosePixelFormat(ourWindowHandleToDeviceContext, &pfd); + SetPixelFormat(ourWindowHandleToDeviceContext,letWindowsChooseThisPixelFormat, &pfd); + + HGLRC ourOpenGLRenderingContext = wglCreateContext(ourWindowHandleToDeviceContext); + wglMakeCurrent (ourWindowHandleToDeviceContext, ourOpenGLRenderingContext); + + bool rv = true; + + if (rv) { + + wglChoosePixelFormatARB = (PFNWGLCHOOSEPIXELFORMATARBPROC)wglGetProcAddress("wglChoosePixelFormatARB"); + if(!wglChoosePixelFormatARB) + { + return false; + } + + wglCreateContextAttribsARB = (PFNWGLCREATECONTEXTATTRIBSARBPROC)wglGetProcAddress("wglCreateContextAttribsARB"); + if(!wglCreateContextAttribsARB) + { + return false; + } + + wglSwapIntervalEXT = (PFNWGLSWAPINTERVALEXTPROC)wglGetProcAddress("wglSwapIntervalEXT"); + if(!wglSwapIntervalEXT) + { + return false; + } + + int attribs[] = + { + WGL_CONTEXT_MAJOR_VERSION_ARB, 3, + WGL_CONTEXT_MINOR_VERSION_ARB, 1, + WGL_CONTEXT_FLAGS_ARB, WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB, + WGL_CONTEXT_PROFILE_MASK_ARB, WGL_CONTEXT_CORE_PROFILE_BIT_ARB, + 0 + }; + + HGLRC m_hrc = wglCreateContextAttribsARB(ourWindowHandleToDeviceContext,0, attribs); + + if (m_hrc) + wglMakeCurrent(ourWindowHandleToDeviceContext,m_hrc); + else + rv = false; + + wglDeleteContext(ourOpenGLRenderingContext); } - printf("Info: EGL version %d.%d\n",maj,min); - - - - EGLint pi32ConfigAttribs[] = { EGL_SURFACE_TYPE, EGL_WINDOW_BIT, EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT , EGL_DEPTH_SIZE, 24, EGL_STENCIL_SIZE, 8, EGL_NONE }; - EGLint pi32ContextAttribs[] = { EGL_CONTEXT_CLIENT_VERSION, 2 , EGL_NONE }; - - int num_config; - - EGLConfig config; - if (!eglChooseConfig(gl.setup.display, pi32ConfigAttribs, &config, 1, &num_config) || (num_config != 1)) - { - printf("EGL Error: eglChooseConfig failed\n"); - return false; + if 
(rv) { + rv = gl3wInit() != -1 && gl3wIsSupported(3, 1); } - gl.setup.surface = eglCreateWindowSurface(gl.setup.display, config, (EGLNativeWindowType)wind, NULL); + RECT r; + GetClientRect((HWND)hwnd, &r); + screen_width = r.right - r.left; + screen_height = r.bottom - r.top; - if (eglCheck()) - return false; - - eglBindAPI(EGL_OPENGL_ES_API); - if (eglCheck()) - return false; - - gl.setup.context = eglCreateContext(gl.setup.display, config, NULL, pi32ContextAttribs); - - if (eglCheck()) - return false; - - #endif - - eglMakeCurrent(gl.setup.display, gl.setup.surface, gl.setup.surface, gl.setup.context); - - if (eglCheck()) - return false; - - EGLint w,h; - eglQuerySurface(gl.setup.display, gl.setup.surface, EGL_WIDTH, &w); - eglQuerySurface(gl.setup.display, gl.setup.surface, EGL_HEIGHT, &h); - - screen_width=w; - screen_height=h; - - // Required when doing partial redraws - if (!eglSurfaceAttrib(gl.setup.display, gl.setup.surface, EGL_SWAP_BEHAVIOR, EGL_BUFFER_PRESERVED)) - printf("eglSurfaceAttrib(EGL_SWAP_BEHAVIOR, EGL_BUFFER_PRESERVED) failed\n"); - - printf("EGL config: %p, %08X, %08X %dx%d\n",gl.setup.context,gl.setup.display,gl.setup.surface,w,h); - return true; + return rv; } - - void egl_stealcntx() - { - gl.setup.context=eglGetCurrentContext(); - gl.setup.display=eglGetCurrentDisplay(); - gl.setup.surface=eglGetCurrentSurface(EGL_DRAW); - } - - //swap buffers + #include void gl_swap() { - #ifdef TARGET_PANDORA0 - if (fbdev >= 0) - { - int arg = 0; - ioctl(fbdev,FBIO_WAITFORVSYNC,&arg); - } - #endif - eglSwapBuffers(gl.setup.display, gl.setup.surface); - } - - //destroy the gles context and free resources - void gl_term() - { - #if HOST_OS==OS_WINDOWS - ReleaseDC((HWND)gl.setup.native_wind,(HDC)gl.setup.native_disp); - #endif - #ifdef TARGET_PANDORA - eglMakeCurrent( gl.setup.display, NULL, NULL, EGL_NO_CONTEXT ); - if (gl.setup.context) - eglDestroyContext(gl.setup.display, gl.setup.context); - if (gl.setup.surface) - eglDestroySurface(gl.setup.display, 
gl.setup.surface); - if (gl.setup.display) - eglTerminate(gl.setup.display); - if (fbdev>=0) - close( fbdev ); - - fbdev=-1; - gl.setup.context=0; - gl.setup.surface=0; - gl.setup.display=0; - #endif + wglSwapLayerBuffers(ourWindowHandleToDeviceContext,WGL_SWAP_MAIN_PLANE); + //SwapBuffers(ourWindowHandleToDeviceContext); } #else + #if defined(SUPPORT_X11) + //! windows && X11 + //let's assume glx for now - #if HOST_OS == OS_WINDOWS - #define WGL_DRAW_TO_WINDOW_ARB 0x2001 - #define WGL_ACCELERATION_ARB 0x2003 - #define WGL_SWAP_METHOD_ARB 0x2007 - #define WGL_SUPPORT_OPENGL_ARB 0x2010 - #define WGL_DOUBLE_BUFFER_ARB 0x2011 - #define WGL_PIXEL_TYPE_ARB 0x2013 - #define WGL_COLOR_BITS_ARB 0x2014 - #define WGL_DEPTH_BITS_ARB 0x2022 - #define WGL_STENCIL_BITS_ARB 0x2023 - #define WGL_FULL_ACCELERATION_ARB 0x2027 - #define WGL_SWAP_EXCHANGE_ARB 0x2028 - #define WGL_TYPE_RGBA_ARB 0x202B - #define WGL_CONTEXT_MAJOR_VERSION_ARB 0x2091 - #define WGL_CONTEXT_MINOR_VERSION_ARB 0x2092 - #define WGL_CONTEXT_FLAGS_ARB 0x2094 - - #define WGL_CONTEXT_PROFILE_MASK_ARB 0x9126 - #define WGL_CONTEXT_MAJOR_VERSION_ARB 0x2091 - #define WGL_CONTEXT_MINOR_VERSION_ARB 0x2092 - #define WGL_CONTEXT_LAYER_PLANE_ARB 0x2093 - #define WGL_CONTEXT_FLAGS_ARB 0x2094 - #define WGL_CONTEXT_DEBUG_BIT_ARB 0x0001 - #define WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB 0x0002 - #define ERROR_INVALID_VERSION_ARB 0x2095 - #define WGL_CONTEXT_CORE_PROFILE_BIT_ARB 0x00000001 - - typedef BOOL (WINAPI * PFNWGLCHOOSEPIXELFORMATARBPROC) (HDC hdc, const int *piAttribIList, const FLOAT *pfAttribFList, UINT nMaxFormats, - int *piFormats, UINT *nNumFormats); - typedef HGLRC (WINAPI * PFNWGLCREATECONTEXTATTRIBSARBPROC) (HDC hDC, HGLRC hShareContext, const int *attribList); - typedef BOOL (WINAPI * PFNWGLSWAPINTERVALEXTPROC) (int interval); - - PFNWGLCHOOSEPIXELFORMATARBPROC wglChoosePixelFormatARB; - PFNWGLCREATECONTEXTATTRIBSARBPROC wglCreateContextAttribsARB; - PFNWGLSWAPINTERVALEXTPROC wglSwapIntervalEXT; + #include + 
#include + #include + #include - HDC ourWindowHandleToDeviceContext; - bool gl_init(void* hwnd, void* hdc) + bool gl_init(void* wind, void* disp) { - PIXELFORMATDESCRIPTOR pfd = - { - sizeof(PIXELFORMATDESCRIPTOR), - 1, - PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER, //Flags - PFD_TYPE_RGBA, //The kind of framebuffer. RGBA or palette. - 32, //Colordepth of the framebuffer. - 0, 0, 0, 0, 0, 0, - 0, - 0, - 0, - 0, 0, 0, 0, - 24, //Number of bits for the depthbuffer - 8, //Number of bits for the stencilbuffer - 0, //Number of Aux buffers in the framebuffer. - PFD_MAIN_PLANE, - 0, - 0, 0, 0 - }; + extern void* x11_glc; - /*HDC*/ ourWindowHandleToDeviceContext = (HDC)hdc;//GetDC((HWND)hwnd); + glXMakeCurrent((Display*)libPvr_GetRenderSurface(), + (GLXDrawable)libPvr_GetRenderTarget(), + (GLXContext)x11_glc); - int letWindowsChooseThisPixelFormat; - letWindowsChooseThisPixelFormat = ChoosePixelFormat(ourWindowHandleToDeviceContext, &pfd); - SetPixelFormat(ourWindowHandleToDeviceContext,letWindowsChooseThisPixelFormat, &pfd); - - HGLRC ourOpenGLRenderingContext = wglCreateContext(ourWindowHandleToDeviceContext); - wglMakeCurrent (ourWindowHandleToDeviceContext, ourOpenGLRenderingContext); - - bool rv = true; - - if (rv) { - - wglChoosePixelFormatARB = (PFNWGLCHOOSEPIXELFORMATARBPROC)wglGetProcAddress("wglChoosePixelFormatARB"); - if(!wglChoosePixelFormatARB) - { - return false; - } - - wglCreateContextAttribsARB = (PFNWGLCREATECONTEXTATTRIBSARBPROC)wglGetProcAddress("wglCreateContextAttribsARB"); - if(!wglCreateContextAttribsARB) - { - return false; - } - - wglSwapIntervalEXT = (PFNWGLSWAPINTERVALEXTPROC)wglGetProcAddress("wglSwapIntervalEXT"); - if(!wglSwapIntervalEXT) - { - return false; - } - - int attribs[] = - { - WGL_CONTEXT_MAJOR_VERSION_ARB, 3, - WGL_CONTEXT_MINOR_VERSION_ARB, 1, - WGL_CONTEXT_FLAGS_ARB, WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB, - WGL_CONTEXT_PROFILE_MASK_ARB, WGL_CONTEXT_CORE_PROFILE_BIT_ARB, - 0 - }; - - HGLRC m_hrc = 
wglCreateContextAttribsARB(ourWindowHandleToDeviceContext,0, attribs); - - if (m_hrc) - wglMakeCurrent(ourWindowHandleToDeviceContext,m_hrc); - else - rv = false; - - wglDeleteContext(ourOpenGLRenderingContext); - } - - if (rv) { - rv = gl3wInit() != -1 && gl3wIsSupported(3, 1); - } - - RECT r; - GetClientRect((HWND)hwnd, &r); - screen_width = r.right - r.left; - screen_height = r.bottom - r.top; - - return rv; + screen_width = 640; + screen_height = 480; + return gl3wInit() != -1 && gl3wIsSupported(3, 1); } - #include + void gl_swap() { - wglSwapLayerBuffers(ourWindowHandleToDeviceContext,WGL_SWAP_MAIN_PLANE); - //SwapBuffers(ourWindowHandleToDeviceContext); - } - #else - #if defined(SUPPORT_X11) - //! windows && X11 - //let's assume glx for now + glXSwapBuffers((Display*)libPvr_GetRenderSurface(), (GLXDrawable)libPvr_GetRenderTarget()); - #include - #include - #include - #include + Window win; + int temp; + unsigned int tempu, new_w, new_h; + XGetGeometry((Display*)libPvr_GetRenderSurface(), (GLXDrawable)libPvr_GetRenderTarget(), + &win, &temp, &temp, &new_w, &new_h,&tempu,&tempu); - - bool gl_init(void* wind, void* disp) - { - extern void* x11_glc; - - glXMakeCurrent((Display*)libPvr_GetRenderSurface(), - (GLXDrawable)libPvr_GetRenderTarget(), - (GLXContext)x11_glc); - - screen_width = 640; - screen_height = 480; - return gl3wInit() != -1 && gl3wIsSupported(3, 1); + //if resized, clear up the draw buffers, to avoid out-of-draw-area junk data + if (new_w != screen_width || new_h != screen_height) { + screen_width = new_w; + screen_height = new_h; } - void gl_swap() - { + #if 0 + //handy to debug really stupid render-not-working issues ... 
+ + glcache.ClearColor( 0, 0.5, 1, 1 ); + glClear( GL_COLOR_BUFFER_BIT ); glXSwapBuffers((Display*)libPvr_GetRenderSurface(), (GLXDrawable)libPvr_GetRenderTarget()); - Window win; - int temp; - unsigned int tempu, new_w, new_h; - XGetGeometry((Display*)libPvr_GetRenderSurface(), (GLXDrawable)libPvr_GetRenderTarget(), - &win, &temp, &temp, &new_w, &new_h,&tempu,&tempu); - //if resized, clear up the draw buffers, to avoid out-of-draw-area junk data - if (new_w != screen_width || new_h != screen_height) { - screen_width = new_w; - screen_height = new_h; - } - - #if 0 - //handy to debug really stupid render-not-working issues ... - - glcache.ClearColor( 0, 0.5, 1, 1 ); - glClear( GL_COLOR_BUFFER_BIT ); - glXSwapBuffers((Display*)libPvr_GetRenderSurface(), (GLXDrawable)libPvr_GetRenderTarget()); - - - glcache.ClearColor ( 1, 0.5, 0, 1 ); - glClear ( GL_COLOR_BUFFER_BIT ); - glXSwapBuffers((Display*)libPvr_GetRenderSurface(), (GLXDrawable)libPvr_GetRenderTarget()); - #endif - } - #endif + glcache.ClearColor ( 1, 0.5, 0, 1 ); + glClear ( GL_COLOR_BUFFER_BIT ); + glXSwapBuffers((Display*)libPvr_GetRenderSurface(), (GLXDrawable)libPvr_GetRenderTarget()); + #endif + } #endif #endif #endif -void findGLVersion() -{ - while (true) - if (glGetError() == GL_NO_ERROR) - break; - glGetIntegerv(GL_MAJOR_VERSION, &gl.gl_major); - if (glGetError() == GL_INVALID_ENUM) - gl.gl_major = 2; - const char *version = (const char *)glGetString(GL_VERSION); - if (!strncmp(version, "OpenGL ES", 9)) - { - gl.is_gles = true; - if (gl.gl_major >= 3) - { - gl.gl_version = "GLES3"; - gl.glsl_version_header = "#version 300 es"; - } - else - { - gl.gl_version = "GLES2"; - gl.glsl_version_header = ""; - } - gl.fog_image_format = GL_ALPHA; - } - else - { - gl.is_gles = false; - gl.gl_version = "GL"; - gl.glsl_version_header = "#version 140"; - gl.fog_image_format = GL_RED; - } -} - struct ShaderUniforms_t ShaderUniforms; GLuint gl_CompileShader(const char* shader,GLuint type) @@ -799,8 +682,8 @@ GLuint 
gl_CompileShader(const char* shader,GLuint type) GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) { //create shaders - GLuint vs=gl_CompileShader(VertexShader ,GL_VERTEX_SHADER); - GLuint ps=gl_CompileShader(FragmentShader ,GL_FRAGMENT_SHADER); + GLuint vs=gl_CompileShader(VertexShader, GL_VERTEX_SHADER); + GLuint ps=gl_CompileShader(FragmentShader, GL_FRAGMENT_SHADER); GLuint program = glCreateProgram(); glAttachShader(program, vs); @@ -811,10 +694,11 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) glBindAttribLocation(program, VERTEX_COL_BASE_ARRAY, "in_base"); glBindAttribLocation(program, VERTEX_COL_OFFS_ARRAY, "in_offs"); glBindAttribLocation(program, VERTEX_UV_ARRAY, "in_uv"); + glBindAttribLocation(program, VERTEX_COL_BASE1_ARRAY, "in_base1"); + glBindAttribLocation(program, VERTEX_COL_OFFS1_ARRAY, "in_offs1"); + glBindAttribLocation(program, VERTEX_UV1_ARRAY, "in_uv1"); -#ifndef GLES glBindFragDataLocation(program, 0, "FragColor"); -#endif glLinkProgram(program); @@ -854,7 +738,7 @@ GLuint gl_CompileAndLink(const char* VertexShader, const char* FragmentShader) int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, - u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping) + u32 pp_FogCtrl, bool pp_TwoVolumes, u32 pp_DepthFunc, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, int pass) { u32 rv=0; @@ -866,37 +750,41 @@ int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, rv<<=2; rv|=pp_ShadInstr; rv<<=1; rv|=pp_Offset; rv<<=2; rv|=pp_FogCtrl; - rv<<=1; rv|=pp_Gouraud; - rv<<=1; rv|=pp_BumpMap; - rv<<=1; rv|=fog_clamping; + rv <<= 1; rv |= (int)pp_TwoVolumes; + rv <<= 3; rv |= pp_DepthFunc; + rv <<= 1; rv |= (int)pp_Gouraud; + rv <<= 1; rv |= pp_BumpMap; + rv <<= 1; rv |= fog_clamping; + rv <<= 2; rv |= pass; return rv; } -bool CompilePipelineShader( PipelineShader* s) +bool 
CompilePipelineShader( PipelineShader* s, const char *source /* = PixelPipelineShader */) { - char vshader[8192]; + char vshader[16384]; - sprintf(vshader, VertexShaderSource, gl.glsl_version_header, gl.gl_version, s->pp_Gouraud); + sprintf(vshader, VertexShaderSource, s->pp_Gouraud); - char pshader[8192]; + char pshader[16384]; - sprintf(pshader,PixelPipelineShader, gl.glsl_version_header, gl.gl_version, + sprintf(pshader, source, s->cp_AlphaTest,s->pp_ClipTestMode,s->pp_UseAlpha, - s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pp_Gouraud, s->pp_BumpMap, - s->fog_clamping); - - s->program=gl_CompileAndLink(vshader, pshader); + s->pp_Texture,s->pp_IgnoreTexA,s->pp_ShadInstr,s->pp_Offset,s->pp_FogCtrl, s->pp_TwoVolumes, s->pp_DepthFunc, s->pp_Gouraud, s->pp_BumpMap, s->fog_clamping, s->pass); + s->program = gl_CompileAndLink(vshader, pshader); //setup texture 0 as the input for the shader - GLuint gu=glGetUniformLocation(s->program, "tex"); - if (s->pp_Texture==1) - glUniform1i(gu,0); + GLint gu = glGetUniformLocation(s->program, "tex0"); + if (s->pp_Texture == 1 && gu != -1) + glUniform1i(gu, 0); + // Setup texture 1 as the input for area 1 in two volume mode + gu = glGetUniformLocation(s->program, "tex1"); + if (s->pp_Texture == 1 && gu != -1) + glUniform1i(gu, 1); //get the uniform locations s->scale = glGetUniformLocation(s->program, "scale"); - s->depth_scale = glGetUniformLocation(s->program, "depth_scale"); s->extra_depth_scale = glGetUniformLocation(s->program, "extra_depth_scale"); s->pp_ClipTest = glGetUniformLocation(s->program, "pp_ClipTest"); @@ -918,10 +806,13 @@ bool CompilePipelineShader( PipelineShader* s) { s->sp_FOG_COL_RAM=-1; } - // Setup texture 1 as the fog table - gu = glGetUniformLocation(s->program, "fog_table"); + s->shade_scale_factor = glGetUniformLocation(s->program, "shade_scale_factor"); + + // Use texture 1 for depth texture + gu = glGetUniformLocation(s->program, "DepthTex"); if (gu != -1) - 
glUniform1i(gu, 1); + glUniform1i(gu, 2); // GL_TEXTURE2 + s->trilinear_alpha = glGetUniformLocation(s->program, "trilinear_alpha"); if (s->fog_clamping) @@ -935,7 +826,18 @@ bool CompilePipelineShader( PipelineShader* s) s->fog_clamp_max = -1; } - ShaderUniforms.Set(s); + // Shadow stencil for OP/PT rendering pass + gu = glGetUniformLocation(s->program, "shadow_stencil"); + if (gu != -1) + glUniform1i(gu, 3); // GL_TEXTURE3 + + s->pp_Number = glGetUniformLocation(s->program, "pp_Number"); + + s->blend_mode = glGetUniformLocation(s->program, "blend_mode"); + s->use_alpha = glGetUniformLocation(s->program, "use_alpha"); + s->ignore_tex_alpha = glGetUniformLocation(s->program, "ignore_tex_alpha"); + s->shading_instr = glGetUniformLocation(s->program, "shading_instr"); + s->fog_control = glGetUniformLocation(s->program, "fog_control"); return glIsProgram(s->program)==GL_TRUE; } @@ -949,12 +851,10 @@ bool gl_create_resources() // Assume the resources have already been created return true; -#ifndef GLES //create vao //This is really not "proper", vaos are supposed to be defined once //i keep updating the same one to make the es2 code work in 3.1 context glGenVertexArrays(1, &gl.vbo.vao); -#endif //create vbos glGenBuffers(1, &gl.vbo.geometry); @@ -962,79 +862,16 @@ bool gl_create_resources() glGenBuffers(1, &gl.vbo.idxs); glGenBuffers(1, &gl.vbo.idxs2); - memset(gl.pogram_table,0,sizeof(gl.pogram_table)); + char vshader[16384]; + sprintf(vshader, VertexShaderSource, 1); - PipelineShader* dshader=0; - u32 compile=0; -#define forl(name,max) for(u32 name=0;name<=max;name++) - forl(cp_AlphaTest,1) - { - forl(pp_ClipTestMode,2) - { - forl(pp_UseAlpha,1) - { - forl(pp_Texture,1) - { - forl(pp_FogCtrl,3) - { - forl(pp_IgnoreTexA,1) - { - forl(pp_ShadInstr,3) - { - forl(pp_Offset,1) - { - forl(pp_Gouraud,1) - { - forl(pp_BumpMap,1) - { - forl(fog_clamping,1) - { - dshader=&gl.pogram_table[GetProgramID(cp_AlphaTest,pp_ClipTestMode,pp_Texture,pp_UseAlpha,pp_IgnoreTexA, - 
pp_ShadInstr,pp_Offset,pp_FogCtrl, (bool)pp_Gouraud, (bool)pp_BumpMap, (bool)fog_clamping)]; - - dshader->cp_AlphaTest = cp_AlphaTest; - dshader->pp_ClipTestMode = pp_ClipTestMode-1; - dshader->pp_Texture = pp_Texture; - dshader->pp_UseAlpha = pp_UseAlpha; - dshader->pp_IgnoreTexA = pp_IgnoreTexA; - dshader->pp_ShadInstr = pp_ShadInstr; - dshader->pp_Offset = pp_Offset; - dshader->pp_FogCtrl = pp_FogCtrl; - dshader->pp_Gouraud = pp_Gouraud; - dshader->pp_BumpMap = pp_BumpMap; - dshader->fog_clamping = fog_clamping; - dshader->program = -1; - } - } - } - } - } - } - } - } - } - } - } - - findGLVersion(); - - char vshader[8192]; - sprintf(vshader, VertexShaderSource, gl.glsl_version_header, gl.gl_version, 1); - char fshader[8192]; - sprintf(fshader, ModifierVolumeShader, gl.glsl_version_header, gl.gl_version); - - gl.modvol_shader.program=gl_CompileAndLink(vshader, fshader); + gl.modvol_shader.program=gl_CompileAndLink(vshader, ModifierVolumeShader); gl.modvol_shader.scale = glGetUniformLocation(gl.modvol_shader.program, "scale"); - gl.modvol_shader.sp_ShaderColor = glGetUniformLocation(gl.modvol_shader.program, "sp_ShaderColor"); - gl.modvol_shader.depth_scale = glGetUniformLocation(gl.modvol_shader.program, "depth_scale"); gl.modvol_shader.extra_depth_scale = glGetUniformLocation(gl.modvol_shader.program, "extra_depth_scale"); - sprintf(fshader, OSD_Shader, gl.glsl_version_header, gl.gl_version); - gl.OSD_SHADER.program=gl_CompileAndLink(vshader, fshader); - printf("OSD: %d\n",gl.OSD_SHADER.program); + gl.OSD_SHADER.program=gl_CompileAndLink(vshader, OSD_Shader); gl.OSD_SHADER.scale=glGetUniformLocation(gl.OSD_SHADER.program, "scale"); - gl.OSD_SHADER.depth_scale=glGetUniformLocation(gl.OSD_SHADER.program, "depth_scale"); gl.OSD_SHADER.extra_depth_scale = glGetUniformLocation(gl.OSD_SHADER.program, "extra_depth_scale"); glUniform1i(glGetUniformLocation(gl.OSD_SHADER.program, "tex"),0); //bind osd texture to slot 0 @@ -1057,6 +894,14 @@ bool gl_create_resources() 
osd_font = loadPNG(get_readonly_data_path("/font.png"), w, h); #endif + // Create the buffer for Translucent poly params + glGenBuffers(1, &gl.vbo.tr_poly_params); + // Bind it + glBindBuffer(GL_SHADER_STORAGE_BUFFER, gl.vbo.tr_poly_params); + // Declare storage + glBindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, gl.vbo.tr_poly_params); + glCheck(); + return true; } @@ -1064,15 +909,64 @@ bool gl_init(void* wind, void* disp); //swap buffers void gl_swap(); -//destroy the gles context and free resources -void gl_term(); GLuint gl_CompileShader(const char* shader,GLuint type); bool gl_create_resources(); //setup +extern void initABuffer(); +void gl_DebugOutput(GLenum source, + GLenum type, + GLuint id, + GLenum severity, + GLsizei length, + const GLchar *message, + void *userParam) +{ + // ignore non-significant error/warning codes + if(id == 131169 || id == 131185 || id == 131218 || id == 131204) return; + if (id == 131186) + // Warning when fetching the atomic_uint pixel count + return; + + printf("OpenGL Debug message (%d): %s\n", id, message); + + switch (source) + { + case GL_DEBUG_SOURCE_API: printf("Source: API"); break; + case GL_DEBUG_SOURCE_WINDOW_SYSTEM: printf("Source: Window System"); break; + case GL_DEBUG_SOURCE_SHADER_COMPILER: printf("Source: Shader Compiler"); break; + case GL_DEBUG_SOURCE_THIRD_PARTY: printf("Source: Third Party"); break; + case GL_DEBUG_SOURCE_APPLICATION: printf("Source: Application"); break; + case GL_DEBUG_SOURCE_OTHER: printf("Source: Other"); break; + } + printf(" "); + + switch (type) + { + case GL_DEBUG_TYPE_ERROR: printf("Type: Error"); break; + case GL_DEBUG_TYPE_DEPRECATED_BEHAVIOR: printf("Type: Deprecated Behaviour"); break; + case GL_DEBUG_TYPE_UNDEFINED_BEHAVIOR: printf("Type: Undefined Behaviour"); break; + case GL_DEBUG_TYPE_PORTABILITY: printf("Type: Portability"); break; + case GL_DEBUG_TYPE_PERFORMANCE: printf("Type: Performance"); break; + case GL_DEBUG_TYPE_MARKER: printf("Type: Marker"); break; + case 
GL_DEBUG_TYPE_PUSH_GROUP: printf("Type: Push Group"); break; + case GL_DEBUG_TYPE_POP_GROUP: printf("Type: Pop Group"); break; + case GL_DEBUG_TYPE_OTHER: printf("Type: Other"); break; + } + printf(" "); + + switch (severity) + { + case GL_DEBUG_SEVERITY_HIGH: printf("Severity: high"); break; + case GL_DEBUG_SEVERITY_MEDIUM: printf("Severity: medium"); break; + case GL_DEBUG_SEVERITY_LOW: printf("Severity: low"); break; + case GL_DEBUG_SEVERITY_NOTIFICATION: printf("Severity: notification"); break; + }; + printf("\n"); +} bool gles_init() { @@ -1084,22 +978,18 @@ bool gles_init() if (!gl_create_resources()) return false; -#if defined(GLES) && HOST_OS != OS_DARWIN && !defined(TARGET_NACL32) - #ifdef TARGET_PANDORA - fbdev=open("/dev/fb0", O_RDONLY); - #else - eglSwapInterval(gl.setup.display,1); - #endif -#endif +// glEnable(GL_DEBUG_OUTPUT); +// glEnable(GL_DEBUG_OUTPUT_SYNCHRONOUS); +// glDebugMessageCallback(gl_DebugOutput, NULL); +// glDebugMessageControl(GL_DONT_CARE, GL_DONT_CARE, GL_DONT_CARE, 0, NULL, GL_TRUE); + //clean up the buffer glcache.ClearColor(0.f, 0.f, 0.f, 0.f); glClear(GL_COLOR_BUFFER_BIT); gl_swap(); -#ifdef GLES - glHint(GL_GENERATE_MIPMAP_HINT, GL_FASTEST); -#endif + initABuffer(); if (settings.rend.TextureUpscale > 1) { @@ -1115,7 +1005,7 @@ bool gles_init() void UpdateFogTexture(u8 *fog_table) { - glActiveTexture(GL_TEXTURE1); + glActiveTexture(GL_TEXTURE5); if (fogTextureId == 0) { fogTextureId = glcache.GenTexture(); @@ -1134,8 +1024,8 @@ void UpdateFogTexture(u8 *fog_table) temp_tex_buffer[i] = fog_table[i * 4]; temp_tex_buffer[i + 128] = fog_table[i * 4 + 1]; } - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - glTexImage2D(GL_TEXTURE_2D, 0, gl.fog_image_format, 128, 2, 0, gl.fog_image_format, GL_UNSIGNED_BYTE, temp_tex_buffer); + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RED, 128, 2, 0, GL_RED, GL_UNSIGNED_BYTE, temp_tex_buffer); glCheck(); glActiveTexture(GL_TEXTURE0); @@ -1236,12 +1126,6 @@ static void 
DrawButton(float* xy, u32 state) osd_count+=4; } -static void ClearBG() -{ - -} - - void DrawButton2(float* xy, bool state) { DrawButton(xy,state?0:255); } static void DrawCenteredText(float yy, float scale, int transparency, const char* text) @@ -1475,18 +1359,8 @@ void OSD_DRAW() #endif if (osd_font) { - float u=0; - float v=0; - verify(glIsProgram(gl.OSD_SHADER.program)); - float dc_width=640; - float dc_height=480; - - float dc2s_scale_h=screen_height/480.0f; - float ds2s_offs_x=(screen_width-dc2s_scale_h*640)/2; - - glcache.BindTexture(GL_TEXTURE_2D,osd_font); glcache.UseProgram(gl.OSD_SHADER.program); @@ -1494,15 +1368,12 @@ void OSD_DRAW() glcache.Disable(GL_DEPTH_TEST); glcache.BlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); - glcache.DepthMask(false); glcache.DepthFunc(GL_ALWAYS); - glcache.Disable(GL_CULL_FACE); glcache.Disable(GL_SCISSOR_TEST); - int dfa=osd_count/4; for (int i=0;i0x49800000) - vtx_max_fZ=10*1024; - - - //add some extra range to avoid clipping border cases - vtx_min_fZ*=0.98f; - vtx_max_fZ*=1.001f; - - //calculate a projection so that it matches the pvr x,y setup, and - //a) Z is linearly scaled between 0 ... 1 - //b) W is passed though for proper perspective calculations - - /* - PowerVR coords: - fx, fy (pixel coordinates) - fz=1/w - - (as a note, fx=x*fz;fy=y*fz) - - Clip space - -Wc .. Wc, xyz - x: left-right, y: bottom-top - NDC space - -1 .. 1, xyz - Window space: - translated NDC (viewport, glDepth) - - Attributes: - //this needs to be cleared up, been some time since I wrote my rasteriser and i'm starting - //to forget/mixup stuff - vaX -> VS output - iaX=vaX*W -> value to be interpolated - iaX',W' -> interpolated values - paX=iaX'/W' -> Per pixel interpolated value for attribute - - - Proper mappings: - Output from shader: - W=1/fz - x=fx*W -> maps to fx after perspective divide - y=fy*W -> fy -//- - z=-W for min, W for max. Needs to be linear. 
- - - - umodified W, perfect mapping: - Z mapping: - pz=z/W - pz=z/(1/fz) - pz=z*fz - z=zt_s+zt_o - pz=(zt_s+zt_o)*fz - pz=zt_s*fz+zt_o*fz - zt_s=scale - zt_s=2/(max_fz-min_fz) - zt_o*fz=-min_fz-1 - zt_o=(-min_fz-1)/fz == (-min_fz-1)*W - - - x=fx/(fx_range/2)-1 //0 to max -> -1 to 1 - y=fy/(-fy_range/2)+1 //0 to max -> 1 to -1 - z=-min_fz*W + (zt_s-1) //0 to +inf -> -1 to 1 - - o=a*z+c - 1=a*z_max+c - -1=a*z_min+c - - c=-a*z_min-1 - 1=a*z_max-a*z_min-1 - 2=a*(z_max-z_min) - a=2/(z_max-z_min) - */ - - //float B=2/(min_invW-max_invW); - //float A=-B*max_invW+vnear; - //these should be adjusted based on the current PVR scaling etc params float dc_width=640; float dc_height=480; @@ -1676,7 +1466,7 @@ bool RenderFrame() if (SCALER_CTL.hscale) { - scissoring_scale_x /= 2; + scissoring_scale_x /= 2; scale_x*=2; } } @@ -1684,30 +1474,6 @@ bool RenderFrame() dc_width *= scale_x; dc_height *= scale_y; - /* - - float vnear=0; - float vfar =1; - - float max_invW=1/vtx_min_fZ; - float min_invW=1/vtx_max_fZ; - - float B=vfar/(min_invW-max_invW); - float A=-B*max_invW+vnear; - - - GLfloat dmatrix[16] = - { - (2.f/dc_width) ,0 ,-(640/dc_width) ,0 , - 0 ,-(2.f/dc_height) ,(480/dc_height) ,0 , - 0 ,0 ,A ,B , - 0 ,0 ,1 ,0 - }; - - glUniformMatrix4fv(gl.matrix, 1, GL_FALSE, dmatrix); - - */ - /* Handle Dc to screen scaling */ @@ -1720,16 +1486,12 @@ bool RenderFrame() ShaderUniforms.scale_coefs[2]=1-2*ds2s_offs_x/(screen_width); ShaderUniforms.scale_coefs[3]=(is_rtt?1:-1); - - ShaderUniforms.depth_coefs[0]=2/(vtx_max_fZ-vtx_min_fZ); - ShaderUniforms.depth_coefs[1]=-vtx_min_fZ-1; - ShaderUniforms.depth_coefs[2]=0; - ShaderUniforms.depth_coefs[3]=0; - ShaderUniforms.extra_depth_scale = settings.rend.ExtraDepthScale; //printf("scale: %f, %f, %f, %f\n",ShaderUniforms.scale_coefs[0],ShaderUniforms.scale_coefs[1],ShaderUniforms.scale_coefs[2],ShaderUniforms.scale_coefs[3]); + if (!is_rtt) + OSD_HOOK(); //VERT and RAM fog color constants u8* fog_colvert_bgra=(u8*)&FOG_COL_VERT; @@ -1767,28 
+1529,19 @@ bool RenderFrame() glcache.UseProgram(gl.modvol_shader.program); glUniform4fv( gl.modvol_shader.scale, 1, ShaderUniforms.scale_coefs); - glUniform4fv( gl.modvol_shader.depth_scale, 1, ShaderUniforms.depth_coefs); + glUniform1f(gl.modvol_shader.extra_depth_scale, ShaderUniforms.extra_depth_scale); GLfloat td[4]={0.5,0,0,0}; glcache.UseProgram(gl.OSD_SHADER.program); glUniform4fv( gl.OSD_SHADER.scale, 1, ShaderUniforms.scale_coefs); - glUniform4fv( gl.OSD_SHADER.depth_scale, 1, td); glUniform1f(gl.OSD_SHADER.extra_depth_scale, 1.0f); ShaderUniforms.PT_ALPHA=(PT_ALPHA_REF&0xFF)/255.0f; -// for (u32 i=0;iprogram == -1) -// continue; -// -// glcache.UseProgram(s->program); -// -// ShaderUniforms.Set(s); -// } + GLuint output_fbo; + //setup render target first if (is_rtt) { @@ -1827,7 +1580,7 @@ bool RenderFrame() } //printf("RTT packmode=%d stride=%d - %d,%d -> %d,%d\n", FB_W_CTRL.fb_packmode, FB_W_LINESTRIDE.stride * 8, // FB_X_CLIP.min, FB_Y_CLIP.min, FB_X_CLIP.max, FB_Y_CLIP.max); - BindRTT(FB_W_SOF1 & VRAM_MASK, dc_width, dc_height, channels, format); + output_fbo = BindRTT(FB_W_SOF1 & VRAM_MASK, dc_width, dc_height, channels, format); } else { @@ -1836,6 +1589,7 @@ bool RenderFrame() glBindFramebuffer(GL_FRAMEBUFFER,0); #endif glViewport(0, 0, screen_width, screen_height); + output_fbo = 0; } bool wide_screen_on = !is_rtt && settings.rend.WideScreen @@ -1848,11 +1602,6 @@ bool RenderFrame() glcache.Disable(GL_SCISSOR_TEST); - glcache.DepthMask(GL_TRUE); - glClearDepthf(0.0); - glStencilMask(0xFF); glCheck(); - glClear(GL_STENCIL_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); glCheck(); - //move vertex to gpu if (!pvrrc.isRenderFramebuffer) @@ -1872,6 +1621,11 @@ bool RenderFrame() glBufferData(GL_ARRAY_BUFFER,pvrrc.modtrig.bytes(),pvrrc.modtrig.head(),GL_STREAM_DRAW); glCheck(); } + // TR PolyParam data + glBindBuffer(GL_SHADER_STORAGE_BUFFER, gl.vbo.tr_poly_params); + glBufferData(GL_SHADER_STORAGE_BUFFER, sizeof(struct PolyParam) * pvrrc.global_param_tr.used(), 
pvrrc.global_param_tr.head(), GL_STATIC_DRAW); + glCheck(); + int offs_x=ds2s_offs_x+0.5f; //this needs to be scaled @@ -1924,13 +1678,15 @@ bool RenderFrame() //restore scale_x scale_x /= scissoring_scale_x; - - DrawStrips(); + DrawStrips(output_fbo); } else { + glBindFramebuffer(GL_FRAMEBUFFER, output_fbo); + glcache.ClearColor(0.f, 0.f, 0.f, 0.f); glClear(GL_COLOR_BUFFER_BIT); + DrawFramebuffer(dc_width, dc_height); } #if HOST_OS==OS_WINDOWS @@ -1959,16 +1715,43 @@ void rend_set_fb_scale(float x,float y) fb_scale_y=y; } +void reshapeABuffer(int w, int h); + struct glesrend : Renderer { bool Init() { return gles_init(); } - void Resize(int w, int h) { screen_width=w; screen_height=h; } + void Resize(int w, int h) + { + screen_width=w; + screen_height=h; + if (stencilTexId != 0) + { + glcache.DeleteTextures(1, &stencilTexId); + stencilTexId = 0; + } + if (depthTexId != 0) + { + glcache.DeleteTextures(1, &depthTexId); + depthTexId = 0; + } + if (opaqueTexId != 0) + { + glcache.DeleteTextures(1, &opaqueTexId); + opaqueTexId = 0; + } + if (depthSaveTexId != 0) + { + glcache.DeleteTextures(1, &depthSaveTexId); + depthSaveTexId = 0; + } + reshapeABuffer(w, h); + } void Term() { } bool Process(TA_context* ctx) { return ProcessFrame(ctx); } bool Render() { return RenderFrame(); } - void Present() { gl_swap(); glViewport(0, 0, screen_width, screen_height); } + void Present() { gl_swap(); } void DrawOSD() { OSD_DRAW(); } diff --git a/core/rend/gles/gles.h b/core/rend/gles/gles.h index 22a53f6aa..3b3b32170 100755 --- a/core/rend/gles/gles.h +++ b/core/rend/gles/gles.h @@ -1,39 +1,12 @@ #pragma once #include "rend/rend.h" +#include - -#ifdef GLES -#if defined(TARGET_IPHONE) //apple-specific ogles2 headers -//#include -#include -#include -#else -#if !defined(TARGET_NACL32) -#include -#endif -#include -#include -#endif - -#ifndef GL_NV_draw_path -//IMGTEC GLES emulation -#pragma comment(lib,"libEGL.lib") -#pragma comment(lib,"libGLESv2.lib") -#else /* NV gles emulation*/ 
-#pragma comment(lib,"libGLES20.lib") -#endif - -#else #if HOST_OS == OS_DARWIN #include #else #include #endif -#endif - -#ifndef GL_UNSIGNED_INT_8_8_8_8 -#define GL_UNSIGNED_INT_8_8_8_8 0x8035 -#endif #define glCheck() do { if (unlikely(settings.validate.OpenGlChecks)) { verify(glGetError()==GL_NO_ERROR); } } while(0) #define eglCheck() false @@ -42,6 +15,9 @@ #define VERTEX_COL_BASE_ARRAY 1 #define VERTEX_COL_OFFS_ARRAY 2 #define VERTEX_UV_ARRAY 3 +#define VERTEX_COL_BASE1_ARRAY 4 +#define VERTEX_COL_OFFS1_ARRAY 5 +#define VERTEX_UV1_ARRAY 6 #ifndef GL_UNSIGNED_INT_8_8_8_8 #define GL_UNSIGNED_INT_8_8_8_8 0x8035 @@ -52,71 +28,71 @@ extern u32 gcflip; extern float scale_x, scale_y; -void DrawStrips(); +void DrawStrips(GLuint output_fbo); struct PipelineShader { GLuint program; - GLuint scale,depth_scale; + GLuint scale; GLuint extra_depth_scale; GLuint pp_ClipTest,cp_AlphaTestValue; GLuint sp_FOG_COL_RAM,sp_FOG_COL_VERT,sp_FOG_DENSITY; + GLuint shade_scale_factor; + GLuint pp_Number; + GLuint blend_mode; + GLuint use_alpha; + GLuint ignore_tex_alpha; + GLuint shading_instr; + GLuint fog_control; GLuint trilinear_alpha; GLuint fog_clamp_min, fog_clamp_max; // u32 cp_AlphaTest; s32 pp_ClipTestMode; u32 pp_Texture, pp_UseAlpha, pp_IgnoreTexA, pp_ShadInstr, pp_Offset, pp_FogCtrl; - bool pp_Gouraud, pp_BumpMap; + u32 pp_DepthFunc; + int pass; + bool pp_TwoVolumes; + bool pp_Gouraud; + bool pp_BumpMap; bool fog_clamping; }; struct gl_ctx { -#if defined(GLES) && HOST_OS != OS_DARWIN && !defined(TARGET_NACL32) - struct - { - EGLNativeWindowType native_wind; - EGLNativeDisplayType native_disp; - EGLDisplay display; - EGLSurface surface; - EGLContext context; - } setup; -#endif - struct { GLuint program; - GLuint scale,depth_scale; + GLuint scale; GLuint extra_depth_scale; - GLuint sp_ShaderColor; - } modvol_shader; - PipelineShader pogram_table[12288]; + std::map shaders; struct { - GLuint program,scale,depth_scale; + GLuint program,scale; GLuint extra_depth_scale; } 
OSD_SHADER; struct { GLuint geometry,modvols,idxs,idxs2; -#ifndef GLES GLuint vao; -#endif + GLuint tr_poly_params; } vbo; - const char *gl_version; - const char *glsl_version_header; - int gl_major; - bool is_gles; - GLuint fog_image_format; - //GLuint matrix; + PipelineShader *getShader(int programId) { + PipelineShader *shader = shaders[programId]; + if (shader == NULL) { + shader = new PipelineShader(); + shaders[programId] = shader; + shader->program = -1; + } + return shader; + } }; extern gl_ctx gl; @@ -135,34 +111,197 @@ void CollectCleanup(); void DoCleanup(); void SortPParams(int first, int count); -void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt); +extern int screen_width; +extern int screen_height; + +GLuint BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt); void ReadRTTBuffer(); void RenderFramebuffer(); void DrawFramebuffer(float w, float h); int GetProgramID(u32 cp_AlphaTest, u32 pp_ClipTestMode, u32 pp_Texture, u32 pp_UseAlpha, u32 pp_IgnoreTexA, u32 pp_ShadInstr, u32 pp_Offset, - u32 pp_FogCtrl, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping); + u32 pp_FogCtrl, bool two_volumes, u32 pp_DepthFunc, bool pp_Gouraud, bool pp_BumpMap, bool fog_clamping, int pass); +void SetCull(u32 CulliMode); -bool CompilePipelineShader(PipelineShader* s); +extern const char *PixelPipelineShader; +bool CompilePipelineShader(PipelineShader* s, const char *source = PixelPipelineShader); #define TEXTURE_LOAD_ERROR 0 GLuint loadPNG(const string& subpath, int &width, int &height); +extern GLuint stencilTexId; +extern GLuint depthTexId; +extern GLuint opaqueTexId; +extern GLuint depthSaveTexId; + +#define SHADER_HEADER "#version 430 \n\ +\n\ +layout(r32ui, binding = 4) uniform coherent restrict uimage2D abufferPointerImg; \n\ +struct Pixel { \n\ + highp vec4 color; \n\ + highp float depth; \n\ + uint seq_num; \n\ + uint next; \n\ +}; \n\ +#define EOL 0xFFFFFFFFu \n\ +layout (binding = 0, std430) coherent restrict buffer PixelBuffer { \n\ + Pixel 
pixels[]; \n\ +}; \n\ +layout(binding = 0, offset = 0) uniform atomic_uint buffer_index; \n\ +\n\ +#define ZERO 0 \n\ +#define ONE 1 \n\ +#define OTHER_COLOR 2 \n\ +#define INVERSE_OTHER_COLOR 3 \n\ +#define SRC_ALPHA 4 \n\ +#define INVERSE_SRC_ALPHA 5 \n\ +#define DST_ALPHA 6 \n\ +#define INVERSE_DST_ALPHA 7 \n\ + \n\ +uint getNextPixelIndex() \n\ +{ \n\ + uint index = atomicCounterIncrement(buffer_index); \n\ + if (index >= pixels.length()) \n\ + // Buffer overflow \n\ + discard; \n\ + \n\ + return index; \n\ +} \n\ +\n\ +void setFragDepth(void) \n\ +{ \n\ + highp float w = 100000.0 * gl_FragCoord.w; \n\ + gl_FragDepth = 1.0 - log2(1.0 + w) / 34.0; \n\ +} \n\ +struct PolyParam { \n\ + int first; \n\ + int count; \n\ + int texid; \n\ + int tsp; \n\ + int tcw; \n\ + int pcw; \n\ + int isp; \n\ + float zvZ; \n\ + int tileclip; \n\ + int tsp1; \n\ + int tcw1; \n\ + int texid1; \n\ +}; \n\ +layout (binding = 1, std430) readonly buffer TrPolyParamBuffer { \n\ + PolyParam tr_poly_params[]; \n\ +}; \n\ + \n\ +#define GET_TSP_FOR_AREA int tsp; if (area1) tsp = pp.tsp1; else tsp = pp.tsp; \n\ + \n\ +int getSrcBlendFunc(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return (tsp >> 29) & 7; \n\ +} \n\ +\n\ +int getDstBlendFunc(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return (tsp >> 26) & 7; \n\ +} \n\ +\n\ +bool getSrcSelect(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return ((tsp >> 25) & 1) != 0; \n\ +} \n\ +\n\ +bool getDstSelect(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return ((tsp >> 24) & 1) != 0; \n\ +} \n\ +\n\ +int getFogControl(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return (tsp >> 22) & 3; \n\ +} \n\ +\n\ +bool getUseAlpha(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return ((tsp >> 20) & 1) != 0; \n\ +} \n\ +\n\ +bool getIgnoreTexAlpha(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return ((tsp 
>> 19) & 1) != 0; \n\ +} \n\ +\n\ +int getShadingInstruction(const PolyParam pp, bool area1) \n\ +{ \n\ + GET_TSP_FOR_AREA \n\ + return (tsp >> 6) & 3; \n\ +} \n\ +\n\ +int getDepthFunc(const PolyParam pp) \n\ +{ \n\ + return (pp.isp >> 29) & 7; \n\ +} \n\ +\n\ +bool getDepthMask(const PolyParam pp) \n\ +{ \n\ + return ((pp.isp >> 26) & 1) != 1; \n\ +} \n\ +\n\ +bool getShadowEnable(const PolyParam pp) \n\ +{ \n\ + return ((pp.pcw >> 7) & 1) != 0; \n\ +} \n\ +\n\ +uint getPolyNumber(const Pixel pixel) \n\ +{ \n\ + return pixel.seq_num & 0x3FFFFFFFu; \n\ +} \n\ +\n\ +#define SHADOW_STENCIL 0x40000000u \n\ +#define SHADOW_ACC 0x80000000u \n\ +\n\ +bool isShadowed(const Pixel pixel) \n\ +{ \n\ + return (pixel.seq_num & SHADOW_ACC) == SHADOW_ACC; \n\ +} \n\ +\n\ +bool isTwoVolumes(const PolyParam pp) \n\ +{ \n\ + return pp.tsp1 != -1 || pp.tcw1 != -1; \n\ +} \n\ + \n\ +" + +void SetupModvolVBO(); enum ModifierVolumeMode { Xor, Or, Inclusion, Exclusion, ModeCount }; extern struct ShaderUniforms_t { float PT_ALPHA; float scale_coefs[4]; - float depth_coefs[4]; float extra_depth_scale; float fog_den_float; float ps_FOG_COL_RAM[3]; float ps_FOG_COL_VERT[3]; + int poly_number; float trilinear_alpha; + TSP tsp0; + TSP tsp1; + TCW tcw0; + TCW tcw1; float fog_clamp_min[4]; float fog_clamp_max[4]; + void setUniformArray(GLuint location, int v0, int v1) + { + int array[] = { v0, v1 }; + glUniform1iv(location, 2, array); + } + void Set(PipelineShader* s) { if (s->cp_AlphaTestValue!=-1) @@ -171,9 +310,6 @@ extern struct ShaderUniforms_t if (s->scale!=-1) glUniform4fv( s->scale, 1, scale_coefs); - if (s->depth_scale!=-1) - glUniform4fv( s->depth_scale, 1, depth_coefs); - if (s->extra_depth_scale != -1) glUniform1f(s->extra_depth_scale, extra_depth_scale); @@ -186,6 +322,29 @@ extern struct ShaderUniforms_t if (s->sp_FOG_COL_VERT!=-1) glUniform3fv( s->sp_FOG_COL_VERT, 1, ps_FOG_COL_VERT); + if (s->shade_scale_factor != -1) + glUniform1f(s->shade_scale_factor, 
FPU_SHAD_SCALE.scale_factor / 256.f); + + if (s->blend_mode != -1) { + u32 blend_mode[] = { tsp0.SrcInstr, tsp0.DstInstr, tsp1.SrcInstr, tsp1.DstInstr }; + glUniform2iv(s->blend_mode, 2, (GLint *)blend_mode); + } + + if (s->use_alpha != -1) + setUniformArray(s->use_alpha, tsp0.UseAlpha, tsp1.UseAlpha); + + if (s->ignore_tex_alpha != -1) + setUniformArray(s->ignore_tex_alpha, tsp0.IgnoreTexA, tsp1.IgnoreTexA); + + if (s->shading_instr != -1) + setUniformArray(s->shading_instr, tsp0.ShadInstr, tsp1.ShadInstr); + + if (s->fog_control != -1) + setUniformArray(s->fog_control, tsp0.FogCtrl, tsp1.FogCtrl); + + if (s->pp_Number != -1) + glUniform1i(s->pp_Number, poly_number); + if (s->trilinear_alpha != -1) glUniform1f(s->trilinear_alpha, trilinear_alpha); diff --git a/core/rend/gles/gltex.cpp b/core/rend/gles/gltex.cpp index 40d309e77..8d9cc0283 100644 --- a/core/rend/gles/gltex.cpp +++ b/core/rend/gles/gltex.cpp @@ -206,7 +206,7 @@ static void dumpTexture(int texID, int w, int h, GLuint textype, void *temp_tex_ fclose(fp); for (int y = 0; y < h; y++) - free(rows[y]); + free(rows[y]); free(rows); } @@ -576,29 +576,26 @@ TextureCacheData *getTextureCacheData(TSP tsp, TCW tcw); struct FBT { u32 TexAddr; - GLuint depthb,stencilb; GLuint tex; GLuint fbo; }; FBT fb_rtt; -void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) +GLuint BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) { FBT& rv=fb_rtt; if (rv.fbo) glDeleteFramebuffers(1,&rv.fbo); if (rv.tex) glcache.DeleteTextures(1,&rv.tex); - if (rv.depthb) glDeleteRenderbuffers(1,&rv.depthb); - if (rv.stencilb) glDeleteRenderbuffers(1,&rv.stencilb); rv.TexAddr=addy>>3; - // Find the largest square power of two texture that fits into the viewport - int fbh2 = 2; + // Find the smallest power of two texture that fits the viewport + int fbh2 = 8; while (fbh2 < fbh) fbh2 *= 2; - int fbw2 = 2; + int fbw2 = 8; while (fbw2 < fbw) fbw2 *= 2; @@ -612,26 +609,6 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, 
u32 fmt) // Get the currently bound frame buffer object. On most platforms this just gives 0. //glGetIntegerv(GL_FRAMEBUFFER_BINDING, &m_i32OriginalFbo); - // Generate and bind a render buffer which will become a depth buffer shared between our two FBOs - glGenRenderbuffers(1, &rv.depthb); - glBindRenderbuffer(GL_RENDERBUFFER, rv.depthb); - - /* - Currently it is unknown to GL that we want our new render buffer to be a depth buffer. - glRenderbufferStorage will fix this and in this case will allocate a depth buffer - m_i32TexSize by m_i32TexSize. - */ - -#ifdef GLES - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24_OES, fbw2, fbh2); -#else - glRenderbufferStorage(GL_RENDERBUFFER, GL_DEPTH_COMPONENT24, fbw2, fbh2); -#endif - - glGenRenderbuffers(1, &rv.stencilb); - glBindRenderbuffer(GL_RENDERBUFFER, rv.stencilb); - glRenderbufferStorage(GL_RENDERBUFFER, GL_STENCIL_INDEX8, fbw2, fbh2); - // Create a texture for rendering to rv.tex = glcache.GenTexture(); glcache.BindTexture(GL_TEXTURE_2D, rv.tex); @@ -645,15 +622,14 @@ void BindRTT(u32 addy, u32 fbw, u32 fbh, u32 channels, u32 fmt) // Attach the texture to the FBO glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, rv.tex, 0); - // Attach the depth buffer we created earlier to our FBO. - glFramebufferRenderbuffer(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_RENDERBUFFER, rv.depthb); - // Check that our FBO creation was successful GLuint uStatus = glCheckFramebufferStatus(GL_FRAMEBUFFER); verify(uStatus == GL_FRAMEBUFFER_COMPLETE); glViewport(0, 0, fbw, fbh); // TODO CLIP_X/Y min? 
+ + return rv.fbo; } void ReadRTTBuffer() { @@ -798,10 +774,11 @@ void ReadRTTBuffer() { } fb_rtt.tex = 0; - if (fb_rtt.fbo) { glDeleteFramebuffers(1,&fb_rtt.fbo); fb_rtt.fbo = 0; } - if (fb_rtt.depthb) { glDeleteRenderbuffers(1,&fb_rtt.depthb); fb_rtt.depthb = 0; } - if (fb_rtt.stencilb) { glDeleteRenderbuffers(1,&fb_rtt.stencilb); fb_rtt.stencilb = 0; } - + if (fb_rtt.fbo) + { + glDeleteFramebuffers(1,&fb_rtt.fbo); + fb_rtt.fbo = 0; + } } static int TexCacheLookups; diff --git a/core/rend/soft/softrend.cpp b/core/rend/soft/softrend.cpp index 8125218db..719904913 100644 --- a/core/rend/soft/softrend.cpp +++ b/core/rend/soft/softrend.cpp @@ -20,6 +20,7 @@ #include #include +#include #include "rend/gles/gles.h" @@ -53,6 +54,54 @@ union m128i { uint32_t m128i_u32[4]; }; + +bool operator<(const PolyParam &left, const PolyParam &right) +{ +/* put any condition you want to sort on here */ + return left.zvZcount<2) + { + pp->zvZ=0; + } + else + { + u16* idx=idx_base+pp->first; + + Vertex* vtx=vtx_base+idx[0]; + Vertex* vtx_end=vtx_base + idx[pp->count-1]+1; + + u32 zv=0xFFFFFFFF; + while(vtx!=vtx_end) + { + zv=min(zv,(u32&)vtx->z); + vtx++; + } + + pp->zvZ=(f32&)zv; + } + pp++; + } + + std::stable_sort(pvrrc.global_param_tr.head() + first, pvrrc.global_param_tr.head() + first + count); +} + static __m128 _mm_load_scaled_float(float v, float s) { return _mm_setr_ps(v, v + s, v + s + s, v + s + s + s); diff --git a/shell/reicast.vcxproj b/shell/reicast.vcxproj index 17ac8b5ff..9484643b2 100644 --- a/shell/reicast.vcxproj +++ b/shell/reicast.vcxproj @@ -1,4 +1,4 @@ - + @@ -191,6 +191,7 @@ +