diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index f0aeec1a7..de27b6533 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -42,7 +42,7 @@ GLuint glprim[8] = GL_TRIANGLES, // With OpenGL ES we have to expand sprites into triangles, tripling the data instead of doubling. sigh. OpenGL ES, Y U NO SUPPORT GL_QUADS? }; -DecodedVertex decoded[65536]; +u8 decoded[65536 * 32]; TransformedVertex transformed[65536]; TransformedVertex transformedExpanded[65536]; uint16_t indexBuffer[65536]; // Unused @@ -270,8 +270,11 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte Lighter lighter; + VertexReader reader(decoded, dec.GetDecVtxFmt()); for (int index = indexLowerBound; index <= indexUpperBound; index++) { + reader.Goto(index); + float v[3] = {0, 0, 0}; float c0[4] = {1, 1, 1, 1}; float c1[4] = {0, 0, 0, 0}; @@ -280,11 +283,10 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (throughmode) { // Do not touch the coordinates or the colors. No lighting. - for (int j=0; j<3; j++) - v[j] = decoded[index].pos[j]; - if(dec.hasColor()) { - for (int j=0; j<4; j++) { - c0[j] = decoded[index].color[j] / 255.0f; + reader.ReadPos(v); + if (reader.hasColor0()) { + reader.ReadColor0(c0); + for (int j = 0; j < 4; j++) { c1[j] = 0.0f; } } @@ -296,48 +298,69 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte c0[3] = (gstate.materialalpha & 0xFF) / 255.f; } - // TODO : check if has uv - for (int j=0; j<2; j++) - uv[j] = decoded[index].uv[j]; - // Rescale UV? + if (reader.hasUV()) { + reader.ReadUV(uv); + } + // Scale UV? } else { // We do software T&L for now float out[3], norm[3]; + float pos[3], nrm[3]; + reader.ReadPos(pos); + if (reader.hasNormal()) + reader.ReadNrm(nrm); + if ((gstate.vertType & GE_VTYPE_WEIGHT_MASK) == GE_VTYPE_WEIGHT_NONE) { - Vec3ByMatrix43(out, decoded[index].pos, gstate.worldMatrix); - Norm3ByMatrix43(norm, decoded[index].normal, gstate.worldMatrix); + Vec3ByMatrix43(out, pos, gstate.worldMatrix); + if (reader.hasNormal()) { + Norm3ByMatrix43(norm, nrm, gstate.worldMatrix); + } else { + memset(norm, 0, 12); + } } else { + float weights[8]; + reader.ReadPos(pos); + if (reader.hasNormal()) { + reader.ReadNrm(nrm); + } else { + memset(nrm, 0, 12); + } + reader.ReadWeights(weights); // Skinning Vec3 psum(0,0,0); Vec3 nsum(0,0,0); int nweights = ((gstate.vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT) + 1; for (int i = 0; i < nweights; i++) { - if (decoded[index].weights[i] != 0.0f) { - Vec3ByMatrix43(out, decoded[index].pos, gstate.boneMatrix+i*12); - Norm3ByMatrix43(norm, decoded[index].normal, gstate.boneMatrix+i*12); - Vec3 tpos(out), tnorm(norm); - psum += tpos*decoded[index].weights[i]; - nsum += tnorm*decoded[index].weights[i]; + if (weights[i] != 0.0f) { + Vec3ByMatrix43(out, pos, gstate.boneMatrix+i*12); + Vec3 tpos(out); + psum += tpos * weights[i]; + if (reader.hasNormal()) { + Norm3ByMatrix43(norm, nrm, gstate.boneMatrix+i*12); + Vec3 tnorm(norm); + nsum += tnorm * weights[i]; + } } } - - nsum.Normalize(); - + Vec3ByMatrix43(out, psum.v, gstate.worldMatrix); - Norm3ByMatrix43(norm, nsum.v, gstate.worldMatrix); + if (reader.hasNormal()) { + nsum.Normalize(); + Norm3ByMatrix43(norm, nsum.v, gstate.worldMatrix); + } } // Perform lighting here if enabled. don't need to check through, it's checked above. float dots[4] = {0,0,0,0}; - float unlitColor[4]; - for (int j = 0; j < 4; j++) { - unlitColor[j] = decoded[index].color[j] / 255.0f; + float unlitColor[4] = {1, 1, 1, 1}; + if (reader.hasColor0()) { + reader.ReadColor0(unlitColor); } float litColor0[4]; float litColor1[4]; @@ -378,14 +401,16 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte if (customUV) { uv[0] = customUV[index * 2 + 0]*gstate_c.uScale + gstate_c.uOff; uv[1] = customUV[index * 2 + 1]*gstate_c.vScale + gstate_c.vOff; - } else { + } else if (reader.hasUV()) { + float ruv[2]; + reader.ReadUV(ruv); // Perform texture coordinate generation after the transform and lighting - one style of UV depends on lights. switch (gstate.texmapmode & 0x3) { case 0: // UV mapping // Texture scale/offset is only performed in this mode. - uv[0] = decoded[index].uv[0]*gstate_c.uScale + gstate_c.uOff; - uv[1] = decoded[index].uv[1]*gstate_c.vScale + gstate_c.vOff; + uv[0] = ruv[0]*gstate_c.uScale + gstate_c.uOff; + uv[1] = ruv[1]*gstate_c.vScale + gstate_c.vOff; break; case 1: { @@ -394,10 +419,10 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte switch ((gstate.texmapmode >> 8) & 0x3) { case 0: // Use model space XYZ as source - source = decoded[index].pos; + source = pos; break; case 1: // Use unscaled UV as source - source = Vec3(decoded[index].uv[0], decoded[index].uv[1], 0.0f); + source = Vec3(ruv[0], ruv[1], 0.0f); break; case 2: // Use normalized normal as source source = Vec3(norm).Normalized(); @@ -406,6 +431,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte source = Vec3(norm); break; } + float uvw[3]; Vec3ByMatrix43(uvw, &source.x, gstate.tgenMatrix); uv[0] = uvw[0]; @@ -433,6 +459,8 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte // will be moved to hardware transform anyway. Vec3ByMatrix43(v, out, gstate.viewMatrix); } + + // TODO: Write to a flexible buffer. memcpy(&transformed[index].x, v, 3 * sizeof(float)); memcpy(&transformed[index].uv, uv, 2 * sizeof(float)); memcpy(&transformed[index].color0, c0, 4 * sizeof(float)); diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index 91f9aebf0..fd94ecae3 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -85,7 +85,7 @@ DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt) { } void VertexDecoder::SetVertexType(u32 fmt) { - fmt = fmt; + fmt_ = fmt; throughmode = (fmt & GE_VTYPE_THROUGH) != 0; int biggest = 0; @@ -165,6 +165,8 @@ void VertexDecoder::SetVertexType(u32 fmt) { case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S16_3; break; case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_FLOAT_3; break; } + // Actually, temporarily let's not. + decFmt.nrmfmt = DEC_FLOAT_3; decFmt.nrmoff = decOff; decOff += DecFmtSize(decFmt.nrmfmt); } @@ -186,10 +188,13 @@ void VertexDecoder::SetVertexType(u32 fmt) { case GE_VTYPE_POS_16BIT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_S16_3; break; case GE_VTYPE_POS_FLOAT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_FLOAT_3; break; } + // Actually, temporarily let's not. + decFmt.posfmt = DEC_FLOAT_3; } decFmt.posoff = decOff; decOff += DecFmtSize(decFmt.posfmt); } + decFmt.stride = decOff; size = align(size, biggest); onesize_ = size; @@ -197,14 +202,12 @@ void VertexDecoder::SetVertexType(u32 fmt) { DEBUG_LOG(G3D,"SVT : size = %i, aligned to biggest %i", size, biggest); } -void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const +void VertexDecoder::DecodeVerts(u8 *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const { // TODO: Remove if (morphcount == 1) gstate_c.morphWeights[0] = 1.0f; - char *ptr = (char *)verts; - // Find index bounds. Could cache this in display lists. int lowerBound = 0x7FFFFFFF; int upperBound = 0; @@ -234,10 +237,10 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const // Decode the vertices within the found bounds, once each (unlike the previous way..) for (int index = lowerBound; index <= upperBound; index++) { - ptr = (char*)verts + (index * size); + u8 *ptr = (u8*)verts + (index * size); // TODO: Should weights be morphed? - float *wt = decoded[index].weights; + float *wt = (float *)decoded; switch (weighttype) { case GE_VTYPE_WEIGHT_NONE >> 9: @@ -267,26 +270,28 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const } break; } + if (weighttype) + decoded += nweights * sizeof(float); // TODO: Not morphing UV yet - float *uv = decoded[index].uv; switch (tc) { case GE_VTYPE_TC_NONE: - uv[0] = 0.0f; - uv[1] = 0.0f; break; case GE_VTYPE_TC_8BIT: { + float *uv = (float *)decoded; const u8 *uvdata = (const u8*)(ptr + tcoff); for (int j = 0; j < 2; j++) uv[j] = (float)uvdata[j] / 128.0f; + decoded += 2 * sizeof(float); break; } case GE_VTYPE_TC_16BIT: { + float *uv = (float *)decoded; const u16 *uvdata = (const u16*)(ptr + tcoff); if (throughmode) { @@ -298,11 +303,13 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const uv[0] = (float)uvdata[0] / 32768.0f; uv[1] = (float)uvdata[1] / 32768.0f; } + decoded += 2 * sizeof(float); } break; case GE_VTYPE_TC_FLOAT: { + float *uv = (float *)decoded; const float *uvdata = (const float*)(ptr + tcoff); if (throughmode) { uv[0] = uvdata[0] / (float)(gstate_c.curTextureWidth); @@ -311,97 +318,103 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const uv[0] = uvdata[0]; uv[1] = uvdata[1]; } + decoded += 2 * sizeof(float); } break; } // TODO: Not morphing color yet - u8 *c = decoded[index].color; switch (col) { case GE_VTYPE_COL_4444 >> 2: { + u8 *c = decoded; u16 cdata = *(u16*)(ptr + coloff); for (int j = 0; j < 4; j++) c[j] = Convert4To8((cdata >> (j * 4)) & 0xF); + decoded += 4; } break; case GE_VTYPE_COL_565 >> 2: { + u8 *c = decoded; u16 cdata = *(u16*)(ptr + coloff); c[0] = Convert5To8(cdata & 0x1f); c[1] = Convert6To8((cdata>>5) & 0x3f); c[2] = Convert5To8((cdata>>11) & 0x1f); c[3] = 1.0f; + decoded += 4; } break; case GE_VTYPE_COL_5551 >> 2: { + u8 *c = decoded; u16 cdata = *(u16*)(ptr + coloff); c[0] = Convert5To8(cdata & 0x1f); c[1] = Convert5To8((cdata>>5) & 0x1f); c[2] = Convert5To8((cdata>>10) & 0x1f); c[3] = (cdata>>15) ? 255 : 0; + decoded += 4; } break; case GE_VTYPE_COL_8888 >> 2: { + u8 *c = decoded; // TODO: speedup u8 *cdata = (u8*)(ptr + coloff); for (int j = 0; j < 4; j++) c[j] = cdata[j]; + decoded += 4; } break; default: - c[0] = 255; - c[1] = 255; - c[2] = 255; - c[3] = 255; break; } - float *normal = decoded[index].normal; - memset(normal, 0, sizeof(float)*3); - for (int n = 0; n < morphcount; n++) - { - float multiplier = gstate_c.morphWeights[n]; - if (gstate.reversenormals & 0xFFFFFF) { - multiplier = -multiplier; - } - switch (nrm) + float *normal = (float *)decoded; + if (nrm) { + memset(normal, 0, sizeof(float)*3); + for (int n = 0; n < morphcount; n++) { - case GE_VTYPE_NRM_8BIT: - { - const s8 *sv = (const s8*)(ptr + onesize_*n + nrmoff); - for (int j = 0; j < 3; j++) - normal[j] += (sv[j]/127.0f) * multiplier; + float multiplier = gstate_c.morphWeights[n]; + if (gstate.reversenormals & 0xFFFFFF) { + multiplier = -multiplier; } - break; + switch (nrm) + { + case GE_VTYPE_NRM_8BIT: + { + const s8 *sv = (const s8*)(ptr + onesize_*n + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] += (sv[j]/127.0f) * multiplier; + } + break; - case GE_VTYPE_NRM_FLOAT >> 5: - { - const float *fv = (const float*)(ptr + onesize_*n + nrmoff); - for (int j = 0; j < 3; j++) - normal[j] += fv[j] * multiplier; - } - break; + case GE_VTYPE_NRM_FLOAT >> 5: + { + const float *fv = (const float*)(ptr + onesize_*n + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] += fv[j] * multiplier; + } + break; - case GE_VTYPE_NRM_16BIT >> 5: - { - const short *sv = (const short*)(ptr + onesize_*n + nrmoff); - for (int j = 0; j < 3; j++) - normal[j] += (sv[j]/32767.0f) * multiplier; + case GE_VTYPE_NRM_16BIT >> 5: + { + const short *sv = (const short*)(ptr + onesize_*n + nrmoff); + for (int j = 0; j < 3; j++) + normal[j] += (sv[j]/32767.0f) * multiplier; + } + break; } - break; } + decoded += 12; } - float *v = decoded[index].pos; - + float *v = (float *)decoded; if (morphcount == 1) { switch (pos) { @@ -475,6 +488,7 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const } } } + decoded += 12; } } diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h index fb02f4221..b2b3b2eee 100644 --- a/GPU/GLES/VertexDecoder.h +++ b/GPU/GLES/VertexDecoder.h @@ -92,12 +92,12 @@ public: void SetVertexType(u32 vtype); const DecVtxFormat &GetDecVtxFmt() { return decFmt; } - void DecodeVerts(DecodedVertex *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const; + void DecodeVerts(u8 *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const; bool hasColor() const { return col != 0; } int VertexSize() const { return size; } private: - u32 fmt; + u32 fmt_; DecVtxFormat decFmt; bool throughmode; @@ -121,16 +121,17 @@ private: int nweights; }; - // Reads decoded vertex formats in a convenient way. For software transform and debugging. class VertexReader { public: - VertexReader(u8 *data, const DecVtxFormat &decFmt) : data_(data), decFmt_(decFmt) {} + VertexReader(u8 *base, const DecVtxFormat &decFmt) : base_(base), data_(base), decFmt_(decFmt) {} void ReadPos(float pos[3]) { switch (decFmt_.posfmt) { - case DEC_FLOAT_3: memcpy(pos, data_ + decFmt_.posoff, 12); break; + case DEC_FLOAT_3: + memcpy(pos, data_ + decFmt_.posoff, 12); + break; case DEC_S16_3: { s16 *p = (s16 *)(data_ + decFmt_.posoff); @@ -149,8 +150,10 @@ public: } void ReadNrm(float nrm[3]) { - switch (decFmt_.nrmoff) { - case DEC_FLOAT_3: memcpy(nrm, data_ + decFmt_.nrmoff, 12); break; + switch (decFmt_.nrmfmt) { + case DEC_FLOAT_3: + memcpy(nrm, data_ + decFmt_.nrmoff, 12); + break; case DEC_S16_3: { s16 *p = (s16 *)(data_ + decFmt_.nrmoff); @@ -171,7 +174,7 @@ public: void ReadUV(float uv[2]) { switch (decFmt_.uvfmt) { case DEC_FLOAT_2: - memcpy(uv, data_ + decFmt_.nrmoff, 8); break; + memcpy(uv, data_ + decFmt_.uvoff, 8); break; } } @@ -218,11 +221,16 @@ public: } } - void Next() { - data_ += decFmt_.stride; + bool hasColor0() const { return decFmt_.c0fmt != 0; } + bool hasNormal() const { return decFmt_.nrmfmt != 0; } + bool hasUV() const { return decFmt_.uvfmt != 0; } + + void Goto(int index) { + data_ = base_ + index * decFmt_.stride; } private: + u8 *base_; u8 *data_; DecVtxFormat decFmt_; int vtype_;