Use flexible vertex formats before the transform step. Saves memory bandwidth.

This commit is contained in:
Henrik Rydgard 2012-12-19 20:21:59 +01:00
parent b8d596cbec
commit 13460b7aa6
3 changed files with 134 additions and 84 deletions

View file

@ -42,7 +42,7 @@ GLuint glprim[8] =
GL_TRIANGLES, // With OpenGL ES we have to expand sprites into triangles, tripling the data instead of doubling. sigh. OpenGL ES, Y U NO SUPPORT GL_QUADS?
};
// Scratch buffers used by TransformAndDrawPrim.
// Decoded vertices are kept as raw bytes whose layout is described by the decoder's
// DecVtxFormat (they are read back through VertexReader), so `decoded` is declared once,
// as a byte buffer. A second, DecodedVertex-typed `decoded` would be a conflicting
// redefinition of the same name.
u8 decoded[65536 * 32];  // NOTE(review): presumably 65536 vertices at up to 32 bytes each - confirm
TransformedVertex transformed[65536];
TransformedVertex transformedExpanded[65536];
uint16_t indexBuffer[65536];  // Unused
@ -270,8 +270,11 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte
Lighter lighter;
VertexReader reader(decoded, dec.GetDecVtxFmt());
for (int index = indexLowerBound; index <= indexUpperBound; index++)
{
reader.Goto(index);
float v[3] = {0, 0, 0};
float c0[4] = {1, 1, 1, 1};
float c1[4] = {0, 0, 0, 0};
@ -280,11 +283,10 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte
if (throughmode)
{
// Do not touch the coordinates or the colors. No lighting.
for (int j=0; j<3; j++)
v[j] = decoded[index].pos[j];
if(dec.hasColor()) {
for (int j=0; j<4; j++) {
c0[j] = decoded[index].color[j] / 255.0f;
reader.ReadPos(v);
if (reader.hasColor0()) {
reader.ReadColor0(c0);
for (int j = 0; j < 4; j++) {
c1[j] = 0.0f;
}
}
@ -296,48 +298,69 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte
c0[3] = (gstate.materialalpha & 0xFF) / 255.f;
}
// TODO : check if has uv
for (int j=0; j<2; j++)
uv[j] = decoded[index].uv[j];
// Rescale UV?
if (reader.hasUV()) {
reader.ReadUV(uv);
}
// Scale UV?
}
else
{
// We do software T&L for now
float out[3], norm[3];
float pos[3], nrm[3];
reader.ReadPos(pos);
if (reader.hasNormal())
reader.ReadNrm(nrm);
if ((gstate.vertType & GE_VTYPE_WEIGHT_MASK) == GE_VTYPE_WEIGHT_NONE)
{
Vec3ByMatrix43(out, decoded[index].pos, gstate.worldMatrix);
Norm3ByMatrix43(norm, decoded[index].normal, gstate.worldMatrix);
Vec3ByMatrix43(out, pos, gstate.worldMatrix);
if (reader.hasNormal()) {
Norm3ByMatrix43(norm, nrm, gstate.worldMatrix);
} else {
memset(norm, 0, 12);
}
}
else
{
float weights[8];
reader.ReadPos(pos);
if (reader.hasNormal()) {
reader.ReadNrm(nrm);
} else {
memset(nrm, 0, 12);
}
reader.ReadWeights(weights);
// Skinning
Vec3 psum(0,0,0);
Vec3 nsum(0,0,0);
int nweights = ((gstate.vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT) + 1;
for (int i = 0; i < nweights; i++)
{
if (decoded[index].weights[i] != 0.0f) {
Vec3ByMatrix43(out, decoded[index].pos, gstate.boneMatrix+i*12);
Norm3ByMatrix43(norm, decoded[index].normal, gstate.boneMatrix+i*12);
Vec3 tpos(out), tnorm(norm);
psum += tpos*decoded[index].weights[i];
nsum += tnorm*decoded[index].weights[i];
if (weights[i] != 0.0f) {
Vec3ByMatrix43(out, pos, gstate.boneMatrix+i*12);
Vec3 tpos(out);
psum += tpos * weights[i];
if (reader.hasNormal()) {
Norm3ByMatrix43(norm, nrm, gstate.boneMatrix+i*12);
Vec3 tnorm(norm);
nsum += tnorm * weights[i];
}
}
}
nsum.Normalize();
Vec3ByMatrix43(out, psum.v, gstate.worldMatrix);
Norm3ByMatrix43(norm, nsum.v, gstate.worldMatrix);
if (reader.hasNormal()) {
nsum.Normalize();
Norm3ByMatrix43(norm, nsum.v, gstate.worldMatrix);
}
}
// Perform lighting here if enabled. don't need to check through, it's checked above.
float dots[4] = {0,0,0,0};
float unlitColor[4];
for (int j = 0; j < 4; j++) {
unlitColor[j] = decoded[index].color[j] / 255.0f;
float unlitColor[4] = {1, 1, 1, 1};
if (reader.hasColor0()) {
reader.ReadColor0(unlitColor);
}
float litColor0[4];
float litColor1[4];
@ -378,14 +401,16 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte
if (customUV) {
uv[0] = customUV[index * 2 + 0]*gstate_c.uScale + gstate_c.uOff;
uv[1] = customUV[index * 2 + 1]*gstate_c.vScale + gstate_c.vOff;
} else {
} else if (reader.hasUV()) {
float ruv[2];
reader.ReadUV(ruv);
// Perform texture coordinate generation after the transform and lighting - one style of UV depends on lights.
switch (gstate.texmapmode & 0x3)
{
case 0: // UV mapping
// Texture scale/offset is only performed in this mode.
uv[0] = decoded[index].uv[0]*gstate_c.uScale + gstate_c.uOff;
uv[1] = decoded[index].uv[1]*gstate_c.vScale + gstate_c.vOff;
uv[0] = ruv[0]*gstate_c.uScale + gstate_c.uOff;
uv[1] = ruv[1]*gstate_c.vScale + gstate_c.vOff;
break;
case 1:
{
@ -394,10 +419,10 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte
switch ((gstate.texmapmode >> 8) & 0x3)
{
case 0: // Use model space XYZ as source
source = decoded[index].pos;
source = pos;
break;
case 1: // Use unscaled UV as source
source = Vec3(decoded[index].uv[0], decoded[index].uv[1], 0.0f);
source = Vec3(ruv[0], ruv[1], 0.0f);
break;
case 2: // Use normalized normal as source
source = Vec3(norm).Normalized();
@ -406,6 +431,7 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte
source = Vec3(norm);
break;
}
float uvw[3];
Vec3ByMatrix43(uvw, &source.x, gstate.tgenMatrix);
uv[0] = uvw[0];
@ -433,6 +459,8 @@ void GLES_GPU::TransformAndDrawPrim(void *verts, void *inds, int prim, int verte
// will be moved to hardware transform anyway.
Vec3ByMatrix43(v, out, gstate.viewMatrix);
}
// TODO: Write to a flexible buffer.
memcpy(&transformed[index].x, v, 3 * sizeof(float));
memcpy(&transformed[index].uv, uv, 2 * sizeof(float));
memcpy(&transformed[index].color0, c0, 4 * sizeof(float));

View file

@ -85,7 +85,7 @@ DecVtxFormat GetTransformedVtxFormat(const DecVtxFormat &fmt) {
}
void VertexDecoder::SetVertexType(u32 fmt) {
fmt = fmt;
fmt_ = fmt;
throughmode = (fmt & GE_VTYPE_THROUGH) != 0;
int biggest = 0;
@ -165,6 +165,8 @@ void VertexDecoder::SetVertexType(u32 fmt) {
case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S16_3; break;
case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_FLOAT_3; break;
}
// Actually, temporarily let's not.
decFmt.nrmfmt = DEC_FLOAT_3;
decFmt.nrmoff = decOff;
decOff += DecFmtSize(decFmt.nrmfmt);
}
@ -186,10 +188,13 @@ void VertexDecoder::SetVertexType(u32 fmt) {
case GE_VTYPE_POS_16BIT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_S16_3; break;
case GE_VTYPE_POS_FLOAT >> GE_VTYPE_POS_SHIFT: decFmt.posfmt = DEC_FLOAT_3; break;
}
// Actually, temporarily let's not.
decFmt.posfmt = DEC_FLOAT_3;
}
decFmt.posoff = decOff;
decOff += DecFmtSize(decFmt.posfmt);
}
decFmt.stride = decOff;
size = align(size, biggest);
onesize_ = size;
@ -197,14 +202,12 @@ void VertexDecoder::SetVertexType(u32 fmt) {
DEBUG_LOG(G3D,"SVT : size = %i, aligned to biggest %i", size, biggest);
}
void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const
void VertexDecoder::DecodeVerts(u8 *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const
{
// TODO: Remove
if (morphcount == 1)
gstate_c.morphWeights[0] = 1.0f;
char *ptr = (char *)verts;
// Find index bounds. Could cache this in display lists.
int lowerBound = 0x7FFFFFFF;
int upperBound = 0;
@ -234,10 +237,10 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const
// Decode the vertices within the found bounds, once each (unlike the previous way..)
for (int index = lowerBound; index <= upperBound; index++)
{
ptr = (char*)verts + (index * size);
u8 *ptr = (u8*)verts + (index * size);
// TODO: Should weights be morphed?
float *wt = decoded[index].weights;
float *wt = (float *)decoded;
switch (weighttype)
{
case GE_VTYPE_WEIGHT_NONE >> 9:
@ -267,26 +270,28 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const
}
break;
}
if (weighttype)
decoded += nweights * sizeof(float);
// TODO: Not morphing UV yet
float *uv = decoded[index].uv;
switch (tc)
{
case GE_VTYPE_TC_NONE:
uv[0] = 0.0f;
uv[1] = 0.0f;
break;
case GE_VTYPE_TC_8BIT:
{
float *uv = (float *)decoded;
const u8 *uvdata = (const u8*)(ptr + tcoff);
for (int j = 0; j < 2; j++)
uv[j] = (float)uvdata[j] / 128.0f;
decoded += 2 * sizeof(float);
break;
}
case GE_VTYPE_TC_16BIT:
{
float *uv = (float *)decoded;
const u16 *uvdata = (const u16*)(ptr + tcoff);
if (throughmode)
{
@ -298,11 +303,13 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const
uv[0] = (float)uvdata[0] / 32768.0f;
uv[1] = (float)uvdata[1] / 32768.0f;
}
decoded += 2 * sizeof(float);
}
break;
case GE_VTYPE_TC_FLOAT:
{
float *uv = (float *)decoded;
const float *uvdata = (const float*)(ptr + tcoff);
if (throughmode) {
uv[0] = uvdata[0] / (float)(gstate_c.curTextureWidth);
@ -311,97 +318,103 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const
uv[0] = uvdata[0];
uv[1] = uvdata[1];
}
decoded += 2 * sizeof(float);
}
break;
}
// TODO: Not morphing color yet
u8 *c = decoded[index].color;
switch (col)
{
case GE_VTYPE_COL_4444 >> 2:
{
u8 *c = decoded;
u16 cdata = *(u16*)(ptr + coloff);
for (int j = 0; j < 4; j++)
c[j] = Convert4To8((cdata >> (j * 4)) & 0xF);
decoded += 4;
}
break;
case GE_VTYPE_COL_565 >> 2:
	{
		// 16-bit color packed as 5, 6 and 5 bits, starting from the low bits.
		// Each channel is widened to 8 bits and written to the output cursor.
		u8 *c = decoded;
		u16 cdata = *(u16*)(ptr + coloff);
		c[0] = Convert5To8(cdata & 0x1f);
		c[1] = Convert6To8((cdata>>5) & 0x3f);
		c[2] = Convert5To8((cdata>>11) & 0x1f);
		// No bits are left for alpha in this format, so store the maximum u8 value.
		// The previous 1.0f was converted to the byte value 1, i.e. 1/255 rather than opaque.
		c[3] = 255;
		// Advance the output cursor past the 4 color bytes just written.
		decoded += 4;
	}
	break;
case GE_VTYPE_COL_5551 >> 2:
{
u8 *c = decoded;
u16 cdata = *(u16*)(ptr + coloff);
c[0] = Convert5To8(cdata & 0x1f);
c[1] = Convert5To8((cdata>>5) & 0x1f);
c[2] = Convert5To8((cdata>>10) & 0x1f);
c[3] = (cdata>>15) ? 255 : 0;
decoded += 4;
}
break;
case GE_VTYPE_COL_8888 >> 2:
{
u8 *c = decoded;
// TODO: speedup
u8 *cdata = (u8*)(ptr + coloff);
for (int j = 0; j < 4; j++)
c[j] = cdata[j];
decoded += 4;
}
break;
default:
c[0] = 255;
c[1] = 255;
c[2] = 255;
c[3] = 255;
break;
}
float *normal = decoded[index].normal;
memset(normal, 0, sizeof(float)*3);
for (int n = 0; n < morphcount; n++)
{
float multiplier = gstate_c.morphWeights[n];
if (gstate.reversenormals & 0xFFFFFF) {
multiplier = -multiplier;
}
switch (nrm)
float *normal = (float *)decoded;
if (nrm) {
memset(normal, 0, sizeof(float)*3);
for (int n = 0; n < morphcount; n++)
{
case GE_VTYPE_NRM_8BIT:
{
const s8 *sv = (const s8*)(ptr + onesize_*n + nrmoff);
for (int j = 0; j < 3; j++)
normal[j] += (sv[j]/127.0f) * multiplier;
float multiplier = gstate_c.morphWeights[n];
if (gstate.reversenormals & 0xFFFFFF) {
multiplier = -multiplier;
}
break;
switch (nrm)
{
case GE_VTYPE_NRM_8BIT:
{
const s8 *sv = (const s8*)(ptr + onesize_*n + nrmoff);
for (int j = 0; j < 3; j++)
normal[j] += (sv[j]/127.0f) * multiplier;
}
break;
case GE_VTYPE_NRM_FLOAT >> 5:
{
const float *fv = (const float*)(ptr + onesize_*n + nrmoff);
for (int j = 0; j < 3; j++)
normal[j] += fv[j] * multiplier;
}
break;
case GE_VTYPE_NRM_FLOAT >> 5:
{
const float *fv = (const float*)(ptr + onesize_*n + nrmoff);
for (int j = 0; j < 3; j++)
normal[j] += fv[j] * multiplier;
}
break;
case GE_VTYPE_NRM_16BIT >> 5:
{
const short *sv = (const short*)(ptr + onesize_*n + nrmoff);
for (int j = 0; j < 3; j++)
normal[j] += (sv[j]/32767.0f) * multiplier;
case GE_VTYPE_NRM_16BIT >> 5:
{
const short *sv = (const short*)(ptr + onesize_*n + nrmoff);
for (int j = 0; j < 3; j++)
normal[j] += (sv[j]/32767.0f) * multiplier;
}
break;
}
break;
}
decoded += 12;
}
float *v = decoded[index].pos;
float *v = (float *)decoded;
if (morphcount == 1) {
switch (pos)
{
@ -475,6 +488,7 @@ void VertexDecoder::DecodeVerts(DecodedVertex *decoded, const void *verts, const
}
}
}
decoded += 12;
}
}

View file

@ -92,12 +92,12 @@ public:
void SetVertexType(u32 vtype);
// Gives read-only access to decFmt, the decoded-vertex format description
// that SetVertexType fills in (formats, offsets and stride).
const DecVtxFormat &GetDecVtxFmt()
{
	return decFmt;
}
void DecodeVerts(DecodedVertex *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const;
void DecodeVerts(u8 *decoded, const void *verts, const void *inds, int prim, int count, int *indexLowerBound, int *indexUpperBound) const;
// Reports whether col is non-zero.
// NOTE(review): presumably col == 0 means the vertex type carries no color - confirm.
bool hasColor() const
{
	const bool colorPresent = (col != 0);
	return colorPresent;
}
// Returns size, the per-vertex size of the source data that SetVertexType
// computes and aligns to the largest component.
int VertexSize() const
{
	return size;
}
private:
u32 fmt;
u32 fmt_;
DecVtxFormat decFmt;
bool throughmode;
@ -121,16 +121,17 @@ private:
int nweights;
};
// Reads decoded vertex formats in a convenient way. For software transform and debugging.
class VertexReader
{
public:
VertexReader(u8 *data, const DecVtxFormat &decFmt) : data_(data), decFmt_(decFmt) {}
VertexReader(u8 *base, const DecVtxFormat &decFmt) : base_(base), data_(base), decFmt_(decFmt) {}
void ReadPos(float pos[3]) {
switch (decFmt_.posfmt) {
case DEC_FLOAT_3: memcpy(pos, data_ + decFmt_.posoff, 12); break;
case DEC_FLOAT_3:
memcpy(pos, data_ + decFmt_.posoff, 12);
break;
case DEC_S16_3:
{
s16 *p = (s16 *)(data_ + decFmt_.posoff);
@ -149,8 +150,10 @@ public:
}
void ReadNrm(float nrm[3]) {
switch (decFmt_.nrmoff) {
case DEC_FLOAT_3: memcpy(nrm, data_ + decFmt_.nrmoff, 12); break;
switch (decFmt_.nrmfmt) {
case DEC_FLOAT_3:
memcpy(nrm, data_ + decFmt_.nrmoff, 12);
break;
case DEC_S16_3:
{
s16 *p = (s16 *)(data_ + decFmt_.nrmoff);
@ -171,7 +174,7 @@ public:
// Reads the current vertex's texture coordinate pair into uv.
// Only the DEC_FLOAT_2 layout is handled; for any other uvfmt, uv is left untouched.
void ReadUV(float uv[2]) {
	switch (decFmt_.uvfmt) {
	case DEC_FLOAT_2:
		// Copy two floats (8 bytes) from the UV offset. A preceding memcpy from nrmoff
		// (the normal's offset), followed by a break, used to make this copy unreachable.
		memcpy(uv, data_ + decFmt_.uvoff, 8);
		break;
	}
}
@ -218,11 +221,16 @@ public:
}
}
void Next() {
data_ += decFmt_.stride;
// Reports whether the format's c0fmt field is non-zero.
// NOTE(review): presumably a zero format code means "component absent" - confirm.
bool hasColor0() const
{
	const bool present = (decFmt_.c0fmt != 0);
	return present;
}
// Reports whether the format's nrmfmt field is non-zero.
// NOTE(review): presumably a zero format code means "component absent" - confirm.
bool hasNormal() const
{
	const bool present = (decFmt_.nrmfmt != 0);
	return present;
}
// Reports whether the format's uvfmt field is non-zero.
// NOTE(review): presumably a zero format code means "component absent" - confirm.
bool hasUV() const
{
	const bool present = (decFmt_.uvfmt != 0);
	return present;
}
// Repositions the read cursor on vertex `index`, counted in whole strides
// from the base pointer given to the constructor.
void Goto(int index)
{
	data_ = base_;
	data_ += index * decFmt_.stride;
}
private:
u8 *base_;
u8 *data_;
DecVtxFormat decFmt_;
int vtype_;