diff --git a/GPU/Common/DrawEngineCommon.cpp b/GPU/Common/DrawEngineCommon.cpp index 234d2af22..2222a3736 100644 --- a/GPU/Common/DrawEngineCommon.cpp +++ b/GPU/Common/DrawEngineCommon.cpp @@ -74,7 +74,7 @@ VertexDecoder *DrawEngineCommon::GetVertexDecoder(u32 vtype) { int DrawEngineCommon::ComputeNumVertsToDecode() const { int vertsToDecode = 0; - if (drawCalls_[0].indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) { + if (drawCalls_[0].IndexType() == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) { for (int i = 0; i < numDrawCalls_; i++) { const DeferredDrawCall &dc = drawCalls_[i]; vertsToDecode += dc.vertexCount; @@ -180,6 +180,8 @@ void DrawEngineCommon::NotifyConfigChanged() { decJitCache_->Clear(); lastVType_ = -1; dec_ = nullptr; + // Just make sure there's no pending draw, since we wipe the decoders. There shouldn't be one. + numDrawCalls_ = 0; decoderMap_.Iterate([&](const uint32_t vtype, VertexDecoder *decoder) { delete decoder; }); @@ -621,9 +623,12 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) { int indexLowerBound = dc.indexLowerBound; int indexUpperBound = dc.indexUpperBound; - if (dc.indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) { + int indexType = dc.IndexType(); + const VertexDecoder *dec = dc.dec; + + if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) { // Decode the verts (and at the same time apply morphing/skinning). Simple. - dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride, + dec->DecodeVerts(dest + decodedVerts * (int)dec->GetDecVtxFmt().stride, dc.verts, indexLowerBound, indexUpperBound); decodedVerts += indexUpperBound - indexLowerBound + 1; @@ -637,13 +642,14 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) { // inds pointer but the same base vertex pointer. We'd like to reuse vertices between // these as much as possible, so we make sure here to combine as many as possible // into one nice big drawcall, sharing data. 
+ // NOTE: We can't do that if the vertex decoder changes, so let's check for that. // 1. Look ahead to find the max index, only looking as "matching" drawcalls. // Expand the lower and upper bounds as we go. int lastMatch = i; const int total = numDrawCalls_; for (int j = i + 1; j < total; ++j) { - if (drawCalls_[j].verts != dc.verts) + if (drawCalls_[j].verts != dc.verts || drawCalls_[j].dec != dc.dec) break; indexLowerBound = std::min(indexLowerBound, (int)drawCalls_[j].indexLowerBound); @@ -652,7 +658,7 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) { } // 2. Loop through the drawcalls, translating indices as we go. - switch (dc.indexType) { + switch (indexType) { case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT: for (int j = i; j <= lastMatch; j++) { bool clockwise = true; @@ -690,7 +696,7 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) { } // 3. Decode that range of vertex data. - dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride, + dec->DecodeVerts(dest + decodedVerts * (int)dec->GetDecVtxFmt().stride, dc.verts, indexLowerBound, indexUpperBound); decodedVerts += vertexCount; @@ -792,7 +798,7 @@ inline uint32_t lowbias32_r(uint32_t x) { return x; } -// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits. +// vertTypeID is the vertex type BUT with the UVGen mode smashed into the top bits. 
void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) { if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls_ >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) { DispatchFlush(); @@ -834,6 +840,7 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti dc.verts = verts; dc.inds = inds; dc.vertexCount = vertexCount; + dc.dec = dec_; dc.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT; dc.prim = prim; dc.cullMode = cullMode; diff --git a/GPU/Common/DrawEngineCommon.h b/GPU/Common/DrawEngineCommon.h index c9b51ff02..e2411d1c3 100644 --- a/GPU/Common/DrawEngineCommon.h +++ b/GPU/Common/DrawEngineCommon.h @@ -189,7 +189,7 @@ protected: u16 *decIndex_ = nullptr; // Cached vertex decoders - u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_... + u32 lastVType_ = -1; // corresponds to dec_, but also has a few extra bits (texgen type). DenseHashMap decoderMap_; VertexDecoder *dec_ = nullptr; VertexDecoderJitCache *decJitCache_ = nullptr; @@ -202,6 +202,7 @@ protected: struct DeferredDrawCall { const void *verts; const void *inds; + VertexDecoder *dec; u32 vertexCount; u8 indexType; s8 prim; @@ -209,6 +210,9 @@ protected: u16 indexLowerBound; u16 indexUpperBound; UVScale uvScale; + int IndexType() const { + return (dec->VertexType() & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT; + } }; enum { MAX_DEFERRED_DRAW_CALLS = 128 }; diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp index 915b89e54..592db5b2c 100644 --- a/GPU/Common/VertexDecoderCommon.cpp +++ b/GPU/Common/VertexDecoderCommon.cpp @@ -1063,6 +1063,8 @@ static const StepFunction posstep_through[4] = { &VertexDecoder::Step_PosFloatThrough, }; +// IMPORTANT: When changing how the formats map, your changes must match the rules +// in IsVTypeCompatible in GPUCommonHW. 
See the comments on that function. void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache) { fmt_ = fmt; throughmode = (fmt & GE_VTYPE_THROUGH) != 0; diff --git a/GPU/GPUCommonHW.cpp b/GPU/GPUCommonHW.cpp index 9587fab0a..498bc91e5 100644 --- a/GPU/GPUCommonHW.cpp +++ b/GPU/GPUCommonHW.cpp @@ -54,7 +54,7 @@ const CommonCommandTableEntry commonCommandTable[] = { { GE_CMD_SPLINE, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Spline }, // Changing the vertex type requires us to flush. - { GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommonHW::Execute_VertexType }, + { GE_CMD_VERTEXTYPE, FLAG_EXECUTEONCHANGE, 0, &GPUCommonHW::Execute_VertexType }, { GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommonHW::Execute_LoadClut}, @@ -435,10 +435,8 @@ void GPUCommonHW::DeviceRestore(Draw::DrawContext *draw) { void GPUCommonHW::UpdateCmdInfo() { if (g_Config.bSoftwareSkinning) { - cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE; cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommonHW::Execute_VertexTypeSkinning; } else { - cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE; cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommonHW::Execute_VertexType; } @@ -826,34 +824,78 @@ void GPUCommonHW::FastRunLoop(DisplayList &list) { downcount = 0; } -void GPUCommonHW::Execute_VertexType(u32 op, u32 diff) { - if (diff) { - // TODO: We only need to dirty vshader-state here if the output format will be different. - gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); +// This is tricky - the rules here need to match how the vertex decoder behaves. If it always produces +// the same output format for a given component, then we check that existence matches. This is valid for: +// * Color +// * Position (though existence is always true) +// * Texcoords +// * Morph weight count (though not format! there are two!) 
+// * Skin weight count if using software skinning (more restricted with hardware skinning) +// Note that the following are different: +// * Normals (two different output formats, s8 and float) +static bool IsVTypeCompatibleSkinning(u32 prev, u32 diff) { + // Did anything outside the simple component types, morph count and weight count change? + if ((diff & ~(GE_VTYPE_MORPHCOUNT_MASK | GE_VTYPE_WEIGHTCOUNT_MASK | GE_VTYPE_TC_MASK | GE_VTYPE_COL_MASK | GE_VTYPE_POS_MASK)) != 0) + return false; + u32 cur = prev ^ diff; + if (((prev & GE_VTYPE_TC_MASK) != 0) != ((cur & GE_VTYPE_TC_MASK) != 0)) + return false; + if (((prev & GE_VTYPE_COL_MASK) != 0) != ((cur & GE_VTYPE_COL_MASK) != 0)) + return false; + return true; +} +static bool IsVTypeCompatible(u32 prev, u32 diff) { + // Did anything outside the simple component types and morph count change? + if ((diff & ~(GE_VTYPE_MORPHCOUNT_MASK | GE_VTYPE_TC_MASK | GE_VTYPE_COL_MASK | GE_VTYPE_POS_MASK)) != 0) + return false; + u32 cur = prev ^ diff; + if (((prev & GE_VTYPE_TC_MASK) != 0) != ((cur & GE_VTYPE_TC_MASK) != 0)) + return false; + if (((prev & GE_VTYPE_COL_MASK) != 0) != ((cur & GE_VTYPE_COL_MASK) != 0)) + return false; + return true; +} - if (diff & GE_VTYPE_THROUGH_MASK) { - // Switching between through and non-through, we need to invalidate a bunch of stuff. - gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE); - } + +void GPUCommonHW::Execute_VertexType(u32 op, u32 diff) { + if (!diff) { + return; + } + + u32 prevType = gstate.vertType ^ diff; + if (!IsVTypeCompatible(prevType, diff)) { + // Restore and flush + gstate.vertType = prevType; + Flush(); + gstate.vertType ^= diff; + gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); + } + if (diff & GE_VTYPE_THROUGH_MASK) { + // Switching between through and non-through, we need to invalidate a bunch of stuff. 
+ gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE); } } void GPUCommonHW::Execute_VertexTypeSkinning(u32 op, u32 diff) { - // Don't flush when weight count changes. - if (diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) { + if (!diff) { + return; + } + + u32 prevType = gstate.vertType ^ diff; + if (!IsVTypeCompatibleSkinning(prevType, diff)) { // Restore and flush - gstate.vertType ^= diff; + gstate.vertType = prevType; Flush(); gstate.vertType ^= diff; - // In this case, we may be doing weights and morphs. - // Update any bone matrix uniforms so it uses them correctly. - if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) { - gstate_c.Dirty(gstate_c.deferredVertTypeDirty); - gstate_c.deferredVertTypeDirty = 0; - } gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); } - if (diff & GE_VTYPE_THROUGH_MASK) + // In this case, we may be doing weights and morphs. + // Update any bone matrix uniforms so it uses them correctly. + if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) { + gstate_c.Dirty(gstate_c.deferredVertTypeDirty); + gstate_c.deferredVertTypeDirty = 0; + } + if (diff & GE_VTYPE_THROUGH_MASK) // through-mode changed on or off. Lots of dirtying needed. gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE); }