Compare commits

...
Sign in to create a new pull request.

2 commits

Author SHA1 Message Date
Henrik Rydgård
9d1a02e652 Allow merging of drawcalls with matching output vertex formats but different inputs
Broken out from #17479
2023-05-25 17:52:01 +02:00
Henrik Rydgård
1f1757537d Allow vertex decoding steps to have their own different decoders.
As long as they decode to the same output format.
2023-05-25 17:52:01 +02:00
4 changed files with 84 additions and 29 deletions

View file

@ -74,7 +74,7 @@ VertexDecoder *DrawEngineCommon::GetVertexDecoder(u32 vtype) {
int DrawEngineCommon::ComputeNumVertsToDecode() const { int DrawEngineCommon::ComputeNumVertsToDecode() const {
int vertsToDecode = 0; int vertsToDecode = 0;
if (drawCalls_[0].indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) { if (drawCalls_[0].IndexType() == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
for (int i = 0; i < numDrawCalls_; i++) { for (int i = 0; i < numDrawCalls_; i++) {
const DeferredDrawCall &dc = drawCalls_[i]; const DeferredDrawCall &dc = drawCalls_[i];
vertsToDecode += dc.vertexCount; vertsToDecode += dc.vertexCount;
@ -180,6 +180,8 @@ void DrawEngineCommon::NotifyConfigChanged() {
decJitCache_->Clear(); decJitCache_->Clear();
lastVType_ = -1; lastVType_ = -1;
dec_ = nullptr; dec_ = nullptr;
// Just make sure there's no pending draw, since we wipe the decoders. There shouldn't be one.
numDrawCalls_ = 0;
decoderMap_.Iterate([&](const uint32_t vtype, VertexDecoder *decoder) { decoderMap_.Iterate([&](const uint32_t vtype, VertexDecoder *decoder) {
delete decoder; delete decoder;
}); });
@ -621,9 +623,12 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
int indexLowerBound = dc.indexLowerBound; int indexLowerBound = dc.indexLowerBound;
int indexUpperBound = dc.indexUpperBound; int indexUpperBound = dc.indexUpperBound;
if (dc.indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) { int indexType = dc.IndexType();
const VertexDecoder *dec = dc.dec;
if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
// Decode the verts (and at the same time apply morphing/skinning). Simple. // Decode the verts (and at the same time apply morphing/skinning). Simple.
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride, dec->DecodeVerts(dest + decodedVerts * (int)dec->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound); dc.verts, indexLowerBound, indexUpperBound);
decodedVerts += indexUpperBound - indexLowerBound + 1; decodedVerts += indexUpperBound - indexLowerBound + 1;
@ -637,13 +642,14 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between // inds pointer but the same base vertex pointer. We'd like to reuse vertices between
// these as much as possible, so we make sure here to combine as many as possible // these as much as possible, so we make sure here to combine as many as possible
// into one nice big drawcall, sharing data. // into one nice big drawcall, sharing data.
// NOTE: We can't do that if the vertex decoder changes, so let's check for that.
// 1. Look ahead to find the max index, only looking at "matching" drawcalls. // 1. Look ahead to find the max index, only looking at "matching" drawcalls.
// Expand the lower and upper bounds as we go. // Expand the lower and upper bounds as we go.
int lastMatch = i; int lastMatch = i;
const int total = numDrawCalls_; const int total = numDrawCalls_;
for (int j = i + 1; j < total; ++j) { for (int j = i + 1; j < total; ++j) {
if (drawCalls_[j].verts != dc.verts) if (drawCalls_[j].verts != dc.verts || drawCalls_[j].dec != dc.dec)
break; break;
indexLowerBound = std::min(indexLowerBound, (int)drawCalls_[j].indexLowerBound); indexLowerBound = std::min(indexLowerBound, (int)drawCalls_[j].indexLowerBound);
@ -652,7 +658,7 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
} }
// 2. Loop through the drawcalls, translating indices as we go. // 2. Loop through the drawcalls, translating indices as we go.
switch (dc.indexType) { switch (indexType) {
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT: case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) { for (int j = i; j <= lastMatch; j++) {
bool clockwise = true; bool clockwise = true;
@ -690,7 +696,7 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
} }
// 3. Decode that range of vertex data. // 3. Decode that range of vertex data.
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride, dec->DecodeVerts(dest + decodedVerts * (int)dec->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound); dc.verts, indexLowerBound, indexUpperBound);
decodedVerts += vertexCount; decodedVerts += vertexCount;
@ -792,7 +798,7 @@ inline uint32_t lowbias32_r(uint32_t x) {
return x; return x;
} }
// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits. // vertTypeID is the vertex type BUT with the UVGen mode smashed into the top bits.
void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) { void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls_ >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) { if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls_ >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
DispatchFlush(); DispatchFlush();
@ -834,6 +840,7 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
dc.verts = verts; dc.verts = verts;
dc.inds = inds; dc.inds = inds;
dc.vertexCount = vertexCount; dc.vertexCount = vertexCount;
dc.dec = dec_;
dc.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT; dc.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
dc.prim = prim; dc.prim = prim;
dc.cullMode = cullMode; dc.cullMode = cullMode;

View file

@ -189,7 +189,7 @@ protected:
u16 *decIndex_ = nullptr; u16 *decIndex_ = nullptr;
// Cached vertex decoders // Cached vertex decoders
u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_... u32 lastVType_ = -1; // corresponds to dec_, but also has a few extra bits (texgen type).
DenseHashMap<u32, VertexDecoder *, nullptr> decoderMap_; DenseHashMap<u32, VertexDecoder *, nullptr> decoderMap_;
VertexDecoder *dec_ = nullptr; VertexDecoder *dec_ = nullptr;
VertexDecoderJitCache *decJitCache_ = nullptr; VertexDecoderJitCache *decJitCache_ = nullptr;
@ -202,6 +202,7 @@ protected:
struct DeferredDrawCall { struct DeferredDrawCall {
const void *verts; const void *verts;
const void *inds; const void *inds;
VertexDecoder *dec;
u32 vertexCount; u32 vertexCount;
u8 indexType; u8 indexType;
s8 prim; s8 prim;
@ -209,6 +210,9 @@ protected:
u16 indexLowerBound; u16 indexLowerBound;
u16 indexUpperBound; u16 indexUpperBound;
UVScale uvScale; UVScale uvScale;
// Returns the index format of this draw call, extracted from the vertex type
// reported by its decoder. Dereferences dec unconditionally, so dec must be set.
int IndexType() const {
	const auto idxBits = dec->VertexType() & GE_VTYPE_IDX_MASK;
	return idxBits >> GE_VTYPE_IDX_SHIFT;
}
}; };
enum { MAX_DEFERRED_DRAW_CALLS = 128 }; enum { MAX_DEFERRED_DRAW_CALLS = 128 };

View file

@ -1063,6 +1063,8 @@ static const StepFunction posstep_through[4] = {
&VertexDecoder::Step_PosFloatThrough, &VertexDecoder::Step_PosFloatThrough,
}; };
// IMPORTANT: When changing how the formats map, your changes must match the rules
// in IsVTypeCompatible in GPUCommonHW. See the comments on that function.
void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache) { void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache) {
fmt_ = fmt; fmt_ = fmt;
throughmode = (fmt & GE_VTYPE_THROUGH) != 0; throughmode = (fmt & GE_VTYPE_THROUGH) != 0;

View file

@ -54,7 +54,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_SPLINE, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Spline }, { GE_CMD_SPLINE, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Spline },
// Changing the vertex type requires us to flush. // Changing the vertex type requires us to flush.
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommonHW::Execute_VertexType }, { GE_CMD_VERTEXTYPE, FLAG_EXECUTEONCHANGE, 0, &GPUCommonHW::Execute_VertexType },
{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommonHW::Execute_LoadClut}, { GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommonHW::Execute_LoadClut},
@ -435,10 +435,8 @@ void GPUCommonHW::DeviceRestore(Draw::DrawContext *draw) {
void GPUCommonHW::UpdateCmdInfo() { void GPUCommonHW::UpdateCmdInfo() {
if (g_Config.bSoftwareSkinning) { if (g_Config.bSoftwareSkinning) {
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommonHW::Execute_VertexTypeSkinning; cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommonHW::Execute_VertexTypeSkinning;
} else { } else {
cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommonHW::Execute_VertexType; cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommonHW::Execute_VertexType;
} }
@ -826,34 +824,78 @@ void GPUCommonHW::FastRunLoop(DisplayList &list) {
downcount = 0; downcount = 0;
} }
void GPUCommonHW::Execute_VertexType(u32 op, u32 diff) { // This is tricky - the rules of this needs to match how the vertex decoder behaves. If it always produces
if (diff) { // the same output format for a given component, then we check that existence matches. This is valid for:
// TODO: We only need to dirty vshader-state here if the output format will be different. // * Color
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); // * Position (though existence is always true)
// * Texcoords
// * Morph weight count (though not format! there are two!)
// * Skin weight count if using software skinning (more restricted with hardware skinning)
// Note that the following are different:
// * Normals (two different output formats, s8 and float)
// Software-skinning variant of the vertex type compatibility check.
// prev is the previous vertex type and diff holds the bits that changed, so
// (prev ^ diff) is the new vertex type. Returns true when the change is limited
// to morph count, weight count, texcoord format, color format or position format,
// AND texcoords/colors did not appear or disappear. The caller flushes when this
// returns false.
static bool IsVTypeCompatibleSkinning(u32 prev, u32 diff) {
	// Fields that are allowed to differ between two mergeable vertex types.
	const u32 mergeableBits = GE_VTYPE_MORPHCOUNT_MASK | GE_VTYPE_WEIGHTCOUNT_MASK | GE_VTYPE_TC_MASK | GE_VTYPE_COL_MASK | GE_VTYPE_POS_MASK;
	if (diff & ~mergeableBits) {
		// Something else changed (anything not listed above).
		return false;
	}

	const u32 next = prev ^ diff;

	// A component's format may change, but its presence may not.
	const bool hadTexcoord = (prev & GE_VTYPE_TC_MASK) != 0;
	const bool hasTexcoord = (next & GE_VTYPE_TC_MASK) != 0;
	const bool hadColor = (prev & GE_VTYPE_COL_MASK) != 0;
	const bool hasColor = (next & GE_VTYPE_COL_MASK) != 0;
	return hadTexcoord == hasTexcoord && hadColor == hasColor;
}
// Vertex type compatibility check used when software skinning is off.
// prev is the previous vertex type and diff holds the bits that changed, so
// (prev ^ diff) is the new vertex type. Returns true when the change is limited
// to morph count, texcoord format, color format or position format, AND
// texcoords/colors did not appear or disappear. Unlike IsVTypeCompatibleSkinning,
// a weight count change is NOT tolerated here. The caller flushes when this
// returns false.
static bool IsVTypeCompatible(u32 prev, u32 diff) {
	// Fields that are allowed to differ between two mergeable vertex types.
	const u32 mergeableBits = GE_VTYPE_MORPHCOUNT_MASK | GE_VTYPE_TC_MASK | GE_VTYPE_COL_MASK | GE_VTYPE_POS_MASK;
	if (diff & ~mergeableBits) {
		// Something else changed (anything not listed above).
		return false;
	}

	const u32 next = prev ^ diff;

	// A component's format may change, but its presence may not.
	const bool hadTexcoord = (prev & GE_VTYPE_TC_MASK) != 0;
	const bool hasTexcoord = (next & GE_VTYPE_TC_MASK) != 0;
	const bool hadColor = (prev & GE_VTYPE_COL_MASK) != 0;
	const bool hasColor = (next & GE_VTYPE_COL_MASK) != 0;
	return hadTexcoord == hasTexcoord && hadColor == hasColor;
}
if (diff & GE_VTYPE_THROUGH_MASK) {
// Switching between through and non-through, we need to invalidate a bunch of stuff. void GPUCommonHW::Execute_VertexType(u32 op, u32 diff) {
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE); if (!diff) {
} return;
}
u32 prevType = gstate.vertType ^ diff;
if (!IsVTypeCompatible(prevType, diff)) {
// Restore and flush
gstate.vertType = prevType;
Flush();
gstate.vertType ^= diff;
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
}
if (diff & GE_VTYPE_THROUGH_MASK) {
// Switching between through and non-through, we need to invalidate a bunch of stuff.
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
} }
} }
void GPUCommonHW::Execute_VertexTypeSkinning(u32 op, u32 diff) { void GPUCommonHW::Execute_VertexTypeSkinning(u32 op, u32 diff) {
// Don't flush when weight count changes. if (!diff) {
if (diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) { return;
}
u32 prevType = gstate.vertType ^ diff;
if (!IsVTypeCompatibleSkinning(prevType, diff)) {
// Restore and flush // Restore and flush
gstate.vertType ^= diff; gstate.vertType = prevType;
Flush(); Flush();
gstate.vertType ^= diff; gstate.vertType ^= diff;
// In this case, we may be doing weights and morphs.
// Update any bone matrix uniforms so it uses them correctly.
if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
gstate_c.Dirty(gstate_c.deferredVertTypeDirty);
gstate_c.deferredVertTypeDirty = 0;
}
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE); gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
} }
if (diff & GE_VTYPE_THROUGH_MASK) // In this case, we may be doing weights and morphs.
// Update any bone matrix uniforms so it uses them correctly.
if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
gstate_c.Dirty(gstate_c.deferredVertTypeDirty);
gstate_c.deferredVertTypeDirty = 0;
}
if (diff & GE_VTYPE_THROUGH_MASK) // through-mode changed on or off. Lots of dirtying needed.
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE); gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
} }