Compare commits

...
Sign in to create a new pull request.

2 commits

Author SHA1 Message Date
Henrik Rydgård
9d1a02e652 Allow merging of drawcalls with matching output vertex formats but different inputs
Broken out from #17479
2023-05-25 17:52:01 +02:00
Henrik Rydgård
1f1757537d Allow vertex decoding steps to have their own different decoders.
As long as they decode to the same output format.
2023-05-25 17:52:01 +02:00
4 changed files with 84 additions and 29 deletions

View file

@ -74,7 +74,7 @@ VertexDecoder *DrawEngineCommon::GetVertexDecoder(u32 vtype) {
int DrawEngineCommon::ComputeNumVertsToDecode() const {
int vertsToDecode = 0;
if (drawCalls_[0].indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
if (drawCalls_[0].IndexType() == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
for (int i = 0; i < numDrawCalls_; i++) {
const DeferredDrawCall &dc = drawCalls_[i];
vertsToDecode += dc.vertexCount;
@ -180,6 +180,8 @@ void DrawEngineCommon::NotifyConfigChanged() {
decJitCache_->Clear();
lastVType_ = -1;
dec_ = nullptr;
// Just make sure there's no pending draw, since we wipe the decoders. There shouldn't be one.
numDrawCalls_ = 0;
decoderMap_.Iterate([&](const uint32_t vtype, VertexDecoder *decoder) {
delete decoder;
});
@ -621,9 +623,12 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
int indexLowerBound = dc.indexLowerBound;
int indexUpperBound = dc.indexUpperBound;
if (dc.indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
int indexType = dc.IndexType();
const VertexDecoder *dec = dc.dec;
if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
// Decode the verts (and at the same time apply morphing/skinning). Simple.
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride,
dec->DecodeVerts(dest + decodedVerts * (int)dec->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
decodedVerts += indexUpperBound - indexLowerBound + 1;
@ -637,13 +642,14 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
// these as much as possible, so we make sure here to combine as many as possible
// into one nice big drawcall, sharing data.
// NOTE: We can't do that if the vertex decoder changes, so let's check for that.
// 1. Look ahead to find the max index, only looking at "matching" drawcalls.
// Expand the lower and upper bounds as we go.
int lastMatch = i;
const int total = numDrawCalls_;
for (int j = i + 1; j < total; ++j) {
if (drawCalls_[j].verts != dc.verts)
if (drawCalls_[j].verts != dc.verts || drawCalls_[j].dec != dc.dec)
break;
indexLowerBound = std::min(indexLowerBound, (int)drawCalls_[j].indexLowerBound);
@ -652,7 +658,7 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
}
// 2. Loop through the drawcalls, translating indices as we go.
switch (dc.indexType) {
switch (indexType) {
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
for (int j = i; j <= lastMatch; j++) {
bool clockwise = true;
@ -690,7 +696,7 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
}
// 3. Decode that range of vertex data.
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride,
dec->DecodeVerts(dest + decodedVerts * (int)dec->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
decodedVerts += vertexCount;
@ -792,7 +798,7 @@ inline uint32_t lowbias32_r(uint32_t x) {
return x;
}
// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
// vertTypeID is the vertex type BUT with the UVGen mode smashed into the top bits.
void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls_ >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
DispatchFlush();
@ -834,6 +840,7 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
dc.verts = verts;
dc.inds = inds;
dc.vertexCount = vertexCount;
dc.dec = dec_;
dc.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
dc.prim = prim;
dc.cullMode = cullMode;

View file

@ -189,7 +189,7 @@ protected:
u16 *decIndex_ = nullptr;
// Cached vertex decoders
u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_...
u32 lastVType_ = -1; // corresponds to dec_, but also has a few extra bits (texgen type).
DenseHashMap<u32, VertexDecoder *, nullptr> decoderMap_;
VertexDecoder *dec_ = nullptr;
VertexDecoderJitCache *decJitCache_ = nullptr;
@ -202,6 +202,7 @@ protected:
struct DeferredDrawCall {
const void *verts;
const void *inds;
VertexDecoder *dec;
u32 vertexCount;
u8 indexType;
s8 prim;
@ -209,6 +210,9 @@ protected:
u16 indexLowerBound;
u16 indexUpperBound;
UVScale uvScale;
int IndexType() const {
return (dec->VertexType() & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
}
};
enum { MAX_DEFERRED_DRAW_CALLS = 128 };

View file

@ -1063,6 +1063,8 @@ static const StepFunction posstep_through[4] = {
&VertexDecoder::Step_PosFloatThrough,
};
// IMPORTANT: When changing how the formats map, your changes must match the rules
// in IsVTypeCompatible in GPUCommonHW. See the comments on that function.
void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache) {
fmt_ = fmt;
throughmode = (fmt & GE_VTYPE_THROUGH) != 0;

View file

@ -54,7 +54,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_SPLINE, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Spline },
// Changing the vertex type requires us to flush.
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommonHW::Execute_VertexType },
{ GE_CMD_VERTEXTYPE, FLAG_EXECUTEONCHANGE, 0, &GPUCommonHW::Execute_VertexType },
{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommonHW::Execute_LoadClut},
@ -435,10 +435,8 @@ void GPUCommonHW::DeviceRestore(Draw::DrawContext *draw) {
void GPUCommonHW::UpdateCmdInfo() {
if (g_Config.bSoftwareSkinning) {
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommonHW::Execute_VertexTypeSkinning;
} else {
cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommonHW::Execute_VertexType;
}
@ -826,34 +824,78 @@ void GPUCommonHW::FastRunLoop(DisplayList &list) {
downcount = 0;
}
void GPUCommonHW::Execute_VertexType(u32 op, u32 diff) {
if (diff) {
// TODO: We only need to dirty vshader-state here if the output format will be different.
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
// This is tricky - the rules here need to match how the vertex decoder behaves. If it always produces
// the same output format for a given component, then we check that existence matches. This is valid for:
// * Color
// * Position (though existence is always true)
// * Texcoords
// * Morph weight count (though not format! there are two!)
// * Skin weight count if using software skinning (more restricted with hardware skinning)
// Note that the following are different:
// * Normals (two different output formats, s8 and float)
static bool IsVTypeCompatibleSkinning(u32 prev, u32 diff) {
	// prev is the vertex type before the change; diff is the XOR of old and new.
	// Only these fields are allowed to differ at all. Any other changed bit
	// makes the two vertex types incompatible.
	const u32 tolerated = GE_VTYPE_MORPHCOUNT_MASK | GE_VTYPE_WEIGHTCOUNT_MASK |
		GE_VTYPE_TC_MASK | GE_VTYPE_COL_MASK | GE_VTYPE_POS_MASK;
	if (diff & ~tolerated) {
		return false;
	}

	// Reconstruct the new vertex type from the old one and the changed bits.
	const u32 next = prev ^ diff;

	// Texcoords and colors may change format, but must not appear or disappear.
	const bool texcoordBefore = (prev & GE_VTYPE_TC_MASK) != 0;
	const bool texcoordAfter = (next & GE_VTYPE_TC_MASK) != 0;
	const bool colorBefore = (prev & GE_VTYPE_COL_MASK) != 0;
	const bool colorAfter = (next & GE_VTYPE_COL_MASK) != 0;
	return texcoordBefore == texcoordAfter && colorBefore == colorAfter;
}
static bool IsVTypeCompatible(u32 prev, u32 diff) {
	// prev is the vertex type before the change; diff is the XOR of old and new.
	// Only the morph count, texcoord, color and position fields may differ.
	// Unlike IsVTypeCompatibleSkinning, a weight count change is NOT tolerated here.
	const u32 tolerated = GE_VTYPE_MORPHCOUNT_MASK | GE_VTYPE_TC_MASK |
		GE_VTYPE_COL_MASK | GE_VTYPE_POS_MASK;
	const bool touchesOtherBits = (diff & ~tolerated) != 0;
	if (touchesOtherBits) {
		return false;
	}

	// Reconstruct the new vertex type from the old one and the changed bits.
	const u32 next = prev ^ diff;

	// A component may change format, but it must not switch between absent and present.
	const bool texcoordPresenceChanged =
		((prev & GE_VTYPE_TC_MASK) != 0) != ((next & GE_VTYPE_TC_MASK) != 0);
	const bool colorPresenceChanged =
		((prev & GE_VTYPE_COL_MASK) != 0) != ((next & GE_VTYPE_COL_MASK) != 0);
	return !(texcoordPresenceChanged || colorPresenceChanged);
}
if (diff & GE_VTYPE_THROUGH_MASK) {
// Switching between through and non-through, we need to invalidate a bunch of stuff.
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
}
// Handles a change of the vertex type register. diff holds the bits that changed;
// the new value has already been stored in gstate.vertType when this runs.
void GPUCommonHW::Execute_VertexType(u32 op, u32 diff) {
	if (diff == 0) {
		// Nothing changed, nothing to do.
		return;
	}

	// Recover the vertex type that was active before this command.
	const u32 oldVertType = gstate.vertType ^ diff;
	const bool compatible = IsVTypeCompatible(oldVertType, diff);
	if (!compatible) {
		// Flush with the previous vertex type in place, then re-apply the change.
		// The order of these statements matters.
		gstate.vertType = oldVertType;
		Flush();
		gstate.vertType ^= diff;
		gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
	}

	const bool throughModeToggled = (diff & GE_VTYPE_THROUGH_MASK) != 0;
	if (throughModeToggled) {
		// Switching between through and non-through, we need to invalidate a bunch of stuff.
		gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
	}
}
void GPUCommonHW::Execute_VertexTypeSkinning(u32 op, u32 diff) {
// Don't flush when weight count changes.
if (diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) {
if (!diff) {
return;
}
u32 prevType = gstate.vertType ^ diff;
if (!IsVTypeCompatibleSkinning(prevType, diff)) {
// Restore and flush
gstate.vertType ^= diff;
gstate.vertType = prevType;
Flush();
gstate.vertType ^= diff;
// In this case, we may be doing weights and morphs.
// Update any bone matrix uniforms so it uses them correctly.
if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
gstate_c.Dirty(gstate_c.deferredVertTypeDirty);
gstate_c.deferredVertTypeDirty = 0;
}
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
}
if (diff & GE_VTYPE_THROUGH_MASK)
// In this case, we may be doing weights and morphs.
// Update any bone matrix uniforms so it uses them correctly.
if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
gstate_c.Dirty(gstate_c.deferredVertTypeDirty);
gstate_c.deferredVertTypeDirty = 0;
}
if (diff & GE_VTYPE_THROUGH_MASK) // through-mode changed on or off. Lots of dirtying needed.
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
}