Compare commits
2 commits
master
...
merge-draw
Author | SHA1 | Date | |
---|---|---|---|
|
9d1a02e652 | ||
|
1f1757537d |
4 changed files with 84 additions and 29 deletions
|
@ -74,7 +74,7 @@ VertexDecoder *DrawEngineCommon::GetVertexDecoder(u32 vtype) {
|
|||
|
||||
int DrawEngineCommon::ComputeNumVertsToDecode() const {
|
||||
int vertsToDecode = 0;
|
||||
if (drawCalls_[0].indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
|
||||
if (drawCalls_[0].IndexType() == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
|
||||
for (int i = 0; i < numDrawCalls_; i++) {
|
||||
const DeferredDrawCall &dc = drawCalls_[i];
|
||||
vertsToDecode += dc.vertexCount;
|
||||
|
@ -180,6 +180,8 @@ void DrawEngineCommon::NotifyConfigChanged() {
|
|||
decJitCache_->Clear();
|
||||
lastVType_ = -1;
|
||||
dec_ = nullptr;
|
||||
// Just make sure there's no pending draw, since we wipe the decoders. There shouldn't be one.
|
||||
numDrawCalls_ = 0;
|
||||
decoderMap_.Iterate([&](const uint32_t vtype, VertexDecoder *decoder) {
|
||||
delete decoder;
|
||||
});
|
||||
|
@ -621,9 +623,12 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
|
|||
int indexLowerBound = dc.indexLowerBound;
|
||||
int indexUpperBound = dc.indexUpperBound;
|
||||
|
||||
if (dc.indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
|
||||
int indexType = dc.IndexType();
|
||||
const VertexDecoder *dec = dc.dec;
|
||||
|
||||
if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
|
||||
// Decode the verts (and at the same time apply morphing/skinning). Simple.
|
||||
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride,
|
||||
dec->DecodeVerts(dest + decodedVerts * (int)dec->GetDecVtxFmt().stride,
|
||||
dc.verts, indexLowerBound, indexUpperBound);
|
||||
decodedVerts += indexUpperBound - indexLowerBound + 1;
|
||||
|
||||
|
@ -637,13 +642,14 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
|
|||
// inds pointer but the same base vertex pointer. We'd like to reuse vertices between
|
||||
// these as much as possible, so we make sure here to combine as many as possible
|
||||
// into one nice big drawcall, sharing data.
|
||||
// NOTE: We can't do that if the vertex decoder changes, so let's check for that.
|
||||
|
||||
// 1. Look ahead to find the max index, only looking at "matching" drawcalls.
|
||||
// Expand the lower and upper bounds as we go.
|
||||
int lastMatch = i;
|
||||
const int total = numDrawCalls_;
|
||||
for (int j = i + 1; j < total; ++j) {
|
||||
if (drawCalls_[j].verts != dc.verts)
|
||||
if (drawCalls_[j].verts != dc.verts || drawCalls_[j].dec != dc.dec)
|
||||
break;
|
||||
|
||||
indexLowerBound = std::min(indexLowerBound, (int)drawCalls_[j].indexLowerBound);
|
||||
|
@ -652,7 +658,7 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
|
|||
}
|
||||
|
||||
// 2. Loop through the drawcalls, translating indices as we go.
|
||||
switch (dc.indexType) {
|
||||
switch (indexType) {
|
||||
case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT:
|
||||
for (int j = i; j <= lastMatch; j++) {
|
||||
bool clockwise = true;
|
||||
|
@ -690,7 +696,7 @@ void DrawEngineCommon::DecodeVertsStep(u8 *dest, int &i, int &decodedVerts) {
|
|||
}
|
||||
|
||||
// 3. Decode that range of vertex data.
|
||||
dec_->DecodeVerts(dest + decodedVerts * (int)dec_->GetDecVtxFmt().stride,
|
||||
dec->DecodeVerts(dest + decodedVerts * (int)dec->GetDecVtxFmt().stride,
|
||||
dc.verts, indexLowerBound, indexUpperBound);
|
||||
decodedVerts += vertexCount;
|
||||
|
||||
|
@ -792,7 +798,7 @@ inline uint32_t lowbias32_r(uint32_t x) {
|
|||
return x;
|
||||
}
|
||||
|
||||
// vertTypeID is the vertex type but with the UVGen mode smashed into the top bits.
|
||||
// vertTypeID is the vertex type BUT with the UVGen mode smashed into the top bits.
|
||||
void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimitiveType prim, int vertexCount, u32 vertTypeID, int cullMode, int *bytesRead) {
|
||||
if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls_ >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls_ + vertexCount > VERTEX_BUFFER_MAX) {
|
||||
DispatchFlush();
|
||||
|
@ -834,6 +840,7 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
|
|||
dc.verts = verts;
|
||||
dc.inds = inds;
|
||||
dc.vertexCount = vertexCount;
|
||||
dc.dec = dec_;
|
||||
dc.indexType = (vertTypeID & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
|
||||
dc.prim = prim;
|
||||
dc.cullMode = cullMode;
|
||||
|
|
|
@ -189,7 +189,7 @@ protected:
|
|||
u16 *decIndex_ = nullptr;
|
||||
|
||||
// Cached vertex decoders
|
||||
u32 lastVType_ = -1; // corresponds to dec_. Could really just pick it out of dec_...
|
||||
u32 lastVType_ = -1; // corresponds to dec_, but also has a few extra bits (texgen type).
|
||||
DenseHashMap<u32, VertexDecoder *, nullptr> decoderMap_;
|
||||
VertexDecoder *dec_ = nullptr;
|
||||
VertexDecoderJitCache *decJitCache_ = nullptr;
|
||||
|
@ -202,6 +202,7 @@ protected:
|
|||
struct DeferredDrawCall {
|
||||
const void *verts;
|
||||
const void *inds;
|
||||
VertexDecoder *dec;
|
||||
u32 vertexCount;
|
||||
u8 indexType;
|
||||
s8 prim;
|
||||
|
@ -209,6 +210,9 @@ protected:
|
|||
u16 indexLowerBound;
|
||||
u16 indexUpperBound;
|
||||
UVScale uvScale;
|
||||
int IndexType() const {
|
||||
return (dec->VertexType() & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT;
|
||||
}
|
||||
};
|
||||
|
||||
enum { MAX_DEFERRED_DRAW_CALLS = 128 };
|
||||
|
|
|
@ -1063,6 +1063,8 @@ static const StepFunction posstep_through[4] = {
|
|||
&VertexDecoder::Step_PosFloatThrough,
|
||||
};
|
||||
|
||||
// IMPORTANT: When changing how the formats map, your changes must match the rules
|
||||
// in IsVTypeCompatible in GPUCommonHW. See the comments on that function.
|
||||
void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache) {
|
||||
fmt_ = fmt;
|
||||
throughmode = (fmt & GE_VTYPE_THROUGH) != 0;
|
||||
|
|
|
@ -54,7 +54,7 @@ const CommonCommandTableEntry commonCommandTable[] = {
|
|||
{ GE_CMD_SPLINE, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Spline },
|
||||
|
||||
// Changing the vertex type requires us to flush.
|
||||
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommonHW::Execute_VertexType },
|
||||
{ GE_CMD_VERTEXTYPE, FLAG_EXECUTEONCHANGE, 0, &GPUCommonHW::Execute_VertexType },
|
||||
|
||||
{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommonHW::Execute_LoadClut},
|
||||
|
||||
|
@ -435,10 +435,8 @@ void GPUCommonHW::DeviceRestore(Draw::DrawContext *draw) {
|
|||
|
||||
void GPUCommonHW::UpdateCmdInfo() {
|
||||
if (g_Config.bSoftwareSkinning) {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommonHW::Execute_VertexTypeSkinning;
|
||||
} else {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommonHW::Execute_VertexType;
|
||||
}
|
||||
|
||||
|
@ -826,34 +824,78 @@ void GPUCommonHW::FastRunLoop(DisplayList &list) {
|
|||
downcount = 0;
|
||||
}
|
||||
|
||||
void GPUCommonHW::Execute_VertexType(u32 op, u32 diff) {
|
||||
if (diff) {
|
||||
// TODO: We only need to dirty vshader-state here if the output format will be different.
|
||||
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
|
||||
// This is tricky - the rules here need to match how the vertex decoder behaves. If it always produces
|
||||
// the same output format for a given component, then we check that existence matches. This is valid for:
|
||||
// * Color
|
||||
// * Position (though existence is always true)
|
||||
// * Texcoords
|
||||
// * Morph weight count (though not format! there are two!)
|
||||
// * Skin weight count if using software skinning (more restricted with hardware skinning)
|
||||
// Note that the following are different:
|
||||
// * Normals (two different output formats, s8 and float)
|
||||
// Decides whether a vertex type change is "compatible" with the previous type.
//   prev - the vertex type before the change.
//   diff - the changed bits (previous type XOR new type).
// Returns false if any bit outside the morph count, weight count, texcoord, color
// and position fields changed, or if texcoords or colors went from absent to
// present (or the reverse). Returns true otherwise.
static bool IsVTypeCompatibleSkinning(u32 prev, u32 diff) {
	// Fields that are permitted to change; weight count is included in this variant.
	const u32 allowedBits = GE_VTYPE_MORPHCOUNT_MASK | GE_VTYPE_WEIGHTCOUNT_MASK | GE_VTYPE_TC_MASK | GE_VTYPE_COL_MASK | GE_VTYPE_POS_MASK;
	if (diff & ~allowedBits)
		return false;

	// Reconstruct the new vertex type from the old one and the changed bits.
	const u32 next = prev ^ diff;

	// Only the existence of texcoords and colors has to match, not their format.
	const bool hadTexcoord = (prev & GE_VTYPE_TC_MASK) != 0;
	const bool hasTexcoord = (next & GE_VTYPE_TC_MASK) != 0;
	const bool hadColor = (prev & GE_VTYPE_COL_MASK) != 0;
	const bool hasColor = (next & GE_VTYPE_COL_MASK) != 0;
	return hadTexcoord == hasTexcoord && hadColor == hasColor;
}
|
||||
// Decides whether a vertex type change is "compatible" with the previous type.
//   prev - the vertex type before the change.
//   diff - the changed bits (previous type XOR new type).
// Returns false if any bit outside the morph count, texcoord, color and position
// fields changed (a weight count change is therefore rejected here), or if
// texcoords or colors went from absent to present (or the reverse).
// Returns true otherwise.
static bool IsVTypeCompatible(u32 prev, u32 diff) {
	// Fields that are permitted to change.
	const u32 allowedBits = GE_VTYPE_MORPHCOUNT_MASK | GE_VTYPE_TC_MASK | GE_VTYPE_COL_MASK | GE_VTYPE_POS_MASK;
	if (diff & ~allowedBits)
		return false;

	// Reconstruct the new vertex type from the old one and the changed bits.
	const u32 next = prev ^ diff;

	// Only the existence of texcoords and colors has to match, not their format.
	const bool hadTexcoord = (prev & GE_VTYPE_TC_MASK) != 0;
	const bool hasTexcoord = (next & GE_VTYPE_TC_MASK) != 0;
	const bool hadColor = (prev & GE_VTYPE_COL_MASK) != 0;
	const bool hasColor = (next & GE_VTYPE_COL_MASK) != 0;
	return hadTexcoord == hasTexcoord && hadColor == hasColor;
}
|
||||
|
||||
if (diff & GE_VTYPE_THROUGH_MASK) {
|
||||
// Switching between through and non-through, we need to invalidate a bunch of stuff.
|
||||
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
|
||||
}
|
||||
|
||||
// Handles a vertex type command. `diff` carries the bits that changed relative to
// the previous vertex type; `op` is not used by this handler.
void GPUCommonHW::Execute_VertexType(u32 op, u32 diff) {
	// No bits changed, so there is nothing to do.
	if (diff == 0)
		return;

	// XOR-ing the current value with the changed bits recovers the previous vertex type.
	const u32 oldType = gstate.vertType ^ diff;
	const bool compatible = IsVTypeCompatible(oldType, diff);
	if (!compatible) {
		// Flush with the previous vertex type in place, then re-apply the change.
		gstate.vertType = oldType;
		Flush();
		gstate.vertType ^= diff;
		gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
	}

	if ((diff & GE_VTYPE_THROUGH_MASK) != 0) {
		// Through mode was toggled on or off, which invalidates a lot of dependent state.
		gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
	}
}
|
||||
|
||||
void GPUCommonHW::Execute_VertexTypeSkinning(u32 op, u32 diff) {
|
||||
// Don't flush when weight count changes.
|
||||
if (diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) {
|
||||
if (!diff) {
|
||||
return;
|
||||
}
|
||||
|
||||
u32 prevType = gstate.vertType ^ diff;
|
||||
if (!IsVTypeCompatibleSkinning(prevType, diff)) {
|
||||
// Restore and flush
|
||||
gstate.vertType ^= diff;
|
||||
gstate.vertType = prevType;
|
||||
Flush();
|
||||
gstate.vertType ^= diff;
|
||||
// In this case, we may be doing weights and morphs.
|
||||
// Update any bone matrix uniforms so it uses them correctly.
|
||||
if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
|
||||
gstate_c.Dirty(gstate_c.deferredVertTypeDirty);
|
||||
gstate_c.deferredVertTypeDirty = 0;
|
||||
}
|
||||
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
|
||||
}
|
||||
if (diff & GE_VTYPE_THROUGH_MASK)
|
||||
// In this case, we may be doing weights and morphs.
|
||||
// Update any bone matrix uniforms so it uses them correctly.
|
||||
if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
|
||||
gstate_c.Dirty(gstate_c.deferredVertTypeDirty);
|
||||
gstate_c.deferredVertTypeDirty = 0;
|
||||
}
|
||||
if (diff & GE_VTYPE_THROUGH_MASK) // through-mode changed on or off. Lots of dirtying needed.
|
||||
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue