softgpu: Track CLUTs as states for binning.
This way we can have multiple CLUTs in process at once, which helps.
This commit is contained in:
parent
ba63d9cf09
commit
d6fa301ab1
9 changed files with 45 additions and 14 deletions
|
@ -86,7 +86,7 @@ static inline void DrawBinItem(const BinItem &item, const RasterizerState &state
|
|||
|
||||
class DrawBinItemsTask : public Task {
|
||||
public:
|
||||
DrawBinItemsTask(BinWaitable *notify, BinQueue<BinItem, 1024> &items, std::atomic<bool> &status, const BinQueue<RasterizerState, 32> &states)
|
||||
DrawBinItemsTask(BinWaitable *notify, BinQueue<BinItem, 1024> &items, std::atomic<bool> &status, const BinQueue<RasterizerState, 64> &states)
|
||||
: notify_(notify), items_(items), status_(status), states_(states) {
|
||||
}
|
||||
|
||||
|
@ -114,7 +114,7 @@ private:
|
|||
BinWaitable *notify_;
|
||||
BinQueue<BinItem, 1024> &items_;
|
||||
std::atomic<bool> &status_;
|
||||
const BinQueue<RasterizerState, 32> &states_;
|
||||
const BinQueue<RasterizerState, 64> &states_;
|
||||
};
|
||||
|
||||
BinManager::BinManager() {
|
||||
|
@ -137,6 +137,7 @@ void BinManager::UpdateState() {
|
|||
Flush();
|
||||
stateIndex_ = (int)states_.Push(RasterizerState());
|
||||
ComputeRasterizerState(&states_[stateIndex_]);
|
||||
states_[stateIndex_].samplerID.cached.clut = cluts_[clutIndex_].readable;
|
||||
|
||||
DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1());
|
||||
DrawingCoords scissorBR(gstate.getScissorX2(), gstate.getScissorY2());
|
||||
|
@ -165,6 +166,13 @@ void BinManager::UpdateState() {
|
|||
}
|
||||
}
|
||||
|
||||
void BinManager::UpdateClut(void *src) {
|
||||
if (cluts_.Full())
|
||||
Flush();
|
||||
clutIndex_ = (int)cluts_.Push(BinClut());
|
||||
memcpy(cluts_[clutIndex_].readable, src, sizeof(BinClut));
|
||||
}
|
||||
|
||||
void BinManager::AddTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2) {
|
||||
Vec2<int> d01((int)v0.screenpos.x - (int)v1.screenpos.x, (int)v0.screenpos.y - (int)v1.screenpos.y);
|
||||
Vec2<int> d02((int)v0.screenpos.x - (int)v2.screenpos.x, (int)v0.screenpos.y - (int)v2.screenpos.y);
|
||||
|
@ -310,6 +318,8 @@ void BinManager::Flush() {
|
|||
queue_.Reset();
|
||||
while (states_.Size() > 1)
|
||||
states_.SkipNext();
|
||||
while (cluts_.Size() > 1)
|
||||
cluts_.SkipNext();
|
||||
|
||||
queueRange_.x1 = 0x7FFFFFFF;
|
||||
queueRange_.y1 = 0x7FFFFFFF;
|
||||
|
|
|
@ -143,12 +143,17 @@ struct BinQueue {
|
|||
std::atomic<size_t> size_;
|
||||
};
|
||||
|
||||
// Storage for one CLUT snapshot, as held in BinManager's cluts_ queue.
// Currently exposes a single view of the data: readable, a raw byte array.
// BinManager::UpdateClut() fills it with sizeof(BinClut) bytes via memcpy.
union BinClut {
|
||||
uint8_t readable[1024];
|
||||
};
|
||||
|
||||
class BinManager {
|
||||
public:
|
||||
BinManager();
|
||||
~BinManager();
|
||||
|
||||
void UpdateState();
|
||||
void UpdateClut(void *src);
|
||||
|
||||
const Rasterizer::RasterizerState &State() {
|
||||
return states_[stateIndex_];
|
||||
|
@ -166,8 +171,10 @@ public:
|
|||
private:
|
||||
static constexpr int MAX_POSSIBLE_TASKS = 64;
|
||||
|
||||
BinQueue<Rasterizer::RasterizerState, 32> states_;
|
||||
BinQueue<Rasterizer::RasterizerState, 64> states_;
|
||||
int stateIndex_;
|
||||
BinQueue<BinClut, 64> cluts_;
|
||||
int clutIndex_;
|
||||
BinCoords scissor_;
|
||||
BinQueue<BinItem, 1024> queue_;
|
||||
BinCoords queueRange_;
|
||||
|
|
|
@ -22,7 +22,7 @@
|
|||
#include "GPU/GPUState.h"
|
||||
#include "GPU/Software/FuncId.h"
|
||||
|
||||
static_assert(sizeof(SamplerID) == sizeof(SamplerID::fullKey) + sizeof(SamplerID::cached), "Bad sampler ID size");
|
||||
static_assert(sizeof(SamplerID) == sizeof(SamplerID::fullKey) + sizeof(SamplerID::cached) + sizeof(SamplerID::pad), "Bad sampler ID size");
|
||||
static_assert(sizeof(PixelFuncID) == sizeof(PixelFuncID::fullKey) + sizeof(PixelFuncID::cached), "Bad pixel func ID size");
|
||||
|
||||
static inline GEComparison OptimizeRefByteCompare(GEComparison func, u8 ref) {
|
||||
|
|
|
@ -173,8 +173,15 @@ struct SamplerID {
|
|||
} sizes[8];
|
||||
uint32_t texBlendColor;
|
||||
uint32_t clutFormat;
|
||||
union {
|
||||
uint8_t *clut;
|
||||
uint16_t *clut16;
|
||||
uint32_t *clut32;
|
||||
};
|
||||
} cached;
|
||||
|
||||
uint32_t pad;
|
||||
|
||||
union {
|
||||
uint32_t fullKey;
|
||||
struct {
|
||||
|
|
|
@ -35,8 +35,6 @@
|
|||
using namespace Math3D;
|
||||
using namespace Rasterizer;
|
||||
|
||||
extern u32 clut[4096];
|
||||
|
||||
namespace Sampler {
|
||||
|
||||
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
|
@ -267,16 +265,16 @@ static inline u32 LookupColor(unsigned int index, unsigned int level, const Samp
|
|||
|
||||
switch (samplerID.ClutFmt()) {
|
||||
case GE_CMODE_16BIT_BGR5650:
|
||||
return RGB565ToRGBA8888(reinterpret_cast<u16*>(clut)[index + clutSharingOffset]);
|
||||
return RGB565ToRGBA8888(samplerID.cached.clut16[index + clutSharingOffset]);
|
||||
|
||||
case GE_CMODE_16BIT_ABGR5551:
|
||||
return RGBA5551ToRGBA8888(reinterpret_cast<u16*>(clut)[index + clutSharingOffset]);
|
||||
return RGBA5551ToRGBA8888(samplerID.cached.clut16[index + clutSharingOffset]);
|
||||
|
||||
case GE_CMODE_16BIT_ABGR4444:
|
||||
return RGBA4444ToRGBA8888(reinterpret_cast<u16*>(clut)[index + clutSharingOffset]);
|
||||
return RGBA4444ToRGBA8888(samplerID.cached.clut16[index + clutSharingOffset]);
|
||||
|
||||
case GE_CMODE_32BIT_ABGR8888:
|
||||
return clut[index + clutSharingOffset];
|
||||
return samplerID.cached.clut32[index + clutSharingOffset];
|
||||
|
||||
default:
|
||||
ERROR_LOG_REPORT(G3D, "Software: Unsupported palette format: %x", samplerID.ClutFmt());
|
||||
|
|
|
@ -29,8 +29,6 @@
|
|||
using namespace Gen;
|
||||
using namespace Rasterizer;
|
||||
|
||||
extern u32 clut[4096];
|
||||
|
||||
namespace Sampler {
|
||||
|
||||
FetchFunc SamplerJitCache::CompileFetch(const SamplerID &id) {
|
||||
|
@ -1253,8 +1251,10 @@ bool SamplerJitCache::Jit_ReadClutQuad(const SamplerID &id, bool level1) {
|
|||
regCache_.Release(vecLevelReg, RegCache::VEC_TEMP0);
|
||||
}
|
||||
|
||||
X64Reg idReg = GetSamplerID();
|
||||
X64Reg clutBaseReg = regCache_.Alloc(RegCache::GEN_TEMP1);
|
||||
MOV(PTRBITS, R(clutBaseReg), ImmPtr(clut));
|
||||
MOV(PTRBITS, R(clutBaseReg), MDisp(idReg, offsetof(SamplerID, cached.clut)));
|
||||
UnlockSamplerID(idReg);
|
||||
|
||||
X64Reg resultReg = regCache_.Find(level1 ? RegCache::VEC_RESULT1 : RegCache::VEC_RESULT);
|
||||
X64Reg maskReg = regCache_.Alloc(RegCache::VEC_TEMP0);
|
||||
|
@ -3457,8 +3457,10 @@ bool SamplerJitCache::Jit_ReadClutColor(const SamplerID &id) {
|
|||
regCache_.Release(temp2Reg, RegCache::GEN_TEMP2);
|
||||
}
|
||||
|
||||
X64Reg idReg = GetSamplerID();
|
||||
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP1);
|
||||
MOV(PTRBITS, R(temp1Reg), ImmPtr(clut));
|
||||
MOV(PTRBITS, R(temp1Reg), MDisp(idReg, offsetof(SamplerID, cached.clut)));
|
||||
UnlockSamplerID(idReg);
|
||||
|
||||
switch (id.ClutFmt()) {
|
||||
case GE_CMODE_16BIT_BGR5650:
|
||||
|
|
|
@ -617,6 +617,8 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) {
|
|||
DEBUG_LOG(G3D, "Software: Invalid CLUT address, filling with garbage instead of crashing");
|
||||
memset(clut, 0x00, clutTotalBytes);
|
||||
}
|
||||
|
||||
drawEngine_->transformUnit.NotifyClutUpdate(clut);
|
||||
}
|
||||
break;
|
||||
|
||||
|
|
|
@ -613,6 +613,10 @@ void TransformUnit::Flush() {
|
|||
GPUDebug::NotifyDraw();
|
||||
}
|
||||
|
||||
// Notifies the binner that the CLUT changed: passes src unchanged to
// binner_->UpdateClut().
void TransformUnit::NotifyClutUpdate(void *src) {
|
||||
binner_->UpdateClut(src);
|
||||
}
|
||||
|
||||
// TODO: This probably is not the best interface.
|
||||
// Also, we should try to merge this into the similar function in DrawEngineCommon.
|
||||
bool TransformUnit::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) {
|
||||
|
|
|
@ -120,6 +120,7 @@ public:
|
|||
bool GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices);
|
||||
|
||||
void Flush();
|
||||
void NotifyClutUpdate(void *src);
|
||||
|
||||
private:
|
||||
VertexData ReadVertex(VertexReader &vreader, bool &outside_range_flag);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue