softgpu: Cache CLUT params in sampler state.

With this change, pixel drawing and sampling no longer read from gstate.
Only a few uses remain, in rasterization.
This commit is contained in:
Unknown W. Brackets 2022-01-15 18:09:09 -08:00
parent c0e85e6170
commit edb79d968f
6 changed files with 27 additions and 29 deletions

View file

@ -414,6 +414,7 @@ void ComputeSamplerID(SamplerID *id_out) {
id.hasClutMask = gstate.getClutIndexMask() != 0xFF;
id.hasClutShift = gstate.getClutIndexShift() != 0;
id.hasClutOffset = gstate.getClutIndexStartPos() != 0;
id.cached.clutFormat = gstate.clutformat;
}
id.clampS = gstate.isTexCoordClampedS();

View file

@ -172,6 +172,7 @@ struct SamplerID {
uint16_t h;
} sizes[8];
uint32_t texBlendColor;
uint32_t clutFormat;
} cached;
union {

View file

@ -106,14 +106,13 @@ struct RegCache {
VEC_INDEX = 0x0005,
GEN_SRC_ALPHA = 0x0100,
GEN_GSTATE = 0x0101,
GEN_ID = 0x0102,
GEN_CONST_BASE = 0x0103,
GEN_STENCIL = 0x0104,
GEN_COLOR_OFF = 0x0105,
GEN_DEPTH_OFF = 0x0106,
GEN_RESULT = 0x0107,
GEN_SHIFTVAL = 0x0108,
GEN_ID = 0x0101,
GEN_CONST_BASE = 0x0102,
GEN_STENCIL = 0x0103,
GEN_COLOR_OFF = 0x0104,
GEN_DEPTH_OFF = 0x0105,
GEN_RESULT = 0x0106,
GEN_SHIFTVAL = 0x0107,
GEN_ARG_X = 0x0180,
GEN_ARG_Y = 0x0181,

View file

@ -285,7 +285,16 @@ static inline u32 LookupColor(unsigned int index, unsigned int level, const Samp
}
// Maps a raw texel index to a CLUT entry index using the clutformat bits cached in
// the sampler ID (samplerID.cached.clutFormat), so no gstate read is needed here.
//
// index:     raw index value read from the texture.
// samplerID: sampler state; supplies the hasClut* flags, cached clutFormat, and ClutFmt().
// Returns the transformed index: ((index >> shift) & mask) | wrapped start offset.
uint32_t TransformClutIndex(uint32_t index, const SamplerID &samplerID) {
	if (samplerID.hasClutShift || samplerID.hasClutMask || samplerID.hasClutOffset) {
		// Shift is bits 2-6 of clutformat, mask is bits 8-15, and the start
		// position is bits 16-20 scaled by 16 entries.
		const uint8_t shift = (samplerID.cached.clutFormat >> 2) & 0x1F;
		const uint8_t mask = (samplerID.cached.clutFormat >> 8) & 0xFF;
		const uint16_t offset = ((samplerID.cached.clutFormat >> 16) & 0x1F) << 4;
		// We need to wrap any entries beyond the first 1024 bytes.
		const uint16_t offsetMask = samplerID.ClutFmt() == GE_CMODE_32BIT_ABGR8888 ? 0xFF : 0x1FF;
		return ((index >> shift) & mask) | (offset & offsetMask);
	}
	// No shift, mask, or offset flagged: only the low 8 bits of the index are used.
	return index & 0xFF;
}
struct Nearest4 {

View file

@ -67,7 +67,6 @@ private:
void Describe(const std::string &message);
Rasterizer::RegCache::Reg GetZeroVec();
Rasterizer::RegCache::Reg GetGState();
Rasterizer::RegCache::Reg GetSamplerID();
void UnlockSamplerID(Rasterizer::RegCache::Reg &r);

View file

@ -297,7 +297,6 @@ NearestFunc SamplerJitCache::CompileNearest(const SamplerID &id) {
regCache_.Unlock(vReg, RegCache::GEN_ARG_V);
regCache_.ForceRetain(RegCache::GEN_ARG_V);
bool hadGState = regCache_.Has(RegCache::GEN_GSTATE);
bool hadId = regCache_.Has(RegCache::GEN_ID);
bool hadZero = regCache_.Has(RegCache::VEC_ZERO);
success = success && Jit_ReadTextureFormat(id);
@ -314,8 +313,6 @@ NearestFunc SamplerJitCache::CompileNearest(const SamplerID &id) {
regCache_.Unlock(resultReg, RegCache::GEN_RESULT);
// Since we're inside a conditional, make sure these go away if we allocated them.
if (!hadGState && regCache_.Has(RegCache::GEN_GSTATE))
regCache_.ForceRelease(RegCache::GEN_GSTATE);
if (!hadId && regCache_.Has(RegCache::GEN_ID))
regCache_.ForceRelease(RegCache::GEN_ID);
if (!hadZero && regCache_.Has(RegCache::VEC_ZERO))
@ -951,15 +948,6 @@ RegCache::Reg SamplerJitCache::GetZeroVec() {
return regCache_.Find(RegCache::VEC_ZERO);
}
// Returns the register registered under GEN_GSTATE in the register cache.
// If none is present yet, allocates one and emits a load of the address of
// gstate.nop into it before returning it.
// NOTE(review): callers appear to use this register as the base for
// offsetof(GPUgstate, ...) displacements - confirm nop is the first field.
RegCache::Reg SamplerJitCache::GetGState() {
	// Fast path: the pointer is already held in a register.
	if (regCache_.Has(RegCache::GEN_GSTATE))
		return regCache_.Find(RegCache::GEN_GSTATE);

	// Otherwise allocate a register and load the pointer into it.
	X64Reg gstateReg = regCache_.Alloc(RegCache::GEN_GSTATE);
	MOV(PTRBITS, R(gstateReg), ImmPtr(&gstate.nop));
	return gstateReg;
}
RegCache::Reg SamplerJitCache::GetSamplerID() {
if (regCache_.Has(RegCache::GEN_ARG_ID))
return regCache_.Find(RegCache::GEN_ARG_ID);
@ -1162,14 +1150,14 @@ bool SamplerJitCache::Jit_TransformClutIndexQuad(const SamplerID &id, int bitsPe
X64Reg indexReg = regCache_.Find(RegCache::VEC_INDEX);
bool maskedIndex = false;
// Okay, first load the actual gstate clutformat bits we'll use.
// Okay, first load the actual samplerID clutformat bits we'll use.
X64Reg formatReg = regCache_.Alloc(RegCache::VEC_TEMP0);
X64Reg gstateReg = GetGState();
X64Reg idReg = GetSamplerID();
if (cpu_info.bAVX2 && !id.hasClutShift)
VPBROADCASTD(128, formatReg, MDisp(gstateReg, offsetof(GPUgstate, clutformat)));
VPBROADCASTD(128, formatReg, MDisp(idReg, offsetof(SamplerID, cached.clutFormat)));
else
MOVD_xmm(formatReg, MDisp(gstateReg, offsetof(GPUgstate, clutformat)));
regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE);
MOVD_xmm(formatReg, MDisp(idReg, offsetof(SamplerID, cached.clutFormat)));
UnlockSamplerID(idReg);
// Shift = (clutformat >> 2) & 0x1F
if (id.hasClutShift) {
@ -3392,8 +3380,9 @@ bool SamplerJitCache::Jit_TransformClutIndex(const SamplerID &id, int bitsPerInd
_assert_msg_(hasRCX, "Could not obtain RCX, locked?");
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP1);
MOV(PTRBITS, R(temp1Reg), ImmPtr(&gstate.clutformat));
MOV(32, R(temp1Reg), MatR(temp1Reg));
X64Reg idReg = GetSamplerID();
MOV(32, R(temp1Reg), MDisp(idReg, offsetof(SamplerID, cached.clutFormat)));
UnlockSamplerID(idReg);
X64Reg resultReg = regCache_.Find(RegCache::GEN_RESULT);