softgpu: Cache CLUT params in sampler state.
With this change, pixel drawing and sampling no longer read gstate; only a little gstate use remains in rasterization.
This commit is contained in:
parent
c0e85e6170
commit
edb79d968f
6 changed files with 27 additions and 29 deletions
|
@ -414,6 +414,7 @@ void ComputeSamplerID(SamplerID *id_out) {
|
|||
id.hasClutMask = gstate.getClutIndexMask() != 0xFF;
|
||||
id.hasClutShift = gstate.getClutIndexShift() != 0;
|
||||
id.hasClutOffset = gstate.getClutIndexStartPos() != 0;
|
||||
id.cached.clutFormat = gstate.clutformat;
|
||||
}
|
||||
|
||||
id.clampS = gstate.isTexCoordClampedS();
|
||||
|
|
|
@ -172,6 +172,7 @@ struct SamplerID {
|
|||
uint16_t h;
|
||||
} sizes[8];
|
||||
uint32_t texBlendColor;
|
||||
uint32_t clutFormat;
|
||||
} cached;
|
||||
|
||||
union {
|
||||
|
|
|
@ -106,14 +106,13 @@ struct RegCache {
|
|||
VEC_INDEX = 0x0005,
|
||||
|
||||
GEN_SRC_ALPHA = 0x0100,
|
||||
GEN_GSTATE = 0x0101,
|
||||
GEN_ID = 0x0102,
|
||||
GEN_CONST_BASE = 0x0103,
|
||||
GEN_STENCIL = 0x0104,
|
||||
GEN_COLOR_OFF = 0x0105,
|
||||
GEN_DEPTH_OFF = 0x0106,
|
||||
GEN_RESULT = 0x0107,
|
||||
GEN_SHIFTVAL = 0x0108,
|
||||
GEN_ID = 0x0101,
|
||||
GEN_CONST_BASE = 0x0102,
|
||||
GEN_STENCIL = 0x0103,
|
||||
GEN_COLOR_OFF = 0x0104,
|
||||
GEN_DEPTH_OFF = 0x0105,
|
||||
GEN_RESULT = 0x0106,
|
||||
GEN_SHIFTVAL = 0x0107,
|
||||
|
||||
GEN_ARG_X = 0x0180,
|
||||
GEN_ARG_Y = 0x0181,
|
||||
|
|
|
@ -285,7 +285,16 @@ static inline u32 LookupColor(unsigned int index, unsigned int level, const Samp
|
|||
}
|
||||
|
||||
// Transforms a raw index into a CLUT index using the shift, mask, and offset
// fields packed into samplerID.cached.clutFormat.
//
// NOTE(review): this text is a rendered commit diff without +/- markers. The
// `return gstate.transformClutIndex(index);` line below appears to be the
// removed pre-change body, and everything after it the replacement that reads
// the cached sampler state instead of gstate - confirm against the repository.
uint32_t TransformClutIndex(uint32_t index, const SamplerID &samplerID) {
|
||||
return gstate.transformClutIndex(index);
|
||||
// Only do the full transform when the sampler ID flags a non-default shift, mask, or offset.
if (samplerID.hasClutShift || samplerID.hasClutMask || samplerID.hasClutOffset) {
|
||||
// Bits 2-6 of clutFormat: right shift applied to the index.
const uint8_t shift = (samplerID.cached.clutFormat >> 2) & 0x1F;
|
||||
// Bits 8-15 of clutFormat: mask applied after the shift.
const uint8_t mask = (samplerID.cached.clutFormat >> 8) & 0xFF;
|
||||
// Bits 16-20 of clutFormat, scaled by 16 (<< 4): offset OR'd into the result.
const uint16_t offset = ((samplerID.cached.clutFormat >> 16) & 0x1F) << 4;
|
||||
// We need to wrap any entries beyond the first 1024 bytes.
|
||||
// 0xFF keeps 256 entries for the 32-bit format; 0x1FF keeps 512 entries otherwise
// (presumably the remaining formats are 16-bit, so both cover 1024 bytes - TODO confirm).
const uint16_t offsetMask = samplerID.ClutFmt() == GE_CMODE_32BIT_ABGR8888 ? 0xFF : 0x1FF;
|

||||
return ((index >> shift) & mask) | (offset & offsetMask);
|
||||
}
|
||||
// No shift, mask, or offset flagged: only the low 8 bits of the index are used.
return index & 0xFF;
|
||||
}
|
||||
|
||||
struct Nearest4 {
|
||||
|
|
|
@ -67,7 +67,6 @@ private:
|
|||
void Describe(const std::string &message);
|
||||
|
||||
Rasterizer::RegCache::Reg GetZeroVec();
|
||||
Rasterizer::RegCache::Reg GetGState();
|
||||
Rasterizer::RegCache::Reg GetSamplerID();
|
||||
void UnlockSamplerID(Rasterizer::RegCache::Reg &r);
|
||||
|
||||
|
|
|
@ -297,7 +297,6 @@ NearestFunc SamplerJitCache::CompileNearest(const SamplerID &id) {
|
|||
regCache_.Unlock(vReg, RegCache::GEN_ARG_V);
|
||||
regCache_.ForceRetain(RegCache::GEN_ARG_V);
|
||||
|
||||
bool hadGState = regCache_.Has(RegCache::GEN_GSTATE);
|
||||
bool hadId = regCache_.Has(RegCache::GEN_ID);
|
||||
bool hadZero = regCache_.Has(RegCache::VEC_ZERO);
|
||||
success = success && Jit_ReadTextureFormat(id);
|
||||
|
@ -314,8 +313,6 @@ NearestFunc SamplerJitCache::CompileNearest(const SamplerID &id) {
|
|||
regCache_.Unlock(resultReg, RegCache::GEN_RESULT);
|
||||
|
||||
// Since we're inside a conditional, make sure these go away if we allocated them.
|
||||
if (!hadGState && regCache_.Has(RegCache::GEN_GSTATE))
|
||||
regCache_.ForceRelease(RegCache::GEN_GSTATE);
|
||||
if (!hadId && regCache_.Has(RegCache::GEN_ID))
|
||||
regCache_.ForceRelease(RegCache::GEN_ID);
|
||||
if (!hadZero && regCache_.Has(RegCache::VEC_ZERO))
|
||||
|
@ -951,15 +948,6 @@ RegCache::Reg SamplerJitCache::GetZeroVec() {
|
|||
return regCache_.Find(RegCache::VEC_ZERO);
|
||||
}
|
||||
|
||||
// Returns the register cached under RegCache::GEN_GSTATE, allocating it and
// loading the address of gstate.nop into it if the cache does not have one yet.
//
// NOTE(review): this text is a rendered commit diff without +/- markers; this
// function appears to be one of the deletions in this commit (the commit message
// says sampling no longer uses gstate) - confirm against the repository.
RegCache::Reg SamplerJitCache::GetGState() {
|
||||
if (!regCache_.Has(RegCache::GEN_GSTATE)) {
|
||||
X64Reg r = regCache_.Alloc(RegCache::GEN_GSTATE);
|
||||
// Point the register at gstate.nop; presumably this is the start of GPUgstate, since
// a call site in this diff addresses fields via offsetof(GPUgstate, ...) - TODO confirm.
MOV(PTRBITS, R(r), ImmPtr(&gstate.nop));
|
||||
return r;
|
||||
}
|
||||
// Already allocated: hand back the existing register.
return regCache_.Find(RegCache::GEN_GSTATE);
|
||||
}
|
||||
|
||||
RegCache::Reg SamplerJitCache::GetSamplerID() {
|
||||
if (regCache_.Has(RegCache::GEN_ARG_ID))
|
||||
return regCache_.Find(RegCache::GEN_ARG_ID);
|
||||
|
@ -1162,14 +1150,14 @@ bool SamplerJitCache::Jit_TransformClutIndexQuad(const SamplerID &id, int bitsPe
|
|||
X64Reg indexReg = regCache_.Find(RegCache::VEC_INDEX);
|
||||
bool maskedIndex = false;
|
||||
|
||||
// Okay, first load the actual gstate clutformat bits we'll use.
|
||||
// Okay, first load the actual samplerID clutformat bits we'll use.
|
||||
X64Reg formatReg = regCache_.Alloc(RegCache::VEC_TEMP0);
|
||||
X64Reg gstateReg = GetGState();
|
||||
X64Reg idReg = GetSamplerID();
|
||||
if (cpu_info.bAVX2 && !id.hasClutShift)
|
||||
VPBROADCASTD(128, formatReg, MDisp(gstateReg, offsetof(GPUgstate, clutformat)));
|
||||
VPBROADCASTD(128, formatReg, MDisp(idReg, offsetof(SamplerID, cached.clutFormat)));
|
||||
else
|
||||
MOVD_xmm(formatReg, MDisp(gstateReg, offsetof(GPUgstate, clutformat)));
|
||||
regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE);
|
||||
MOVD_xmm(formatReg, MDisp(idReg, offsetof(SamplerID, cached.clutFormat)));
|
||||
UnlockSamplerID(idReg);
|
||||
|
||||
// Shift = (clutformat >> 2) & 0x1F
|
||||
if (id.hasClutShift) {
|
||||
|
@ -3392,8 +3380,9 @@ bool SamplerJitCache::Jit_TransformClutIndex(const SamplerID &id, int bitsPerInd
|
|||
_assert_msg_(hasRCX, "Could not obtain RCX, locked?");
|
||||
|
||||
X64Reg temp1Reg = regCache_.Alloc(RegCache::GEN_TEMP1);
|
||||
MOV(PTRBITS, R(temp1Reg), ImmPtr(&gstate.clutformat));
|
||||
MOV(32, R(temp1Reg), MatR(temp1Reg));
|
||||
X64Reg idReg = GetSamplerID();
|
||||
MOV(32, R(temp1Reg), MDisp(idReg, offsetof(SamplerID, cached.clutFormat)));
|
||||
UnlockSamplerID(idReg);
|
||||
|
||||
X64Reg resultReg = regCache_.Find(RegCache::GEN_RESULT);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue