GPU: Remove now-unused vertex decoder funcs.

We always convert to float now, so these functions are no longer used.
This commit is contained in:
Unknown W. Brackets 2017-05-06 18:55:16 -07:00
parent 6b3944d329
commit 257f8dbbc6
5 changed files with 0 additions and 221 deletions

View file

@ -121,15 +121,12 @@ static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
{&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat},
{&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat},
{&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double},
{&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale},
{&VertexDecoder::Step_TcU16Prescale, &VertexDecoderJitCache::Jit_TcU16Prescale},
{&VertexDecoder::Step_TcFloatPrescale, &VertexDecoderJitCache::Jit_TcFloatPrescale},
{&VertexDecoder::Step_TcU16Through, &VertexDecoderJitCache::Jit_TcU16Through},
{&VertexDecoder::Step_TcFloatThrough, &VertexDecoderJitCache::Jit_TcFloatThrough},
{&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble},
// {&VertexDecoder::Step_TcU16ThroughToFloat, &VertexDecoderJitCache::Jit_TcU16ThroughToFloat},
{&VertexDecoder::Step_NormalS8, &VertexDecoderJitCache::Jit_NormalS8},
@ -571,31 +568,6 @@ void VertexDecoderJitCache::Jit_TcFloat() {
STR(tempReg2, dstReg, dec_->decFmt.uvoff + 4);
}
void VertexDecoderJitCache::Jit_TcU16Through() {
LDRH(tempReg1, srcReg, dec_->tcoff);
LDRH(tempReg2, srcReg, dec_->tcoff + 2);
// TODO: Cleanup.
MOVP2R(scratchReg, &gstate_c.vertBounds.minU);
auto updateSide = [&](ARMReg r, CCFlags cc, u32 off) {
LDRH(tempReg3, scratchReg, off);
CMP(r, tempReg3);
SetCC(cc);
STRH(r, scratchReg, off);
SetCC(CC_AL);
};
// TODO: Can this actually be fast? Hmm, floats aren't better.
updateSide(tempReg1, CC_LT, offsetof(KnownVertexBounds, minU));
updateSide(tempReg1, CC_GT, offsetof(KnownVertexBounds, maxU));
updateSide(tempReg2, CC_LT, offsetof(KnownVertexBounds, minV));
updateSide(tempReg2, CC_GT, offsetof(KnownVertexBounds, maxV));
ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 16));
STR(tempReg1, dstReg, dec_->decFmt.uvoff);
}
void VertexDecoderJitCache::Jit_TcFloatThrough() {
LDR(tempReg1, srcReg, dec_->tcoff);
LDR(tempReg2, srcReg, dec_->tcoff + 4);
@ -603,22 +575,6 @@ void VertexDecoderJitCache::Jit_TcFloatThrough() {
STR(tempReg2, dstReg, dec_->decFmt.uvoff + 4);
}
void VertexDecoderJitCache::Jit_TcU16Double() {
LDRH(tempReg1, srcReg, dec_->tcoff);
LDRH(tempReg2, srcReg, dec_->tcoff + 2);
LSL(tempReg1, tempReg1, 1);
ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 17));
STR(tempReg1, dstReg, dec_->decFmt.uvoff);
}
void VertexDecoderJitCache::Jit_TcU16ThroughDouble() {
LDRH(tempReg1, srcReg, dec_->tcoff);
LDRH(tempReg2, srcReg, dec_->tcoff + 2);
LSL(tempReg1, tempReg1, 1);
ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 17));
STR(tempReg1, dstReg, dec_->decFmt.uvoff);
}
void VertexDecoderJitCache::Jit_TcU8Prescale() {
if (cpu_info.bNEON) {
// TODO: Needs testing

View file

@ -95,15 +95,12 @@ static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
{&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat},
{&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat},
{&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double},
{&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale},
{&VertexDecoder::Step_TcU16Prescale, &VertexDecoderJitCache::Jit_TcU16Prescale},
{&VertexDecoder::Step_TcFloatPrescale, &VertexDecoderJitCache::Jit_TcFloatPrescale},
{&VertexDecoder::Step_TcU16Through, &VertexDecoderJitCache::Jit_TcU16Through},
{&VertexDecoder::Step_TcFloatThrough, &VertexDecoderJitCache::Jit_TcFloatThrough},
{&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble},
// {&VertexDecoder::Step_TcU16ThroughToFloat, &VertexDecoderJitCache::Jit_TcU16ThroughToFloat},
{&VertexDecoder::Step_NormalS8, &VertexDecoderJitCache::Jit_NormalS8},
@ -582,45 +579,11 @@ void VertexDecoderJitCache::Jit_Color5551() {
CSEL(fullAlphaReg, fullAlphaReg, WZR, CC_EQ);
}
void VertexDecoderJitCache::Jit_TcU16Through() {
LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff);
LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2);
auto updateSide = [&](ARM64Reg src, CCFlags cc, ARM64Reg dst) {
CMP(src, dst);
CSEL(dst, src, dst, cc);
};
updateSide(tempReg1, CC_LT, boundsMinUReg);
updateSide(tempReg1, CC_GT, boundsMaxUReg);
updateSide(tempReg2, CC_LT, boundsMinVReg);
updateSide(tempReg2, CC_GT, boundsMaxVReg);
ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 16));
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
}
void VertexDecoderJitCache::Jit_TcFloatThrough() {
LDP(INDEX_SIGNED, tempReg1, tempReg2, srcReg, dec_->tcoff);
STP(INDEX_SIGNED, tempReg1, tempReg2, dstReg, dec_->decFmt.uvoff);
}
void VertexDecoderJitCache::Jit_TcU16Double() {
LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff);
LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2);
LSL(tempReg1, tempReg1, 1);
ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 17));
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
}
void VertexDecoderJitCache::Jit_TcU16ThroughDouble() {
LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->tcoff);
LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2);
LSL(tempReg1, tempReg1, 1);
ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 17));
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
}
void VertexDecoderJitCache::Jit_TcFloat() {
LDP(INDEX_SIGNED, tempReg1, tempReg2, srcReg, dec_->tcoff);
STP(INDEX_SIGNED, tempReg1, tempReg2, dstReg, dec_->decFmt.uvoff);

View file

@ -281,35 +281,6 @@ void VertexDecoder::Step_TcU16ToFloat() const
uv[1] = uvdata[1] * (1.0f / 32768.0f);
}
void VertexDecoder::Step_TcU16Double() const
{
u16 *uv = (u16*)(decoded_ + decFmt.uvoff);
const u16 *uvdata = (const u16_le*)(ptr_ + tcoff);
uv[0] = uvdata[0] * 2;
uv[1] = uvdata[1] * 2;
}
void VertexDecoder::Step_TcU16Through() const
{
u16 *uv = (u16 *)(decoded_ + decFmt.uvoff);
const u16 *uvdata = (const u16_le*)(ptr_ + tcoff);
uv[0] = uvdata[0];
uv[1] = uvdata[1];
gstate_c.vertBounds.minU = std::min(gstate_c.vertBounds.minU, uvdata[0]);
gstate_c.vertBounds.maxU = std::max(gstate_c.vertBounds.maxU, uvdata[0]);
gstate_c.vertBounds.minV = std::min(gstate_c.vertBounds.minV, uvdata[1]);
gstate_c.vertBounds.maxV = std::max(gstate_c.vertBounds.maxV, uvdata[1]);
}
void VertexDecoder::Step_TcU16ThroughDouble() const
{
u16 *uv = (u16 *)(decoded_ + decFmt.uvoff);
const u16 *uvdata = (const u16_le*)(ptr_ + tcoff);
uv[0] = uvdata[0] * 2;
uv[1] = uvdata[1] * 2;
}
void VertexDecoder::Step_TcU16DoubleToFloat() const
{
float *uv = (float*)(decoded_ + decFmt.uvoff);
@ -388,51 +359,6 @@ void VertexDecoder::Step_TcFloatPrescale() const {
uv[1] = uvdata[1] * gstate_c.uv.vScale + gstate_c.uv.vOff;
}
void VertexDecoder::Step_TcU8Morph() const {
float uv[2] = { 0, 0 };
for (int n = 0; n < morphcount; n++) {
float w = gstate_c.morphWeights[n];
const u8 *uvdata = (const u8 *)(ptr_ + onesize_*n + tcoff);
uv[0] += (float)uvdata[0] * w;
uv[1] += (float)uvdata[1] * w;
}
u8 *out = decoded_ + decFmt.uvoff;
out[0] = (int)uv[0];
out[1] = (int)uv[1];
}
void VertexDecoder::Step_TcU16Morph() const {
float uv[2] = { 0, 0 };
for (int n = 0; n < morphcount; n++) {
float w = gstate_c.morphWeights[n];
const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff);
uv[0] += (float)uvdata[0] * w;
uv[1] += (float)uvdata[1] * w;
}
u16_le *out = (u16_le *)(decoded_ + decFmt.uvoff);
out[0] = (int)uv[0];
out[1] = (int)uv[1];
}
void VertexDecoder::Step_TcU16DoubleMorph() const {
float uv[2] = { 0, 0 };
for (int n = 0; n < morphcount; n++) {
float w = gstate_c.morphWeights[n];
const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff);
uv[0] += (float)uvdata[0] * w;
uv[1] += (float)uvdata[1] * w;
}
u16_le *out = (u16_le *)(decoded_ + decFmt.uvoff);
out[0] = (int)(uv[0] * 2.0f);
out[1] = (int)(uv[1] * 2.0f);
}
void VertexDecoder::Step_TcU8MorphToFloat() const {
float uv[2] = { 0, 0 };
for (int n = 0; n < morphcount; n++) {
@ -922,20 +848,6 @@ static const StepFunction tcstep_prescale_morph_remaster[4] = {
&VertexDecoder::Step_TcFloatPrescaleMorph,
};
static const StepFunction tcstep_morph[4] = {
0,
&VertexDecoder::Step_TcU8Morph,
&VertexDecoder::Step_TcU16Morph,
&VertexDecoder::Step_TcFloatMorph,
};
static const StepFunction tcstep_morph_remaster[4] = {
0,
&VertexDecoder::Step_TcU8Morph,
&VertexDecoder::Step_TcU16DoubleMorph,
&VertexDecoder::Step_TcFloatMorph,
};
static const StepFunction tcstep_morphToFloat[4] = {
0,
&VertexDecoder::Step_TcU8MorphToFloat,

View file

@ -514,17 +514,11 @@ public:
void Step_TcU16DoublePrescale() const;
void Step_TcFloatPrescale() const;
void Step_TcU16Double() const;
void Step_TcU16Through() const;
void Step_TcU16ThroughDouble() const;
void Step_TcU16DoubleToFloat() const;
void Step_TcU16ThroughToFloat() const;
void Step_TcU16ThroughDoubleToFloat() const;
void Step_TcFloatThrough() const;
void Step_TcU8Morph() const;
void Step_TcU16Morph() const;
void Step_TcU16DoubleMorph() const;
void Step_TcU8MorphToFloat() const;
void Step_TcU16MorphToFloat() const;
void Step_TcU16DoubleMorphToFloat() const;
@ -675,10 +669,6 @@ public:
void Jit_TcU16PrescaleMorph();
void Jit_TcFloatPrescaleMorph();
void Jit_TcU16Double();
void Jit_TcU16ThroughDouble();
void Jit_TcU16Through();
void Jit_TcU16ThroughToFloat();
void Jit_TcFloatThrough();

View file

@ -100,16 +100,13 @@ static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
{&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat},
{&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat},
{&VertexDecoder::Step_TcU16Double, &VertexDecoderJitCache::Jit_TcU16Double},
{&VertexDecoder::Step_TcU8Prescale, &VertexDecoderJitCache::Jit_TcU8Prescale},
{&VertexDecoder::Step_TcU16Prescale, &VertexDecoderJitCache::Jit_TcU16Prescale},
{&VertexDecoder::Step_TcFloatPrescale, &VertexDecoderJitCache::Jit_TcFloatPrescale},
{&VertexDecoder::Step_TcU16Through, &VertexDecoderJitCache::Jit_TcU16Through},
{&VertexDecoder::Step_TcU16ThroughToFloat, &VertexDecoderJitCache::Jit_TcU16ThroughToFloat},
{&VertexDecoder::Step_TcFloatThrough, &VertexDecoderJitCache::Jit_TcFloatThrough},
{&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble},
{&VertexDecoder::Step_TcU8MorphToFloat, &VertexDecoderJitCache::Jit_TcU8MorphToFloat},
{&VertexDecoder::Step_TcU16MorphToFloat, &VertexDecoderJitCache::Jit_TcU16MorphToFloat},
@ -696,15 +693,6 @@ void VertexDecoderJitCache::Jit_TcU16ToFloat() {
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), XMM3);
}
void VertexDecoderJitCache::Jit_TcU16Double() {
MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->tcoff));
MOVZX(32, 16, tempReg2, MDisp(srcReg, dec_->tcoff + 2));
SHL(16, R(tempReg1), Imm8(1)); // 16 to get a wall to shift into
SHL(32, R(tempReg2), Imm8(17));
OR(32, R(tempReg1), R(tempReg2));
MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
}
void VertexDecoderJitCache::Jit_TcFloat() {
#ifdef _M_X64
MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff));
@ -851,27 +839,6 @@ void VertexDecoderJitCache::Jit_TcFloatPrescaleMorph() {
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), fpScratchReg);
}
void VertexDecoderJitCache::Jit_TcU16Through() {
MOV(32, R(tempReg1), MDisp(srcReg, dec_->tcoff));
MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
MOV(32, R(tempReg2), R(tempReg1));
SHR(32, R(tempReg2), Imm8(16));
auto updateSide = [&](X64Reg r, CCFlags skipCC, u16 *value) {
CMP(16, R(r), M(value));
FixupBranch skip = J_CC(skipCC);
MOV(16, M(value), R(r));
SetJumpTarget(skip);
};
// TODO: Can this actually be fast? Hmm, floats aren't better.
updateSide(tempReg1, CC_GE, &gstate_c.vertBounds.minU);
updateSide(tempReg1, CC_LE, &gstate_c.vertBounds.maxU);
updateSide(tempReg2, CC_GE, &gstate_c.vertBounds.minV);
updateSide(tempReg2, CC_LE, &gstate_c.vertBounds.maxV);
}
void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() {
PXOR(fpScratchReg2, R(fpScratchReg2));
MOV(32, R(tempReg1), MDisp(srcReg, dec_->tcoff));
@ -897,15 +864,6 @@ void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() {
updateSide(tempReg2, CC_LE, &gstate_c.vertBounds.maxV);
}
void VertexDecoderJitCache::Jit_TcU16ThroughDouble() {
MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->tcoff));
MOVZX(32, 16, tempReg2, MDisp(srcReg, dec_->tcoff + 2));
SHL(16, R(tempReg1), Imm8(1)); // 16 to get a wall to shift into
SHL(32, R(tempReg2), Imm8(17));
OR(32, R(tempReg1), R(tempReg2));
MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
}
void VertexDecoderJitCache::Jit_TcFloatThrough() {
#ifdef _M_X64
MOV(64, R(tempReg1), MDisp(srcReg, dec_->tcoff));