irjit: Fix Vec4Shuffle overlap issue.
This commit is contained in:
parent
e9431d0d1e
commit
e73c203984
2 changed files with 12 additions and 7 deletions
|
@@ -1847,22 +1847,22 @@ namespace MIPSComp {
|
||||||
ir.Write(IROp::Vec4Neg, IRVTEMP_0, tregs[0]);
|
ir.Write(IROp::Vec4Neg, IRVTEMP_0, tregs[0]);
|
||||||
|
|
||||||
// tmp = S[x,x,x,x] * T[w,-z,y,-x]
|
// tmp = S[x,x,x,x] * T[w,-z,y,-x]
|
||||||
|
ir.Write(IRInst{ IROp::Vec4Blend, IRVTEMP_PFX_S, tregs[0], IRVTEMP_0, blendConst(1, 0, 1, 0) });
|
||||||
|
ir.Write(IROp::Vec4Shuffle, IRVTEMP_PFX_T, IRVTEMP_PFX_S, shuffleImm(3, 2, 1, 0));
|
||||||
ir.Write(IROp::Vec4Shuffle, IRVTEMP_PFX_S, sregs[0], shuffleImm(0, 0, 0, 0));
|
ir.Write(IROp::Vec4Shuffle, IRVTEMP_PFX_S, sregs[0], shuffleImm(0, 0, 0, 0));
|
||||||
ir.Write(IRInst{ IROp::Vec4Blend, IRVTEMP_PFX_T, tregs[0], IRVTEMP_0, blendConst(1, 0, 1, 0) });
|
|
||||||
ir.Write(IROp::Vec4Shuffle, IRVTEMP_PFX_T, IRVTEMP_PFX_T, shuffleImm(3, 2, 1, 0));
|
|
||||||
ir.Write(IROp::Vec4Mul, IRVTEMP_PFX_D, IRVTEMP_PFX_S, IRVTEMP_PFX_T);
|
ir.Write(IROp::Vec4Mul, IRVTEMP_PFX_D, IRVTEMP_PFX_S, IRVTEMP_PFX_T);
|
||||||
|
|
||||||
// tmp += S[y,y,y,y] * T[z,w,-x,-y]
|
// tmp += S[y,y,y,y] * T[z,w,-x,-y]
|
||||||
|
ir.Write(IRInst{ IROp::Vec4Blend, IRVTEMP_PFX_S, tregs[0], IRVTEMP_0, blendConst(1, 1, 0, 0) });
|
||||||
|
ir.Write(IROp::Vec4Shuffle, IRVTEMP_PFX_T, IRVTEMP_PFX_S, shuffleImm(2, 3, 0, 1));
|
||||||
ir.Write(IROp::Vec4Shuffle, IRVTEMP_PFX_S, sregs[0], shuffleImm(1, 1, 1, 1));
|
ir.Write(IROp::Vec4Shuffle, IRVTEMP_PFX_S, sregs[0], shuffleImm(1, 1, 1, 1));
|
||||||
ir.Write(IRInst{ IROp::Vec4Blend, IRVTEMP_PFX_T, tregs[0], IRVTEMP_0, blendConst(1, 1, 0, 0) });
|
|
||||||
ir.Write(IROp::Vec4Shuffle, IRVTEMP_PFX_T, IRVTEMP_PFX_T, shuffleImm(2, 3, 0, 1));
|
|
||||||
ir.Write(IROp::Vec4Mul, IRVTEMP_PFX_S, IRVTEMP_PFX_S, IRVTEMP_PFX_T);
|
ir.Write(IROp::Vec4Mul, IRVTEMP_PFX_S, IRVTEMP_PFX_S, IRVTEMP_PFX_T);
|
||||||
ir.Write(IROp::Vec4Add, IRVTEMP_PFX_D, IRVTEMP_PFX_D, IRVTEMP_PFX_S);
|
ir.Write(IROp::Vec4Add, IRVTEMP_PFX_D, IRVTEMP_PFX_D, IRVTEMP_PFX_S);
|
||||||
|
|
||||||
// tmp += S[z,z,z,z] * T[-y,x,w,-z]
|
// tmp += S[z,z,z,z] * T[-y,x,w,-z]
|
||||||
|
ir.Write(IRInst{ IROp::Vec4Blend, IRVTEMP_PFX_S, tregs[0], IRVTEMP_0, blendConst(0, 1, 1, 0) });
|
||||||
|
ir.Write(IROp::Vec4Shuffle, IRVTEMP_PFX_T, IRVTEMP_PFX_S, shuffleImm(1, 0, 3, 2));
|
||||||
ir.Write(IROp::Vec4Shuffle, IRVTEMP_PFX_S, sregs[0], shuffleImm(2, 2, 2, 2));
|
ir.Write(IROp::Vec4Shuffle, IRVTEMP_PFX_S, sregs[0], shuffleImm(2, 2, 2, 2));
|
||||||
ir.Write(IRInst{ IROp::Vec4Blend, IRVTEMP_PFX_T, tregs[0], IRVTEMP_0, blendConst(0, 1, 1, 0) });
|
|
||||||
ir.Write(IROp::Vec4Shuffle, IRVTEMP_PFX_T, IRVTEMP_PFX_T, shuffleImm(1, 0, 3, 2));
|
|
||||||
ir.Write(IROp::Vec4Mul, IRVTEMP_PFX_S, IRVTEMP_PFX_S, IRVTEMP_PFX_T);
|
ir.Write(IROp::Vec4Mul, IRVTEMP_PFX_S, IRVTEMP_PFX_S, IRVTEMP_PFX_T);
|
||||||
ir.Write(IROp::Vec4Add, IRVTEMP_PFX_D, IRVTEMP_PFX_D, IRVTEMP_PFX_S);
|
ir.Write(IROp::Vec4Add, IRVTEMP_PFX_D, IRVTEMP_PFX_D, IRVTEMP_PFX_S);
|
||||||
|
|
||||||
|
|
|
@@ -304,11 +304,16 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, int count) {
|
||||||
}
|
}
|
||||||
|
|
||||||
case IROp::Vec4Shuffle:
|
case IROp::Vec4Shuffle:
|
||||||
|
{
|
||||||
// Can't use the SSE shuffle here because it takes an immediate. pshufb with a table would work though,
|
// Can't use the SSE shuffle here because it takes an immediate. pshufb with a table would work though,
|
||||||
// or a big switch - there are only 256 shuffles possible (4^4)
|
// or a big switch - there are only 256 shuffles possible (4^4)
|
||||||
|
float temp[4];
|
||||||
for (int i = 0; i < 4; i++)
|
for (int i = 0; i < 4; i++)
|
||||||
mips->f[inst->dest + i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)];
|
temp[i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)];
|
||||||
|
for (int i = 0; i < 4; i++)
|
||||||
|
mips->f[inst->dest + i] = temp[i];
|
||||||
break;
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
case IROp::Vec4Blend:
|
case IROp::Vec4Blend:
|
||||||
// Could use _mm_blendv_ps (SSE4+BMI), vbslq_f32 (ARM), __riscv_vmerge_vvm (RISC-V)
|
// Could use _mm_blendv_ps (SSE4+BMI), vbslq_f32 (ARM), __riscv_vmerge_vvm (RISC-V)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue