softgpu: Cache logicOp in draw pixel state.

This commit is contained in:
Unknown W. Brackets 2022-01-15 11:17:43 -08:00
parent c0d548846f
commit acad2640dd
3 changed files with 22 additions and 22 deletions

View file

@ -482,7 +482,7 @@ void SOFTRAST_CALL DrawSinglePixel(int x, int y, int z, int fog, Vec4IntArg colo
// Logic ops are applied after blending (if blending is enabled.) // Logic ops are applied after blending (if blending is enabled.)
if (pixelID.applyLogicOp && !clearMode) { if (pixelID.applyLogicOp && !clearMode) {
// Logic ops don't affect stencil, which happens inside ApplyLogicOp. // Logic ops don't affect stencil, which happens inside ApplyLogicOp.
new_color = ApplyLogicOp(gstate.getLogicOp(), old_color, new_color); new_color = ApplyLogicOp(pixelID.cached.logicOp, old_color, new_color);
} }
if (clearMode) { if (clearMode) {

View file

@ -250,10 +250,10 @@ RegCache::Reg PixelJitCache::GetColorOff(const PixelFuncID &id) {
MOV(32, R(r), R(argYReg)); MOV(32, R(r), R(argYReg));
SHL(32, R(r), Imm8(9)); SHL(32, R(r), Imm8(9));
} else { } else {
if (regCache_.Has(RegCache::GEN_ARG_ID)) { if (regCache_.Has(RegCache::GEN_ARG_ID) || regCache_.Has(RegCache::GEN_ID)) {
X64Reg idReg = regCache_.Find(RegCache::GEN_ARG_ID); X64Reg idReg = GetPixelID();
MOVZX(32, 16, r, MDisp(idReg, offsetof(PixelFuncID, cached.framebufStride))); MOVZX(32, 16, r, MDisp(idReg, offsetof(PixelFuncID, cached.framebufStride)));
regCache_.Unlock(idReg, RegCache::GEN_ARG_ID); UnlockPixelID(idReg);
} else { } else {
_assert_(stackIDOffset_ != -1); _assert_(stackIDOffset_ != -1);
MOV(PTRBITS, R(r), MDisp(RSP, stackIDOffset_)); MOV(PTRBITS, R(r), MDisp(RSP, stackIDOffset_));
@ -300,10 +300,10 @@ RegCache::Reg PixelJitCache::GetDepthOff(const PixelFuncID &id) {
MOV(32, R(r), R(argYReg)); MOV(32, R(r), R(argYReg));
SHL(32, R(r), Imm8(9)); SHL(32, R(r), Imm8(9));
} else { } else {
if (regCache_.Has(RegCache::GEN_ARG_ID)) { if (regCache_.Has(RegCache::GEN_ARG_ID) || regCache_.Has(RegCache::GEN_ID)) {
X64Reg idReg = regCache_.Find(RegCache::GEN_ARG_ID); X64Reg idReg = GetPixelID();
MOVZX(32, 16, r, MDisp(idReg, offsetof(PixelFuncID, cached.depthbufStride))); MOVZX(32, 16, r, MDisp(idReg, offsetof(PixelFuncID, cached.depthbufStride)));
regCache_.Unlock(idReg, RegCache::GEN_ARG_ID); UnlockPixelID(idReg);
} else { } else {
_assert_(stackIDOffset_ != -1); _assert_(stackIDOffset_ != -1);
MOV(PTRBITS, R(r), MDisp(RSP, stackIDOffset_)); MOV(PTRBITS, R(r), MDisp(RSP, stackIDOffset_));
@ -1509,10 +1509,10 @@ bool PixelJitCache::Jit_Dither(const PixelFuncID &id) {
LEA(32, valueReg, MComplex(argXReg, valueReg, 4, offsetof(PixelFuncID, cached.ditherMatrix))); LEA(32, valueReg, MComplex(argXReg, valueReg, 4, offsetof(PixelFuncID, cached.ditherMatrix)));
// Okay, now abuse argXReg to read the PixelFuncID pointer on the stack. // Okay, now abuse argXReg to read the PixelFuncID pointer on the stack.
if (regCache_.Has(RegCache::GEN_ARG_ID)) { if (regCache_.Has(RegCache::GEN_ARG_ID) || regCache_.Has(RegCache::GEN_ID)) {
X64Reg idReg = regCache_.Find(RegCache::GEN_ARG_ID); X64Reg idReg = GetPixelID();
MOVSX(32, 8, valueReg, MRegSum(idReg, valueReg)); MOVSX(32, 8, valueReg, MRegSum(idReg, valueReg));
regCache_.Unlock(idReg, RegCache::GEN_ARG_ID); UnlockPixelID(idReg);
} else { } else {
_assert_(stackIDOffset_ != -1); _assert_(stackIDOffset_ != -1);
MOV(PTRBITS, R(argXReg), MDisp(RSP, stackIDOffset_)); MOV(PTRBITS, R(argXReg), MDisp(RSP, stackIDOffset_));
@ -1672,10 +1672,10 @@ bool PixelJitCache::Jit_WriteColor(const PixelFuncID &id) {
if (id.applyColorWriteMask) { if (id.applyColorWriteMask) {
maskReg = regCache_.Alloc(RegCache::GEN_TEMP3); maskReg = regCache_.Alloc(RegCache::GEN_TEMP3);
// Load the pre-converted and combined write mask. // Load the pre-converted and combined write mask.
if (regCache_.Has(RegCache::GEN_ARG_ID)) { if (regCache_.Has(RegCache::GEN_ARG_ID) || regCache_.Has(RegCache::GEN_ID)) {
X64Reg idReg = regCache_.Find(RegCache::GEN_ARG_ID); X64Reg idReg = GetPixelID();
MOV(32, R(maskReg), MDisp(idReg, offsetof(PixelFuncID, cached.colorWriteMask))); MOV(32, R(maskReg), MDisp(idReg, offsetof(PixelFuncID, cached.colorWriteMask)));
regCache_.Unlock(idReg, RegCache::GEN_ARG_ID); UnlockPixelID(idReg);
} else { } else {
_assert_(stackIDOffset_ != -1); _assert_(stackIDOffset_ != -1);
MOV(PTRBITS, R(maskReg), MDisp(RSP, stackIDOffset_)); MOV(PTRBITS, R(maskReg), MDisp(RSP, stackIDOffset_));
@ -1758,17 +1758,16 @@ bool PixelJitCache::Jit_WriteColor(const PixelFuncID &id) {
bool PixelJitCache::Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg maskReg) { bool PixelJitCache::Jit_ApplyLogicOp(const PixelFuncID &id, RegCache::Reg colorReg, RegCache::Reg maskReg) {
Describe("LogicOp"); Describe("LogicOp");
X64Reg logicOpReg = INVALID_REG; X64Reg logicOpReg = regCache_.Alloc(RegCache::GEN_TEMP4);
if (RipAccessible(&gstate.lop)) { if (regCache_.Has(RegCache::GEN_ARG_ID) || regCache_.Has(RegCache::GEN_ID)) {
logicOpReg = regCache_.Alloc(RegCache::GEN_TEMP4); X64Reg idReg = GetPixelID();
MOVZX(32, 8, logicOpReg, M(&gstate.lop)); MOVZX(32, 8, logicOpReg, MDisp(idReg, offsetof(PixelFuncID, cached.logicOp)));
UnlockPixelID(idReg);
} else { } else {
X64Reg gstateReg = GetGState(); _assert_(stackIDOffset_ != -1);
logicOpReg = regCache_.Alloc(RegCache::GEN_TEMP4); MOV(PTRBITS, R(logicOpReg), MDisp(RSP, stackIDOffset_));
MOVZX(32, 8, logicOpReg, MDisp(gstateReg, offsetof(GPUgstate, lop))); MOVZX(32, 8, logicOpReg, MDisp(logicOpReg, offsetof(PixelFuncID, cached.logicOp)));
regCache_.Unlock(gstateReg, RegCache::GEN_GSTATE);
} }
AND(8, R(logicOpReg), Imm8(0x0F));
X64Reg stencilReg = INVALID_REG; X64Reg stencilReg = INVALID_REG;
if (regCache_.Has(RegCache::GEN_STENCIL)) if (regCache_.Has(RegCache::GEN_STENCIL))

View file

@ -56,6 +56,7 @@ struct PixelFuncID {
int maxz; int maxz;
uint16_t framebufStride; uint16_t framebufStride;
uint16_t depthbufStride; uint16_t depthbufStride;
GELogicOp logicOp;
} cached; } cached;
union { union {