2021-11-21 08:46:15 -08:00
|
|
|
// Copyright (c) 2017- PPSSPP Project.
|
|
|
|
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU General Public License as published by
|
|
|
|
// the Free Software Foundation, version 2.0 or later versions.
|
|
|
|
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
|
|
|
|
// Official git repository and contact information can be found at
|
|
|
|
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
|
|
|
|
|
|
|
#include "ppsspp_config.h"
|
|
|
|
#if PPSSPP_ARCH(AMD64)
|
|
|
|
|
|
|
|
#include <emmintrin.h>
|
|
|
|
#include "Common/x64Emitter.h"
|
|
|
|
#include "Common/CPUDetect.h"
|
|
|
|
#include "GPU/GPUState.h"
|
|
|
|
#include "GPU/Software/DrawPixel.h"
|
2021-11-21 17:02:26 -08:00
|
|
|
#include "GPU/Software/SoftGpu.h"
|
2021-11-21 08:46:15 -08:00
|
|
|
#include "GPU/ge_constants.h"
|
|
|
|
|
|
|
|
using namespace Gen;
|
|
|
|
|
|
|
|
namespace Rasterizer {
|
|
|
|
|
2021-11-21 14:13:07 -08:00
|
|
|
#if PPSSPP_PLATFORM(WINDOWS)
|
|
|
|
static const X64Reg argXReg = RCX;
|
|
|
|
static const X64Reg argYReg = RDX;
|
|
|
|
static const X64Reg argZReg = R8;
|
|
|
|
static const X64Reg argFogReg = R9;
|
|
|
|
static const X64Reg argColorReg = XMM4;
|
|
|
|
|
|
|
|
// Must save: RBX, RSP, RBP, RDI, RSI, R12-R15, XMM6-15
|
|
|
|
#else
|
|
|
|
static const X64Reg argXReg = RDI;
|
|
|
|
static const X64Reg argYReg = RSI;
|
|
|
|
static const X64Reg argZReg = RDX;
|
|
|
|
static const X64Reg argFogReg = RCX;
|
|
|
|
static const X64Reg argColorReg = XMM0;
|
|
|
|
|
|
|
|
// Must save: RBX, RSP, RBP, R12-R15
|
|
|
|
#endif
|
|
|
|
|
2021-11-21 15:02:45 -08:00
|
|
|
// Eight 16-bit lanes of 255.  GetConstBase() loads the address of this array,
// so it also serves as the base that MConstDisp() displacements are relative to.
alignas(16) static const uint16_t const255_16s[8] = {
	255, 255, 255, 255,
	255, 255, 255, 255,
};
// Eight 16-bit lanes of 0x8081.  A high multiply by this value followed by a
// right shift of 7 divides a 16-bit value by 255.
alignas(16) static const uint16_t by255i[8] = {
	0x8081, 0x8081, 0x8081, 0x8081,
	0x8081, 0x8081, 0x8081, 0x8081,
};
|
|
|
|
|
|
|
|
// Returns true when t1 and t2 are close enough together (strictly within
// +/- 0x7FFFFFE0 bytes) that one can be reached from the other with a signed
// 32-bit displacement.
//
// The distance is computed on integer addresses rather than by subtracting the
// pointers: the two arguments generally point into unrelated objects, and
// pointer subtraction is only defined for pointers into the same array.
template <typename T>
static bool Accessible(const T *t1, const T *t2) {
	const uintptr_t addr1 = reinterpret_cast<uintptr_t>(t1);
	const uintptr_t addr2 = reinterpret_cast<uintptr_t>(t2);
	// Unsigned subtraction wraps; reinterpreting as signed gives the byte distance.
	const intptr_t diff = static_cast<intptr_t>(addr1 - addr2);
	return diff > -0x7FFFFFE0 && diff < 0x7FFFFFE0;
}
|
|
|
|
|
2021-11-21 17:02:26 -08:00
|
|
|
template <typename T>
|
|
|
|
static bool ConstAccessible(const T *t) {
|
|
|
|
return Accessible((const uint8_t *)&const255_16s[0], (const uint8_t *)t);
|
|
|
|
}
|
|
|
|
|
2021-11-21 15:02:45 -08:00
|
|
|
template <typename T>
|
|
|
|
static OpArg MConstDisp(X64Reg r, const T *t) {
|
|
|
|
_assert_(ConstAccessible(t));
|
2021-11-21 17:02:26 -08:00
|
|
|
ptrdiff_t diff = (const uint8_t *)t - (const uint8_t *)&const255_16s[0];
|
2021-11-21 15:02:45 -08:00
|
|
|
return MDisp(r, (int)diff);
|
|
|
|
}
|
|
|
|
|
2021-11-21 08:46:15 -08:00
|
|
|
// Emits the single-pixel routine described by id.
//
// Returns the entry point of the generated code, or nullptr if generation
// failed; on failure the code pointer is rewound to where this function began.
// Note that success is currently forced to false after the implemented stages
// (see the TODO), so this always takes the failure path for now.
SingleFunc PixelJitCache::CompileSingle(const PixelFuncID &id) {
	// Reset the reg cache, then release the registers that don't carry arguments.
	regCache_.Reset();
	regCache_.Release(RAX, PixelRegCache::T_GEN);
	regCache_.Release(R10, PixelRegCache::T_GEN);
	regCache_.Release(R11, PixelRegCache::T_GEN);
	regCache_.Release(XMM1, PixelRegCache::T_VEC);
	regCache_.Release(XMM2, PixelRegCache::T_VEC);
	regCache_.Release(XMM3, PixelRegCache::T_VEC);
	regCache_.Release(XMM5, PixelRegCache::T_VEC);

#if PPSSPP_PLATFORM(WINDOWS)
	regCache_.Release(XMM0, PixelRegCache::T_VEC);
#else
	regCache_.Release(R8, PixelRegCache::T_GEN);
	regCache_.Release(R9, PixelRegCache::T_GEN);
	regCache_.Release(XMM4, PixelRegCache::T_VEC);
#endif

	BeginWrite();
	const u8 *entry = AlignCode16();

	// The depth range check goes first.
	bool success = Jit_ApplyDepthRange(id);

	// Clamp the color next: it can affect the alpha test, and later stages expect it clamped.
	// Packing down to 4x8-bit does the clamping, and that is the format everything else uses.
	PACKSSDW(argColorReg, R(argColorReg));
	PACKUSWB(argColorReg, R(argColorReg));

	if (success)
		success = Jit_AlphaTest(id);
	// Fog goes before the color test.  It might belong before the alpha test too, but it doesn't affect it.
	if (success)
		success = Jit_ApplyFog(id);
	if (success)
		success = Jit_ColorTest(id);

	// TODO: There's more...
	success = false;

	// Every recorded discard branch lands here.
	for (auto &branch : discards_)
		SetJumpTarget(branch);
	discards_.clear();

	EndWrite();
	if (!success) {
		ResetCodePtr(GetOffset(entry));
		return nullptr;
	}
	return (SingleFunc)entry;
}
|
|
|
|
|
|
|
|
// Returns a register holding the address of gstate.nop, emitting the load only
// when the reg cache doesn't already have a GSTATE register.
PixelRegCache::Reg PixelJitCache::GetGState() {
	if (regCache_.Has(PixelRegCache::GSTATE, PixelRegCache::T_GEN))
		return regCache_.Find(PixelRegCache::GSTATE, PixelRegCache::T_GEN);

	X64Reg gstateReg = regCache_.Alloc(PixelRegCache::GSTATE, PixelRegCache::T_GEN);
	MOV(PTRBITS, R(gstateReg), ImmPtr(&gstate.nop));
	return gstateReg;
}
|
|
|
|
|
|
|
|
// Returns a register holding the address of const255_16s, emitting the load
// only when the reg cache doesn't already have a CONST_BASE register.
PixelRegCache::Reg PixelJitCache::GetConstBase() {
	if (regCache_.Has(PixelRegCache::CONST_BASE, PixelRegCache::T_GEN))
		return regCache_.Find(PixelRegCache::CONST_BASE, PixelRegCache::T_GEN);

	X64Reg baseReg = regCache_.Alloc(PixelRegCache::CONST_BASE, PixelRegCache::T_GEN);
	MOV(PTRBITS, R(baseReg), ImmPtr(&const255_16s[0]));
	return baseReg;
}
|
|
|
|
|
2021-11-21 17:02:26 -08:00
|
|
|
// Returns the register holding the address of this pixel in the color buffer,
// emitting the calculation the first time it is requested.
//
// With id.useStandardStride, the argument registers themselves are consumed:
// argYReg becomes COLOR_OFF and argXReg becomes DEPTH_OFF (or is simply
// released when depth is neither written nor tested).  Otherwise the stride is
// read from gstate.fbwidth and the argument registers are left untouched.
PixelRegCache::Reg PixelJitCache::GetColorOff(const PixelFuncID &id) {
	if (regCache_.Has(PixelRegCache::COLOR_OFF, PixelRegCache::T_GEN))
		return regCache_.Find(PixelRegCache::COLOR_OFF, PixelRegCache::T_GEN);

	if (id.useStandardStride) {
		// x += y << 9.  This overwrites both argument registers.
		SHL(32, R(argYReg), Imm8(9));
		ADD(32, R(argXReg), R(argYReg));

		// Reuse argYReg for the color buffer pointer plus the scaled pixel index.
		MOV(PTRBITS, R(argYReg), ImmPtr(&fb.data));
		MOV(PTRBITS, R(argYReg), MatR(argYReg));
		const int colorScale = id.FBFormat() == GE_FORMAT_8888 ? 4 : 2;
		LEA(PTRBITS, argYReg, MComplex(argYReg, argXReg, colorScale, 0));
		// argYReg now serves as COLOR_OFF.
		regCache_.Release(argYReg, PixelRegCache::T_GEN, PixelRegCache::COLOR_OFF);
		// It has to stay locked, since the inputs needed to recompute it are gone.
		regCache_.ForceLock(PixelRegCache::COLOR_OFF, PixelRegCache::T_GEN);

		// Work out the depth address as well, unless nothing will use it.
		const bool depthUsed = id.depthWrite || id.DepthTestFunc() != GE_COMP_ALWAYS;
		if (!depthUsed) {
			regCache_.Release(argXReg, PixelRegCache::T_GEN);
		} else {
			X64Reg depthPtrReg = regCache_.Alloc(PixelRegCache::DEPTH_OFF, PixelRegCache::T_GEN);
			MOV(PTRBITS, R(depthPtrReg), ImmPtr(&depthbuf.data));
			MOV(PTRBITS, R(depthPtrReg), MatR(depthPtrReg));
			LEA(PTRBITS, argXReg, MComplex(depthPtrReg, argXReg, 2, 0));
			regCache_.Release(depthPtrReg, PixelRegCache::T_GEN);

			// Same treatment as color: argXReg becomes DEPTH_OFF and is force locked.
			regCache_.Release(argXReg, PixelRegCache::T_GEN, PixelRegCache::DEPTH_OFF);
			regCache_.ForceLock(PixelRegCache::DEPTH_OFF, PixelRegCache::T_GEN);
		}

		return regCache_.Find(PixelRegCache::COLOR_OFF, PixelRegCache::T_GEN);
	}

	// General case: index = (fbwidth & 0x7FC) * y + x.
	X64Reg gstateReg = GetGState();
	X64Reg offReg = regCache_.Alloc(PixelRegCache::COLOR_OFF, PixelRegCache::T_GEN);
	MOVZX(32, 16, offReg, MDisp(gstateReg, offsetof(GPUgstate, fbwidth)));
	regCache_.Unlock(gstateReg, PixelRegCache::T_GEN);

	AND(32, R(offReg), Imm32(0x000007FC));
	IMUL(32, offReg, R(argYReg));
	ADD(32, R(offReg), R(argXReg));

	// Turn the index into an address within the color buffer.
	X64Reg fbPtrReg = regCache_.Alloc(PixelRegCache::TEMP_HELPER, PixelRegCache::T_GEN);
	MOV(PTRBITS, R(fbPtrReg), ImmPtr(&fb.data));
	MOV(PTRBITS, R(fbPtrReg), MatR(fbPtrReg));
	const int colorScale = id.FBFormat() == GE_FORMAT_8888 ? 4 : 2;
	LEA(PTRBITS, offReg, MComplex(fbPtrReg, offReg, colorScale, 0));
	regCache_.Release(fbPtrReg, PixelRegCache::T_GEN);

	return offReg;
}
|
|
|
|
|
|
|
|
// Returns the register holding the address of this pixel in the depth buffer,
// emitting the calculation the first time it is requested.
//
// With id.useStandardStride the work is delegated to GetColorOff(), which
// produces DEPTH_OFF alongside COLOR_OFF.  Otherwise the stride is read from
// gstate.zbwidth.
PixelRegCache::Reg PixelJitCache::GetDepthOff(const PixelFuncID &id) {
	if (regCache_.Has(PixelRegCache::DEPTH_OFF, PixelRegCache::T_GEN))
		return regCache_.Find(PixelRegCache::DEPTH_OFF, PixelRegCache::T_GEN);

	// When color and depth both use a stride of 512, they share the same pixel index.
	if (id.useStandardStride) {
		// GetColorOff() computes both; we only want the depth register out of it.
		X64Reg colorOffReg = GetColorOff(id);
		regCache_.Unlock(colorOffReg, PixelRegCache::T_GEN);
		return regCache_.Find(PixelRegCache::DEPTH_OFF, PixelRegCache::T_GEN);
	}

	// General case: index = (zbwidth & 0x7FC) * y + x.
	X64Reg gstateReg = GetGState();
	X64Reg offReg = regCache_.Alloc(PixelRegCache::DEPTH_OFF, PixelRegCache::T_GEN);
	MOVZX(32, 16, offReg, MDisp(gstateReg, offsetof(GPUgstate, zbwidth)));
	regCache_.Unlock(gstateReg, PixelRegCache::T_GEN);

	AND(32, R(offReg), Imm32(0x000007FC));
	IMUL(32, offReg, R(argYReg));
	ADD(32, R(offReg), R(argXReg));

	// Turn the index into an address within the depth buffer (scale of 2 per pixel).
	X64Reg depthPtrReg = regCache_.Alloc(PixelRegCache::TEMP_HELPER, PixelRegCache::T_GEN);
	MOV(PTRBITS, R(depthPtrReg), ImmPtr(&depthbuf.data));
	MOV(PTRBITS, R(depthPtrReg), MatR(depthPtrReg));
	LEA(PTRBITS, offReg, MComplex(depthPtrReg, offReg, 2, 0));
	regCache_.Release(depthPtrReg, PixelRegCache::T_GEN);

	return offReg;
}
|
|
|
|
|
2021-11-21 15:02:45 -08:00
|
|
|
// Emits a conditional jump taken when cc holds, and records it in discards_ so
// its target can be filled in later (CompileSingle() resolves all of them).
void PixelJitCache::Discard(Gen::CCFlags cc) {
	auto pendingBranch = J_CC(cc, true);
	discards_.push_back(pendingBranch);
}
|
|
|
|
|
|
|
|
bool PixelJitCache::Jit_ApplyDepthRange(const PixelFuncID &id) {
|
|
|
|
if (id.applyDepthRange) {
|
|
|
|
X64Reg gstateReg = GetGState();
|
|
|
|
X64Reg minReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_GEN);
|
|
|
|
X64Reg maxReg = regCache_.Alloc(PixelRegCache::TEMP1, PixelRegCache::T_GEN);
|
|
|
|
|
|
|
|
// Only load the lowest 16 bits of each.
|
|
|
|
MOVZX(32, 16, minReg, MDisp(gstateReg, offsetof(GPUgstate, minz)));
|
|
|
|
MOVZX(32, 16, maxReg, MDisp(gstateReg, offsetof(GPUgstate, maxz)));
|
|
|
|
|
|
|
|
CMP(32, R(argZReg), R(minReg));
|
|
|
|
Discard(CC_L);
|
|
|
|
CMP(32, R(argZReg), R(maxReg));
|
|
|
|
Discard(CC_G);
|
|
|
|
|
|
|
|
regCache_.Unlock(gstateReg, PixelRegCache::T_GEN);
|
|
|
|
regCache_.Release(minReg, PixelRegCache::T_GEN);
|
|
|
|
regCache_.Release(maxReg, PixelRegCache::T_GEN);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Since this is early on, try to free up the z reg if we don't need it anymore.
|
|
|
|
if (id.clearMode && !id.DepthClear())
|
|
|
|
regCache_.Release(argZReg, PixelRegCache::T_GEN);
|
|
|
|
else if (!id.clearMode && !id.depthWrite && id.DepthTestFunc() == GE_COMP_ALWAYS)
|
|
|
|
regCache_.Release(argZReg, PixelRegCache::T_GEN);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool PixelJitCache::Jit_AlphaTest(const PixelFuncID &id) {
|
2021-11-21 15:33:39 -08:00
|
|
|
// Take care of ALWAYS/NEVER first. ALWAYS is common, means disabled.
|
|
|
|
switch (id.AlphaTestFunc()) {
|
|
|
|
case GE_COMP_NEVER:
|
|
|
|
CMP(32, R(RAX), R(RAX));
|
|
|
|
Discard(CC_E);
|
2021-11-21 15:02:45 -08:00
|
|
|
return true;
|
2021-11-21 15:33:39 -08:00
|
|
|
|
|
|
|
case GE_COMP_ALWAYS:
|
|
|
|
return true;
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load alpha into its own general reg.
|
|
|
|
X64Reg alphaReg;
|
|
|
|
if (regCache_.Has(PixelRegCache::ALPHA, PixelRegCache::T_GEN)) {
|
|
|
|
alphaReg = regCache_.Find(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
|
|
|
|
} else {
|
|
|
|
alphaReg = regCache_.Alloc(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
|
|
|
|
|
|
|
|
// TODO: Could do this a bit more cheaply on SSE4.1?
|
|
|
|
X64Reg zeroReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_VEC);
|
|
|
|
X64Reg colorCopyReg = regCache_.Alloc(PixelRegCache::TEMP1, PixelRegCache::T_VEC);
|
|
|
|
MOVDQA(colorCopyReg, R(argColorReg));
|
|
|
|
PXOR(zeroReg, R(zeroReg));
|
|
|
|
PUNPCKLBW(colorCopyReg, R(zeroReg));
|
|
|
|
PEXTRW(alphaReg, R(colorCopyReg), 3);
|
|
|
|
regCache_.Release(zeroReg, PixelRegCache::T_VEC);
|
|
|
|
regCache_.Release(colorCopyReg, PixelRegCache::T_VEC);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (id.hasAlphaTestMask) {
|
|
|
|
// Unfortunate, we'll need gstate to load the mask.
|
|
|
|
// Note: we leave the ALPHA purpose untouched and free it, because later code may reuse.
|
|
|
|
X64Reg gstateReg = GetGState();
|
|
|
|
X64Reg maskedReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_GEN);
|
|
|
|
|
|
|
|
// The mask is >> 16, so we load + 2.
|
|
|
|
MOVZX(32, 8, maskedReg, MDisp(gstateReg, offsetof(GPUgstate, alphatest) + 2));
|
|
|
|
regCache_.Unlock(gstateReg, PixelRegCache::T_GEN);
|
|
|
|
AND(32, R(maskedReg), R(alphaReg));
|
|
|
|
regCache_.Unlock(alphaReg, PixelRegCache::T_GEN);
|
|
|
|
|
|
|
|
// Okay now do the rest using the masked reg, which we modified.
|
|
|
|
alphaReg = maskedReg;
|
|
|
|
// Pre-emptively release, we don't need any other regs.
|
|
|
|
regCache_.Release(maskedReg, PixelRegCache::T_GEN);
|
|
|
|
} else {
|
|
|
|
regCache_.Unlock(alphaReg, PixelRegCache::T_GEN);
|
2021-11-21 15:02:45 -08:00
|
|
|
}
|
|
|
|
|
2021-11-21 15:33:39 -08:00
|
|
|
// We hardcode the ref into this jit func.
|
|
|
|
CMP(8, R(alphaReg), Imm8(id.alphaTestRef));
|
|
|
|
|
|
|
|
switch (id.AlphaTestFunc()) {
|
|
|
|
case GE_COMP_EQUAL:
|
|
|
|
Discard(CC_NE);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case GE_COMP_NOTEQUAL:
|
|
|
|
Discard(CC_E);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case GE_COMP_LESS:
|
|
|
|
Discard(CC_AE);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case GE_COMP_LEQUAL:
|
|
|
|
Discard(CC_A);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case GE_COMP_GREATER:
|
|
|
|
Discard(CC_BE);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case GE_COMP_GEQUAL:
|
|
|
|
Discard(CC_B);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return true;
|
2021-11-21 15:02:45 -08:00
|
|
|
}
|
|
|
|
|
2021-11-21 16:01:55 -08:00
|
|
|
bool PixelJitCache::Jit_ColorTest(const PixelFuncID &id) {
|
|
|
|
if (!id.colorTest || id.clearMode)
|
|
|
|
return true;
|
|
|
|
|
|
|
|
// We'll have 4 with fog released, so we're using them all...
|
|
|
|
X64Reg gstateReg = GetGState();
|
|
|
|
X64Reg funcReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_GEN);
|
|
|
|
X64Reg maskReg = regCache_.Alloc(PixelRegCache::TEMP1, PixelRegCache::T_GEN);
|
|
|
|
X64Reg refReg = regCache_.Alloc(PixelRegCache::TEMP2, PixelRegCache::T_GEN);
|
|
|
|
|
|
|
|
// First, load the registers: mask and ref.
|
|
|
|
MOV(32, R(maskReg), MDisp(gstateReg, offsetof(GPUgstate, colortestmask)));
|
|
|
|
AND(32, R(maskReg), Imm32(0x00FFFFFF));
|
|
|
|
MOV(32, R(refReg), MDisp(gstateReg, offsetof(GPUgstate, colorref)));
|
|
|
|
AND(32, R(refReg), R(maskReg));
|
|
|
|
|
|
|
|
// Temporarily abuse funcReg to grab the color into maskReg.
|
|
|
|
MOVD_xmm(R(funcReg), argColorReg);
|
|
|
|
AND(32, R(maskReg), R(funcReg));
|
|
|
|
|
|
|
|
// Now that we're setup, get the func and follow it.
|
|
|
|
MOVZX(32, 8, funcReg, MDisp(gstateReg, offsetof(GPUgstate, colortest)));
|
|
|
|
AND(32, R(funcReg), Imm32(3));
|
|
|
|
regCache_.Unlock(gstateReg, PixelRegCache::T_GEN);
|
|
|
|
|
|
|
|
CMP(32, R(funcReg), Imm32(GE_COMP_ALWAYS));
|
|
|
|
// Discard for GE_COMP_NEVER...
|
|
|
|
Discard(CC_B);
|
|
|
|
FixupBranch skip = J_CC(CC_E);
|
|
|
|
|
|
|
|
CMP(32, R(funcReg), Imm32(GE_COMP_EQUAL));
|
|
|
|
FixupBranch doEqual = J_CC(CC_E);
|
|
|
|
regCache_.Release(funcReg, PixelRegCache::T_GEN);
|
|
|
|
|
|
|
|
// The not equal path here... if they are equal, we discard.
|
|
|
|
CMP(32, R(refReg), R(maskReg));
|
|
|
|
Discard(CC_E);
|
|
|
|
FixupBranch skip2 = J();
|
|
|
|
|
|
|
|
SetJumpTarget(doEqual);
|
|
|
|
CMP(32, R(refReg), R(maskReg));
|
|
|
|
Discard(CC_NE);
|
|
|
|
|
|
|
|
regCache_.Release(maskReg, PixelRegCache::T_GEN);
|
|
|
|
regCache_.Release(refReg, PixelRegCache::T_GEN);
|
|
|
|
|
|
|
|
SetJumpTarget(skip);
|
|
|
|
SetJumpTarget(skip2);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2021-11-21 15:02:45 -08:00
|
|
|
bool PixelJitCache::Jit_ApplyFog(const PixelFuncID &id) {
|
|
|
|
if (!id.applyFog) {
|
|
|
|
// Okay, anyone can use the fog register then.
|
|
|
|
regCache_.Release(argFogReg, PixelRegCache::T_GEN);
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Load fog and expand to 16 bit. Ignore the high 8 bits, which'll match up with A.
|
|
|
|
X64Reg zeroReg = regCache_.Alloc(PixelRegCache::TEMP0, PixelRegCache::T_VEC);
|
|
|
|
X64Reg fogColorReg = regCache_.Alloc(PixelRegCache::TEMP1, PixelRegCache::T_VEC);
|
|
|
|
PXOR(zeroReg, R(zeroReg));
|
|
|
|
X64Reg gstateReg = GetGState();
|
|
|
|
MOVD_xmm(fogColorReg, MDisp(gstateReg, offsetof(GPUgstate, fogcolor)));
|
|
|
|
regCache_.Unlock(gstateReg, PixelRegCache::T_GEN);
|
|
|
|
PUNPCKLBW(fogColorReg, R(zeroReg));
|
|
|
|
|
|
|
|
// Load a set of 255s at 16 bit into a reg for later...
|
|
|
|
X64Reg invertReg = regCache_.Alloc(PixelRegCache::TEMP2, PixelRegCache::T_VEC);
|
|
|
|
X64Reg constReg = GetConstBase();
|
|
|
|
MOVDQA(invertReg, MConstDisp(constReg, &const255_16s[0]));
|
|
|
|
regCache_.Unlock(constReg, PixelRegCache::T_GEN);
|
|
|
|
|
|
|
|
// Expand (we clamped) color to 16 bit as well, so we can multiply with fog.
|
|
|
|
PUNPCKLBW(argColorReg, R(zeroReg));
|
|
|
|
regCache_.Release(zeroReg, PixelRegCache::T_VEC);
|
|
|
|
|
|
|
|
// Save A so we can put it back, we don't "fog" A.
|
|
|
|
X64Reg alphaReg;
|
|
|
|
if (regCache_.Has(PixelRegCache::ALPHA, PixelRegCache::T_GEN)) {
|
|
|
|
alphaReg = regCache_.Find(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
|
|
|
|
} else {
|
|
|
|
alphaReg = regCache_.Alloc(PixelRegCache::ALPHA, PixelRegCache::T_GEN);
|
|
|
|
PEXTRW(alphaReg, R(argColorReg), 3);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Okay, let's broadcast fog to an XMM.
|
|
|
|
X64Reg fogMultReg = regCache_.Alloc(PixelRegCache::TEMP3, PixelRegCache::T_VEC);
|
|
|
|
MOVD_xmm(fogMultReg, R(argFogReg));
|
|
|
|
PSHUFLW(fogMultReg, R(fogMultReg), _MM_SHUFFLE(0, 0, 0, 0));
|
|
|
|
// We can free up the actual fog reg now.
|
|
|
|
regCache_.Release(argFogReg, PixelRegCache::T_GEN);
|
|
|
|
|
|
|
|
// Now we multiply the existing color by fog...
|
|
|
|
PMULLW(argColorReg, R(fogMultReg));
|
|
|
|
// And then inverse the fog value using those 255s we loaded, and multiply by fog color.
|
|
|
|
PSUBUSW(invertReg, R(fogMultReg));
|
|
|
|
PMULLW(fogColorReg, R(invertReg));
|
|
|
|
// At this point, argColorReg and fogColorReg are multiplied at 16-bit, so we need to sum.
|
|
|
|
PADDUSW(argColorReg, R(fogColorReg));
|
|
|
|
regCache_.Release(fogColorReg, PixelRegCache::T_VEC);
|
|
|
|
regCache_.Release(fogMultReg, PixelRegCache::T_VEC);
|
|
|
|
regCache_.Release(invertReg, PixelRegCache::T_VEC);
|
|
|
|
|
|
|
|
// Now to divide by 255, we use bit tricks: multiply by 0x8081, and shift right by 16+7.
|
|
|
|
constReg = GetConstBase();
|
|
|
|
PMULHUW(argColorReg, MConstDisp(constReg, &by255i));
|
|
|
|
regCache_.Unlock(constReg, PixelRegCache::T_GEN);
|
|
|
|
// Now shift right by 7 (PMULHUW already did 16 of the shift.)
|
|
|
|
PSRLW(argColorReg, 7);
|
|
|
|
|
|
|
|
// Okay, put A back in and shrink to 8888 again.
|
|
|
|
PINSRW(argColorReg, R(alphaReg), 3);
|
|
|
|
PACKUSWB(argColorReg, R(argColorReg));
|
|
|
|
regCache_.Unlock(alphaReg, PixelRegCache::T_GEN);
|
|
|
|
|
|
|
|
return true;
|
2021-11-21 08:46:15 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
};
|
|
|
|
|
|
|
|
#endif
|