Compare commits
6 commits
master
...
remove-har
Author | SHA1 | Date | |
---|---|---|---|
|
64c2b84d5b | ||
|
c8d6d68aac | ||
|
55b1c8919a | ||
|
8fab44902a | ||
|
f8167d442f | ||
|
58454894eb |
41 changed files with 127 additions and 905 deletions
|
@ -879,7 +879,6 @@ static ConfigSetting graphicsSettings[] = {
|
|||
ConfigSetting("SoftwareRenderer", &g_Config.bSoftwareRendering, false, true, true),
|
||||
ConfigSetting("SoftwareRendererJit", &g_Config.bSoftwareRenderingJit, true, true, true),
|
||||
ReportedConfigSetting("HardwareTransform", &g_Config.bHardwareTransform, true, true, true),
|
||||
ReportedConfigSetting("SoftwareSkinning", &g_Config.bSoftwareSkinning, true, true, true),
|
||||
ReportedConfigSetting("TextureFiltering", &g_Config.iTexFiltering, 1, true, true),
|
||||
ReportedConfigSetting("BufferFiltering", &g_Config.iBufFilter, SCALE_LINEAR, true, true),
|
||||
ReportedConfigSetting("InternalResolution", &g_Config.iInternalResolution, &DefaultInternalResolution, true, true),
|
||||
|
|
|
@ -162,7 +162,6 @@ public:
|
|||
bool bSoftwareRendering;
|
||||
bool bSoftwareRenderingJit;
|
||||
bool bHardwareTransform; // only used in the GLES backend
|
||||
bool bSoftwareSkinning;
|
||||
bool bVendorBugChecksEnabled;
|
||||
bool bUseGeometryShader;
|
||||
|
||||
|
|
|
@ -180,11 +180,10 @@ void DrawEngineCommon::NotifyConfigChanged() {
|
|||
|
||||
useHWTransform_ = g_Config.bHardwareTransform;
|
||||
useHWTessellation_ = UpdateUseHWTessellation(g_Config.bHardwareTessellation);
|
||||
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
|
||||
}
|
||||
|
||||
u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize) {
|
||||
const u32 vertTypeID = GetVertTypeID(vertType, gstate.getUVGenMode(), decOptions_.applySkinInDecode);
|
||||
const u32 vertTypeID = GetVertTypeID(vertType, gstate.getUVGenMode(), true);
|
||||
VertexDecoder *dec = GetVertexDecoder(vertTypeID);
|
||||
if (vertexSize)
|
||||
*vertexSize = dec->VertexSize();
|
||||
|
@ -232,7 +231,7 @@ void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex
|
|||
}
|
||||
|
||||
int bytesRead;
|
||||
uint32_t vertTypeID = GetVertTypeID(vtype, 0, decOptions_.applySkinInDecode);
|
||||
uint32_t vertTypeID = GetVertTypeID(vtype, 0, true);
|
||||
SubmitPrim(&temp[0], nullptr, prim, vertexCount, vertTypeID, cullMode, &bytesRead);
|
||||
DispatchFlush();
|
||||
|
||||
|
@ -281,10 +280,7 @@ bool DrawEngineCommon::TestBoundingBox(const void *control_points, const void *i
|
|||
}
|
||||
|
||||
// Force software skinning.
|
||||
bool wasApplyingSkinInDecode = decOptions_.applySkinInDecode;
|
||||
decOptions_.applySkinInDecode = true;
|
||||
NormalizeVertices((u8 *)corners, temp_buffer, (const u8 *)control_points, indexLowerBound, indexUpperBound, vertType);
|
||||
decOptions_.applySkinInDecode = wasApplyingSkinInDecode;
|
||||
|
||||
IndexConverter conv(vertType, inds);
|
||||
for (int i = 0; i < vertexCount; i++) {
|
||||
|
@ -499,8 +495,7 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
|
|||
// implementation of the vertex decoder.
|
||||
dec->DecodeVerts(bufPtr, inPtr, lowerBound, upperBound);
|
||||
|
||||
// OK, morphing eliminated but bones still remain to be taken care of.
|
||||
// Let's do a partial software transform where we only do skinning.
|
||||
// Morph and skin are both removed during decode now.
|
||||
|
||||
VertexReader reader(bufPtr, dec->GetDecVtxFmt(), vertType);
|
||||
|
||||
|
@ -513,80 +508,29 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
|
|||
(u8)gstate.getMaterialAmbientA(),
|
||||
};
|
||||
|
||||
// Let's have two separate loops, one for non skinning and one for skinning.
|
||||
if (!dec->skinInDecode && (vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE) {
|
||||
int numBoneWeights = vertTypeGetNumBoneWeights(vertType);
|
||||
for (int i = lowerBound; i <= upperBound; i++) {
|
||||
reader.Goto(i - lowerBound);
|
||||
SimpleVertex &sv = sverts[i];
|
||||
if (vertType & GE_VTYPE_TC_MASK) {
|
||||
reader.ReadUV(sv.uv);
|
||||
}
|
||||
|
||||
if (vertType & GE_VTYPE_COL_MASK) {
|
||||
reader.ReadColor0_8888(sv.color);
|
||||
} else {
|
||||
memcpy(sv.color, defaultColor, 4);
|
||||
}
|
||||
|
||||
float nrm[3], pos[3];
|
||||
float bnrm[3], bpos[3];
|
||||
|
||||
if (vertType & GE_VTYPE_NRM_MASK) {
|
||||
// Normals are generated during tessellation anyway, not sure if any need to supply
|
||||
reader.ReadNrm(nrm);
|
||||
} else {
|
||||
nrm[0] = 0;
|
||||
nrm[1] = 0;
|
||||
nrm[2] = 1.0f;
|
||||
}
|
||||
reader.ReadPos(pos);
|
||||
|
||||
// Apply skinning transform directly
|
||||
float weights[8];
|
||||
reader.ReadWeights(weights);
|
||||
// Skinning
|
||||
Vec3Packedf psum(0, 0, 0);
|
||||
Vec3Packedf nsum(0, 0, 0);
|
||||
for (int w = 0; w < numBoneWeights; w++) {
|
||||
if (weights[w] != 0.0f) {
|
||||
Vec3ByMatrix43(bpos, pos, gstate.boneMatrix + w * 12);
|
||||
Vec3Packedf tpos(bpos);
|
||||
psum += tpos * weights[w];
|
||||
|
||||
Norm3ByMatrix43(bnrm, nrm, gstate.boneMatrix + w * 12);
|
||||
Vec3Packedf tnorm(bnrm);
|
||||
nsum += tnorm * weights[w];
|
||||
}
|
||||
}
|
||||
sv.pos = psum;
|
||||
sv.nrm = nsum;
|
||||
for (int i = lowerBound; i <= upperBound; i++) {
|
||||
reader.Goto(i - lowerBound);
|
||||
SimpleVertex &sv = sverts[i];
|
||||
if (vertType & GE_VTYPE_TC_MASK) {
|
||||
reader.ReadUV(sv.uv);
|
||||
} else {
|
||||
sv.uv[0] = 0.0f; // This will get filled in during tessellation
|
||||
sv.uv[1] = 0.0f;
|
||||
}
|
||||
} else {
|
||||
for (int i = lowerBound; i <= upperBound; i++) {
|
||||
reader.Goto(i - lowerBound);
|
||||
SimpleVertex &sv = sverts[i];
|
||||
if (vertType & GE_VTYPE_TC_MASK) {
|
||||
reader.ReadUV(sv.uv);
|
||||
} else {
|
||||
sv.uv[0] = 0.0f; // This will get filled in during tessellation
|
||||
sv.uv[1] = 0.0f;
|
||||
}
|
||||
if (vertType & GE_VTYPE_COL_MASK) {
|
||||
reader.ReadColor0_8888(sv.color);
|
||||
} else {
|
||||
memcpy(sv.color, defaultColor, 4);
|
||||
}
|
||||
if (vertType & GE_VTYPE_NRM_MASK) {
|
||||
// Normals are generated during tessellation anyway, not sure if any need to supply
|
||||
reader.ReadNrm((float *)&sv.nrm);
|
||||
} else {
|
||||
sv.nrm.x = 0.0f;
|
||||
sv.nrm.y = 0.0f;
|
||||
sv.nrm.z = 1.0f;
|
||||
}
|
||||
reader.ReadPos((float *)&sv.pos);
|
||||
if (vertType & GE_VTYPE_COL_MASK) {
|
||||
reader.ReadColor0_8888(sv.color);
|
||||
} else {
|
||||
memcpy(sv.color, defaultColor, 4);
|
||||
}
|
||||
if (vertType & GE_VTYPE_NRM_MASK) {
|
||||
// Normals are generated during tessellation anyway, not sure if any need to supply
|
||||
reader.ReadNrm((float *)&sv.nrm);
|
||||
} else {
|
||||
sv.nrm.x = 0.0f;
|
||||
sv.nrm.y = 0.0f;
|
||||
sv.nrm.z = 1.0f;
|
||||
}
|
||||
reader.ReadPos((float *)&sv.pos);
|
||||
}
|
||||
|
||||
// Okay, there we are! Return the new type (but keep the index bits)
|
||||
|
@ -836,7 +780,7 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
|
|||
numDrawCalls++;
|
||||
vertexCountInDrawCalls_ += vertexCount;
|
||||
|
||||
if (decOptions_.applySkinInDecode && (vertTypeID & GE_VTYPE_WEIGHT_MASK)) {
|
||||
if (vertTypeID & GE_VTYPE_WEIGHT_MASK) {
|
||||
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
|
||||
decodeCounter_++;
|
||||
}
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "GPU/Common/ShaderId.h"
|
||||
#include "GPU/Common/ShaderUniforms.h"
|
||||
#include "GPU/Common/FragmentShaderGenerator.h"
|
||||
#include "GPU/Vulkan/DrawEngineVulkan.h"
|
||||
#include "GPU/ge_constants.h"
|
||||
#include "GPU/GPUState.h"
|
||||
|
||||
|
@ -185,23 +186,23 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
|
|||
WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n");
|
||||
}
|
||||
|
||||
WRITE(p, "layout (std140, set = 1, binding = 3) uniform baseUBO {\n%s};\n", ub_baseStr);
|
||||
WRITE(p, "layout (std140, set = 1, binding = %d) uniform baseUBO {\n%s};\n", DRAW_BINDING_DYNUBO_BASE, ub_baseStr);
|
||||
if (doTexture) {
|
||||
WRITE(p, "layout (set = 1, binding = 0) uniform %s%s tex;\n", texture3D ? "sampler3D" : "sampler2D", arrayTexture ? "Array" : "");
|
||||
WRITE(p, "layout (set = 1, binding = %d) uniform %s%s tex;\n", DRAW_BINDING_TEXTURE, texture3D ? "sampler3D" : "sampler2D", arrayTexture ? "Array" : "");
|
||||
}
|
||||
|
||||
if (readFramebufferTex) {
|
||||
// The framebuffer texture is always bound as an array.
|
||||
p.C("layout (set = 1, binding = 1) uniform sampler2DArray fbotex;\n");
|
||||
p.F("layout (set = 1, binding = %d) uniform sampler2DArray fbotex;\n", DRAW_BINDING_2ND_TEXTURE);
|
||||
} else if (fetchFramebuffer) {
|
||||
p.C("layout (input_attachment_index = 0, set = 1, binding = 9) uniform subpassInput inputColor;\n");
|
||||
p.F("layout (input_attachment_index = 0, set = 1, binding = %d) uniform subpassInput inputColor;\n", DRAW_BINDING_INPUT_ATTACHMENT);
|
||||
if (fragmentShaderFlags) {
|
||||
*fragmentShaderFlags |= FragmentShaderFlags::INPUT_ATTACHMENT;
|
||||
}
|
||||
}
|
||||
|
||||
if (shaderDepalMode != ShaderDepalMode::OFF) {
|
||||
WRITE(p, "layout (set = 1, binding = 2) uniform sampler2D pal;\n");
|
||||
WRITE(p, "layout (set = 1, binding = %d) uniform sampler2D pal;\n", DRAW_BINDING_DEPAL_TEXTURE);
|
||||
}
|
||||
|
||||
// Note: the precision qualifiers must match the vertex shader!
|
||||
|
|
|
@ -90,11 +90,7 @@ enum : uint64_t {
|
|||
DIRTY_MIPBIAS = 1ULL << 37,
|
||||
DIRTY_LIGHT_CONTROL = 1ULL << 38,
|
||||
|
||||
// space for 1 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS.
|
||||
|
||||
DIRTY_BONE_UNIFORMS = 0xFF000000ULL,
|
||||
|
||||
DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFFULL,
|
||||
DIRTY_ALL_UNIFORMS = 0x7F00FFFFFFULL, // 00 is where bone uniforms used to be.
|
||||
DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3,
|
||||
|
||||
// Other dirty elements that aren't uniforms!
|
||||
|
|
|
@ -35,7 +35,6 @@ std::string VertexShaderDesc(const VShaderID &id) {
|
|||
int ls1 = id.Bits(VS_BIT_LS1, 2);
|
||||
|
||||
if (uvgMode) desc << uvgModes[uvgMode];
|
||||
if (id.Bit(VS_BIT_ENABLE_BONES)) desc << "Bones:" << (id.Bits(VS_BIT_BONES, 3) + 1) << " ";
|
||||
// Lights
|
||||
if (id.Bit(VS_BIT_LIGHTING_ENABLE)) {
|
||||
desc << "Light: ";
|
||||
|
@ -51,7 +50,6 @@ std::string VertexShaderDesc(const VShaderID &id) {
|
|||
}
|
||||
}
|
||||
if (id.Bits(VS_BIT_MATERIAL_UPDATE, 3)) desc << "MatUp:" << id.Bits(VS_BIT_MATERIAL_UPDATE, 3) << " ";
|
||||
if (id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2)) desc << "WScale " << id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2) << " ";
|
||||
if (id.Bit(VS_BIT_FLATSHADE)) desc << "Flat ";
|
||||
|
||||
if (id.Bit(VS_BIT_BEZIER)) desc << "Bezier ";
|
||||
|
@ -117,16 +115,6 @@ void ComputeVertexShaderID(VShaderID *id_out, u32 vertType, bool useHWTransform,
|
|||
id.SetBits(VS_BIT_LS1, 2, gstate.getUVLS1());
|
||||
}
|
||||
|
||||
// Bones.
|
||||
bool enableBones = !useSkinInDecode && vertTypeIsSkinningEnabled(vertType);
|
||||
id.SetBit(VS_BIT_ENABLE_BONES, enableBones);
|
||||
if (enableBones) {
|
||||
id.SetBits(VS_BIT_BONES, 3, TranslateNumBones(vertTypeGetNumBoneWeights(vertType)) - 1);
|
||||
// 2 bits. We should probably send in the weight scalefactor as a uniform instead,
|
||||
// or simply preconvert all weights to floats.
|
||||
id.SetBits(VS_BIT_WEIGHT_FMTSCALE, 2, weightsAsFloat ? 0 : (vertType & GE_VTYPE_WEIGHT_MASK) >> GE_VTYPE_WEIGHT_SHIFT);
|
||||
}
|
||||
|
||||
if (gstate.isLightingEnabled()) {
|
||||
// doShadeMapping is stored as UVGenMode, and light type doesn't matter for shade mapping.
|
||||
id.SetBit(VS_BIT_LIGHTING_ENABLE);
|
||||
|
|
|
@ -29,9 +29,7 @@ enum VShaderBit : uint8_t {
|
|||
VS_BIT_UVPROJ_MODE = 18, // 2, can overlap with LS0
|
||||
VS_BIT_LS0 = 18, // 2
|
||||
VS_BIT_LS1 = 20, // 2
|
||||
VS_BIT_BONES = 22, // 3 should be enough, not 8
|
||||
// 25 - 29 are free.
|
||||
VS_BIT_ENABLE_BONES = 30,
|
||||
// 21 - 30 are free.
|
||||
|
||||
// If this is set along with LIGHTING_ENABLE, all other lighting bits below
|
||||
// are passed to the shader directly instead.
|
||||
|
@ -52,8 +50,7 @@ enum VShaderBit : uint8_t {
|
|||
VS_BIT_LIGHT2_ENABLE = 54,
|
||||
VS_BIT_LIGHT3_ENABLE = 55,
|
||||
VS_BIT_LIGHTING_ENABLE = 56,
|
||||
VS_BIT_WEIGHT_FMTSCALE = 57, // only two bits
|
||||
// 59 - 61 are free.
|
||||
// 57 - 61 are free.
|
||||
VS_BIT_FLATSHADE = 62, // 1 bit
|
||||
VS_BIT_BEZIER = 63, // 1 bit
|
||||
// No more free
|
||||
|
|
|
@ -337,11 +337,3 @@ void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms) {
|
||||
for (int i = 0; i < 8; i++) {
|
||||
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
|
||||
ConvertMatrix4x3To3x4Transposed(ub->bones[i], gstate.boneMatrix + 12 * i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -120,17 +120,6 @@ R"( vec4 u_ambient;
|
|||
vec3 u_lightspecular3;
|
||||
)";
|
||||
|
||||
// With some cleverness, we could get away with uploading just half this when only the four or five first
|
||||
// bones are being used. This is 384b.
|
||||
struct alignas(16) UB_VS_Bones {
|
||||
float bones[8][12];
|
||||
};
|
||||
|
||||
static const char * const ub_vs_bonesStr =
|
||||
R"( mat3x4 u_bone0; mat3x4 u_bone1; mat3x4 u_bone2; mat3x4 u_bone3; mat3x4 u_bone4; mat3x4 u_bone5; mat3x4 u_bone6; mat3x4 u_bone7; mat3x4 u_bone8;
|
||||
)";
|
||||
|
||||
|
||||
static const char * const ub_frameStr =
|
||||
R"(
|
||||
float u_rotation;
|
||||
|
@ -145,7 +134,6 @@ void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bo
|
|||
|
||||
void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport, bool useBufferedRendering);
|
||||
void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms);
|
||||
void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms);
|
||||
void FrameUpdateUniforms(UB_Frame *ub, bool useBufferedRendering);
|
||||
|
||||
uint32_t PackLightControlBits();
|
||||
|
|
|
@ -507,7 +507,7 @@ void DrawEngineCommon::SubmitCurve(const void *control_points, const void *indic
|
|||
if (indices)
|
||||
GetIndexBounds(indices, num_points, vertType, &index_lower_bound, &index_upper_bound);
|
||||
|
||||
VertexDecoder *origVDecoder = GetVertexDecoder(GetVertTypeID(vertType, gstate.getUVGenMode(), decOptions_.applySkinInDecode));
|
||||
VertexDecoder *origVDecoder = GetVertexDecoder(GetVertTypeID(vertType, gstate.getUVGenMode(), true));
|
||||
*bytesRead = num_points * origVDecoder->VertexSize();
|
||||
|
||||
// Simplify away bones and morph before proceeding
|
||||
|
@ -572,7 +572,7 @@ void DrawEngineCommon::SubmitCurve(const void *control_points, const void *indic
|
|||
gstate_c.uv.vOff = 0;
|
||||
}
|
||||
|
||||
uint32_t vertTypeID = GetVertTypeID(vertTypeWithIndex16, gstate.getUVGenMode(), decOptions_.applySkinInDecode);
|
||||
uint32_t vertTypeID = GetVertTypeID(vertTypeWithIndex16, gstate.getUVGenMode(), true);
|
||||
int generatedBytesRead;
|
||||
if (output.count)
|
||||
DispatchSubmitPrim(output.vertices, output.indices, PatchPrimToPrim(surface.primType), output.count, vertTypeID, gstate.getCullMode(), &generatedBytesRead);
|
||||
|
|
|
@ -103,9 +103,6 @@ static const ARMReg srcNEON = Q2;
|
|||
static const ARMReg accNEON = Q3;
|
||||
|
||||
static const JitLookup jitLookup[] = {
|
||||
{&VertexDecoder::Step_WeightsU8, &VertexDecoderJitCache::Jit_WeightsU8},
|
||||
{&VertexDecoder::Step_WeightsU16, &VertexDecoderJitCache::Jit_WeightsU16},
|
||||
{&VertexDecoder::Step_WeightsFloat, &VertexDecoderJitCache::Jit_WeightsFloat},
|
||||
{&VertexDecoder::Step_WeightsU8Skin, &VertexDecoderJitCache::Jit_WeightsU8Skin},
|
||||
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
|
||||
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
|
||||
|
@ -296,55 +293,6 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
|
|||
return (JittedVertexDecoder)start;
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsU8() {
|
||||
// Basic implementation - a byte at a time. TODO: Optimize
|
||||
int j;
|
||||
for (j = 0; j < dec_->nweights; j++) {
|
||||
LDRB(tempReg1, srcReg, dec_->weightoff + j);
|
||||
STRB(tempReg1, dstReg, dec_->decFmt.w0off + j);
|
||||
}
|
||||
if (j & 3) {
|
||||
// Create a zero register. Might want to make a fixed one.
|
||||
EOR(scratchReg, scratchReg, scratchReg);
|
||||
}
|
||||
while (j & 3) {
|
||||
STRB(scratchReg, dstReg, dec_->decFmt.w0off + j);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsU16() {
|
||||
// Basic implementation - a short at a time. TODO: Optimize
|
||||
int j;
|
||||
for (j = 0; j < dec_->nweights; j++) {
|
||||
LDRH(tempReg1, srcReg, dec_->weightoff + j * 2);
|
||||
STRH(tempReg1, dstReg, dec_->decFmt.w0off + j * 2);
|
||||
}
|
||||
if (j & 3) {
|
||||
// Create a zero register. Might want to make a fixed one.
|
||||
EOR(scratchReg, scratchReg, scratchReg);
|
||||
}
|
||||
while (j & 3) {
|
||||
STRH(scratchReg, dstReg, dec_->decFmt.w0off + j * 2);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsFloat() {
|
||||
int j;
|
||||
for (j = 0; j < dec_->nweights; j++) {
|
||||
LDR(tempReg1, srcReg, dec_->weightoff + j * 4);
|
||||
STR(tempReg1, dstReg, dec_->decFmt.w0off + j * 4);
|
||||
}
|
||||
if (j & 3) {
|
||||
EOR(tempReg1, tempReg1, tempReg1);
|
||||
}
|
||||
while (j & 3) { // Zero additional weights rounding up to 4.
|
||||
STR(tempReg1, dstReg, dec_->decFmt.w0off + j * 4);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
static const ARMReg weightRegs[8] = { S8, S9, S10, S11, S12, S13, S14, S15 };
|
||||
static const ARMReg neonWeightRegsD[4] = { D4, D5, D6, D7 };
|
||||
static const ARMReg neonWeightRegsQ[2] = { Q2, Q3 };
|
||||
|
|
|
@ -80,9 +80,6 @@ static const ARM64Reg neonWeightRegsQ[2] = { Q3, Q2 }; // reverse order to prev
|
|||
// Q16+ are free-for-all for matrices. In 16 registers, we can fit 4 4x4 matrices.
|
||||
|
||||
static const JitLookup jitLookup[] = {
|
||||
{&VertexDecoder::Step_WeightsU8, &VertexDecoderJitCache::Jit_WeightsU8},
|
||||
{&VertexDecoder::Step_WeightsU16, &VertexDecoderJitCache::Jit_WeightsU16},
|
||||
{&VertexDecoder::Step_WeightsFloat, &VertexDecoderJitCache::Jit_WeightsFloat},
|
||||
{&VertexDecoder::Step_WeightsU8Skin, &VertexDecoderJitCache::Jit_WeightsU8Skin},
|
||||
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
|
||||
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
|
||||
|
@ -350,44 +347,6 @@ void VertexDecoderJitCache::Jit_ApplyWeights() {
|
|||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsU8() {
|
||||
// Basic implementation - a byte at a time. TODO: Optimize
|
||||
int j;
|
||||
for (j = 0; j < dec_->nweights; j++) {
|
||||
LDRB(INDEX_UNSIGNED, tempReg1, srcReg, dec_->weightoff + j);
|
||||
STRB(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.w0off + j);
|
||||
}
|
||||
while (j & 3) {
|
||||
STRB(INDEX_UNSIGNED, WZR, dstReg, dec_->decFmt.w0off + j);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsU16() {
|
||||
// Basic implementation - a short at a time. TODO: Optimize
|
||||
int j;
|
||||
for (j = 0; j < dec_->nweights; j++) {
|
||||
LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->weightoff + j * 2);
|
||||
STRH(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.w0off + j * 2);
|
||||
}
|
||||
while (j & 3) {
|
||||
STRH(INDEX_UNSIGNED, WZR, dstReg, dec_->decFmt.w0off + j * 2);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsFloat() {
|
||||
int j;
|
||||
for (j = 0; j < dec_->nweights; j++) {
|
||||
LDR(INDEX_UNSIGNED, tempReg1, srcReg, dec_->weightoff + j * 4);
|
||||
STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.w0off + j * 4);
|
||||
}
|
||||
while (j & 3) { // Zero additional weights rounding up to 4.
|
||||
STR(INDEX_UNSIGNED, WZR, dstReg, dec_->decFmt.w0off + j * 4);
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsU8Skin() {
|
||||
// Weight is first so srcReg is correct.
|
||||
switch (dec_->nweights) {
|
||||
|
|
|
@ -176,67 +176,6 @@ void PrintDecodedVertex(VertexReader &vtx) {
|
|||
printf("P: %f %f %f\n", pos[0], pos[1], pos[2]);
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_WeightsU8() const
|
||||
{
|
||||
u8 *wt = (u8 *)(decoded_ + decFmt.w0off);
|
||||
const u8 *wdata = (const u8*)(ptr_);
|
||||
int j;
|
||||
for (j = 0; j < nweights; j++)
|
||||
wt[j] = wdata[j];
|
||||
while (j & 3) // Zero additional weights rounding up to 4.
|
||||
wt[j++] = 0;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_WeightsU16() const
|
||||
{
|
||||
u16 *wt = (u16 *)(decoded_ + decFmt.w0off);
|
||||
const u16_le *wdata = (const u16_le *)(ptr_);
|
||||
int j;
|
||||
for (j = 0; j < nweights; j++)
|
||||
wt[j] = wdata[j];
|
||||
while (j & 3) // Zero additional weights rounding up to 4.
|
||||
wt[j++] = 0;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_WeightsU8ToFloat() const
|
||||
{
|
||||
float *wt = (float *)(decoded_ + decFmt.w0off);
|
||||
const u8 *wdata = (const u8*)(ptr_);
|
||||
int j;
|
||||
for (j = 0; j < nweights; j++) {
|
||||
wt[j] = (float)wdata[j] * (1.0f / 128.0f);
|
||||
}
|
||||
while (j & 3) // Zero additional weights rounding up to 4.
|
||||
wt[j++] = 0;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_WeightsU16ToFloat() const
|
||||
{
|
||||
float *wt = (float *)(decoded_ + decFmt.w0off);
|
||||
const u16_le *wdata = (const u16_le *)(ptr_);
|
||||
int j;
|
||||
for (j = 0; j < nweights; j++) {
|
||||
wt[j] = (float)wdata[j] * (1.0f / 32768.0f);
|
||||
}
|
||||
while (j & 3) // Zero additional weights rounding up to 4.
|
||||
wt[j++] = 0;
|
||||
}
|
||||
|
||||
// Float weights should be uncommon, we can live with having to multiply these by 2.0
|
||||
// to avoid special checks in the vertex shader generator.
|
||||
// (PSP uses 0.0-2.0 fixed point numbers for weights)
|
||||
void VertexDecoder::Step_WeightsFloat() const
|
||||
{
|
||||
float *wt = (float *)(decoded_ + decFmt.w0off);
|
||||
const float_le *wdata = (const float_le *)(ptr_);
|
||||
int j;
|
||||
for (j = 0; j < nweights; j++) {
|
||||
wt[j] = wdata[j];
|
||||
}
|
||||
while (j & 3) // Zero additional weights rounding up to 4.
|
||||
wt[j++] = 0.0f;
|
||||
}
|
||||
|
||||
void VertexDecoder::ComputeSkinMatrix(const float weights[8]) const {
|
||||
memset(skinMatrix, 0, sizeof(skinMatrix));
|
||||
for (int j = 0; j < nweights; j++) {
|
||||
|
@ -877,20 +816,6 @@ void VertexDecoder::Step_PosFloatMorphSkin() const {
|
|||
Vec3ByMatrix43(v, pos, skinMatrix);
|
||||
}
|
||||
|
||||
static const StepFunction wtstep[4] = {
|
||||
0,
|
||||
&VertexDecoder::Step_WeightsU8,
|
||||
&VertexDecoder::Step_WeightsU16,
|
||||
&VertexDecoder::Step_WeightsFloat,
|
||||
};
|
||||
|
||||
static const StepFunction wtstepToFloat[4] = {
|
||||
0,
|
||||
&VertexDecoder::Step_WeightsU8ToFloat,
|
||||
&VertexDecoder::Step_WeightsU16ToFloat,
|
||||
&VertexDecoder::Step_WeightsFloat,
|
||||
};
|
||||
|
||||
// TODO: Morph weights correctly! This is missing. Not sure if any game actually
|
||||
// use this functionality at all.
|
||||
|
||||
|
@ -1089,7 +1014,7 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
|
|||
DEBUG_LOG(G3D, "VTYPE: THRU=%i TC=%i COL=%i POS=%i NRM=%i WT=%i NW=%i IDX=%i MC=%i", (int)throughmode, tc, col, pos, nrm, weighttype, nweights, idx, morphcount);
|
||||
}
|
||||
|
||||
skinInDecode = weighttype != 0 && options.applySkinInDecode;
|
||||
skinInDecode = weighttype != 0;
|
||||
|
||||
if (weighttype) { // && nweights?
|
||||
weightoff = size;
|
||||
|
@ -1098,43 +1023,11 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
|
|||
if (wtalign[weighttype] > biggest)
|
||||
biggest = wtalign[weighttype];
|
||||
|
||||
if (skinInDecode) {
|
||||
// No visible output, computes a matrix that is passed through the skinMatrix variable
|
||||
// to the "nrm" and "pos" steps.
|
||||
// Technically we should support morphing the weights too, but I have a hard time
|
||||
// imagining that any game would use that.. but you never know.
|
||||
steps_[numSteps_++] = wtstep_skin[weighttype];
|
||||
} else {
|
||||
int fmtBase = DEC_FLOAT_1;
|
||||
if (options.expandAllWeightsToFloat) {
|
||||
steps_[numSteps_++] = wtstepToFloat[weighttype];
|
||||
fmtBase = DEC_FLOAT_1;
|
||||
} else {
|
||||
steps_[numSteps_++] = wtstep[weighttype];
|
||||
if (weighttype == GE_VTYPE_WEIGHT_8BIT >> GE_VTYPE_WEIGHT_SHIFT) {
|
||||
fmtBase = DEC_U8_1;
|
||||
} else if (weighttype == GE_VTYPE_WEIGHT_16BIT >> GE_VTYPE_WEIGHT_SHIFT) {
|
||||
fmtBase = DEC_U16_1;
|
||||
} else if (weighttype == GE_VTYPE_WEIGHT_FLOAT >> GE_VTYPE_WEIGHT_SHIFT) {
|
||||
fmtBase = DEC_FLOAT_1;
|
||||
}
|
||||
}
|
||||
|
||||
int numWeights = TranslateNumBones(nweights);
|
||||
|
||||
if (numWeights <= 4) {
|
||||
decFmt.w0off = decOff;
|
||||
decFmt.w0fmt = fmtBase + numWeights - 1;
|
||||
decOff += DecFmtSize(decFmt.w0fmt);
|
||||
} else {
|
||||
decFmt.w0off = decOff;
|
||||
decFmt.w0fmt = fmtBase + 3;
|
||||
decOff += DecFmtSize(decFmt.w0fmt);
|
||||
decFmt.w1off = decOff;
|
||||
decFmt.w1fmt = fmtBase + numWeights - 5;
|
||||
decOff += DecFmtSize(decFmt.w1fmt);
|
||||
}
|
||||
}
|
||||
// No visible output, computes a matrix that is passed through the skinMatrix variable
|
||||
// to the "nrm" and "pos" steps.
|
||||
// Technically we should support morphing the weights too, but I have a hard time
|
||||
// imagining that any game would use that.. but you never know.
|
||||
steps_[numSteps_++] = wtstep_skin[weighttype];
|
||||
}
|
||||
|
||||
if (tc) {
|
||||
|
|
|
@ -333,7 +333,6 @@ typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count);
|
|||
struct VertexDecoderOptions {
|
||||
bool expandAllWeightsToFloat;
|
||||
bool expand8BitNormalsToFloat;
|
||||
bool applySkinInDecode;
|
||||
};
|
||||
|
||||
class VertexDecoder {
|
||||
|
@ -353,12 +352,6 @@ public:
|
|||
|
||||
std::string GetString(DebugShaderStringType stringType);
|
||||
|
||||
void Step_WeightsU8() const;
|
||||
void Step_WeightsU16() const;
|
||||
void Step_WeightsU8ToFloat() const;
|
||||
void Step_WeightsU16ToFloat() const;
|
||||
void Step_WeightsFloat() const;
|
||||
|
||||
void ComputeSkinMatrix(const float weights[8]) const;
|
||||
|
||||
void Step_WeightsU8Skin() const;
|
||||
|
@ -512,12 +505,6 @@ public:
|
|||
JittedVertexDecoder Compile(const VertexDecoder &dec, int32_t *jittedSize);
|
||||
void Clear();
|
||||
|
||||
void Jit_WeightsU8();
|
||||
void Jit_WeightsU16();
|
||||
void Jit_WeightsU8ToFloat();
|
||||
void Jit_WeightsU16ToFloat();
|
||||
void Jit_WeightsFloat();
|
||||
|
||||
void Jit_WeightsU8Skin();
|
||||
void Jit_WeightsU16Skin();
|
||||
void Jit_WeightsFloatSkin();
|
||||
|
|
|
@ -94,16 +94,10 @@ static const X64Reg fpScratchReg4 = XMM4;
|
|||
// on the interpreter if the compiler fails.
|
||||
|
||||
static const JitLookup jitLookup[] = {
|
||||
{&VertexDecoder::Step_WeightsU8, &VertexDecoderJitCache::Jit_WeightsU8},
|
||||
{&VertexDecoder::Step_WeightsU16, &VertexDecoderJitCache::Jit_WeightsU16},
|
||||
{&VertexDecoder::Step_WeightsFloat, &VertexDecoderJitCache::Jit_WeightsFloat},
|
||||
{&VertexDecoder::Step_WeightsU8Skin, &VertexDecoderJitCache::Jit_WeightsU8Skin},
|
||||
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
|
||||
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
|
||||
|
||||
{&VertexDecoder::Step_WeightsU8ToFloat, &VertexDecoderJitCache::Jit_WeightsU8ToFloat},
|
||||
{&VertexDecoder::Step_WeightsU16ToFloat, &VertexDecoderJitCache::Jit_WeightsU16ToFloat},
|
||||
|
||||
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
|
||||
{&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat},
|
||||
{&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat},
|
||||
|
@ -297,175 +291,6 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
|
|||
return (JittedVertexDecoder)start;
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsU8() {
|
||||
switch (dec_->nweights) {
|
||||
case 1:
|
||||
MOVZX(32, 8, tempReg1, MDisp(srcReg, dec_->weightoff));
|
||||
break;
|
||||
case 2:
|
||||
MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->weightoff));
|
||||
break;
|
||||
case 3:
|
||||
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
|
||||
AND(32, R(tempReg1), Imm32(0x00FFFFFF));
|
||||
break;
|
||||
case 4:
|
||||
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
|
||||
break;
|
||||
case 5:
|
||||
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
|
||||
MOVZX(32, 8, tempReg2, MDisp(srcReg, dec_->weightoff + 4));
|
||||
break;
|
||||
case 6:
|
||||
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
|
||||
MOVZX(32, 16, tempReg2, MDisp(srcReg, dec_->weightoff + 4));
|
||||
break;
|
||||
case 7:
|
||||
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
|
||||
MOV(32, R(tempReg2), MDisp(srcReg, dec_->weightoff + 4));
|
||||
AND(32, R(tempReg2), Imm32(0x00FFFFFF));
|
||||
break;
|
||||
case 8:
|
||||
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
|
||||
MOV(32, R(tempReg2), MDisp(srcReg, dec_->weightoff + 4));
|
||||
break;
|
||||
}
|
||||
|
||||
if (dec_->nweights <= 4) {
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
|
||||
} else {
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.w1off), R(tempReg2));
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsU16() {
|
||||
switch (dec_->nweights) {
|
||||
case 1:
|
||||
MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->weightoff));
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.w0off + 4), Imm32(0));
|
||||
return;
|
||||
|
||||
case 2:
|
||||
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.w0off + 4), Imm32(0));
|
||||
return;
|
||||
|
||||
case 3:
|
||||
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
|
||||
MOVZX(32, 16, tempReg2, MDisp(srcReg, dec_->weightoff + 4));
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.w0off + 4), R(tempReg2));
|
||||
return;
|
||||
|
||||
case 4:
|
||||
// Anything above 4 will do 4 here, and then the rest after.
|
||||
case 5:
|
||||
case 6:
|
||||
case 7:
|
||||
case 8:
|
||||
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
|
||||
MOV(32, R(tempReg2), MDisp(srcReg, dec_->weightoff + 4));
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.w0off + 4), R(tempReg2));
|
||||
break;
|
||||
}
|
||||
|
||||
// Basic implementation - a short at a time. TODO: Optimize
|
||||
int j;
|
||||
for (j = 4; j < dec_->nweights; j++) {
|
||||
MOV(16, R(tempReg1), MDisp(srcReg, dec_->weightoff + j * 2));
|
||||
MOV(16, MDisp(dstReg, dec_->decFmt.w0off + j * 2), R(tempReg1));
|
||||
}
|
||||
while (j & 3) {
|
||||
MOV(16, MDisp(dstReg, dec_->decFmt.w0off + j * 2), Imm16(0));
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsU8ToFloat() {
|
||||
if (dec_->nweights >= 4) {
|
||||
Jit_AnyU8ToFloat(dec_->weightoff, 32);
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
|
||||
if (dec_->nweights > 4) {
|
||||
Jit_AnyU8ToFloat(dec_->weightoff + 4, (dec_->nweights - 4) * 8);
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w1off), XMM3);
|
||||
}
|
||||
} else {
|
||||
Jit_AnyU8ToFloat(dec_->weightoff, dec_->nweights * 8);
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsU16ToFloat() {
|
||||
if (dec_->nweights >= 4) {
|
||||
Jit_AnyU16ToFloat(dec_->weightoff, 64);
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
|
||||
if (dec_->nweights > 4) {
|
||||
Jit_AnyU16ToFloat(dec_->weightoff + 4 * 2, (dec_->nweights - 4) * 16);
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w1off), XMM3);
|
||||
}
|
||||
} else {
|
||||
Jit_AnyU16ToFloat(dec_->weightoff, dec_->nweights * 16);
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsFloat() {
|
||||
int j;
|
||||
switch (dec_->nweights) {
|
||||
case 1:
|
||||
// MOVSS: When the source operand is a memory location and destination operand is an XMM register, the three high-order doublewords of the destination operand are cleared to all 0s.
|
||||
MOVSS(XMM3, MDisp(srcReg, dec_->weightoff));
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
|
||||
break;
|
||||
|
||||
case 2:
|
||||
MOVQ_xmm(XMM3, MDisp(srcReg, dec_->weightoff));
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
|
||||
break;
|
||||
|
||||
case 4:
|
||||
MOVUPS(XMM3, MDisp(srcReg, dec_->weightoff));
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
|
||||
break;
|
||||
|
||||
case 5:
|
||||
MOVUPS(XMM3, MDisp(srcReg, dec_->weightoff));
|
||||
MOVSS(XMM4, MDisp(srcReg, dec_->weightoff + 16));
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off + 16), XMM4);
|
||||
break;
|
||||
|
||||
case 6:
|
||||
MOVUPS(XMM3, MDisp(srcReg, dec_->weightoff));
|
||||
MOVQ_xmm(XMM4, MDisp(srcReg, dec_->weightoff + 16));
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off + 16), XMM4);
|
||||
break;
|
||||
|
||||
case 8:
|
||||
MOVUPS(XMM3, MDisp(srcReg, dec_->weightoff));
|
||||
MOVUPS(XMM4, MDisp(srcReg, dec_->weightoff + 16));
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
|
||||
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off + 16), XMM4);
|
||||
break;
|
||||
|
||||
default:
|
||||
for (j = 0; j < dec_->nweights; j++) {
|
||||
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff + j * 4));
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.w0off + j * 4), R(tempReg1));
|
||||
}
|
||||
while (j & 3) { // Zero additional weights rounding up to 4.
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.w0off + j * 4), Imm32(0));
|
||||
j++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_WeightsU8Skin() {
|
||||
MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));
|
||||
|
||||
|
|
|
@ -30,6 +30,7 @@
|
|||
#include "GPU/Common/ShaderUniforms.h"
|
||||
#include "GPU/Common/VertexDecoderCommon.h"
|
||||
#include "GPU/Common/VertexShaderGenerator.h"
|
||||
#include "GPU/Vulkan/DrawEngineVulkan.h"
|
||||
|
||||
#undef WRITE
|
||||
|
||||
|
@ -190,7 +191,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
|||
bool flipNormal = id.Bit(VS_BIT_NORM_REVERSE);
|
||||
int ls0 = id.Bits(VS_BIT_LS0, 2);
|
||||
int ls1 = id.Bits(VS_BIT_LS1, 2);
|
||||
bool enableBones = id.Bit(VS_BIT_ENABLE_BONES) && useHWTransform;
|
||||
bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE);
|
||||
int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3);
|
||||
|
||||
|
@ -200,9 +200,8 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
|||
return false;
|
||||
}
|
||||
|
||||
// Apparently we don't support bezier/spline together with bones.
|
||||
bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones && useHWTransform;
|
||||
bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones && useHWTransform;
|
||||
bool doBezier = id.Bit(VS_BIT_BEZIER) && useHWTransform;
|
||||
bool doSpline = id.Bit(VS_BIT_SPLINE) && useHWTransform;
|
||||
if (doBezier || doSpline) {
|
||||
if (!hasNormal) {
|
||||
// Bad usage.
|
||||
|
@ -235,11 +234,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
|||
}
|
||||
}
|
||||
|
||||
int numBoneWeights = 0;
|
||||
int boneWeightScale = id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2);
|
||||
if (enableBones) {
|
||||
numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3);
|
||||
}
|
||||
bool texCoordInVec3 = false;
|
||||
|
||||
bool vertexRangeCulling = id.Bit(VS_BIT_VERTEX_RANGE_CULLING) && !isModeThrough;
|
||||
|
@ -254,15 +248,9 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
|||
WRITE(p, "layout (std140, set = 0, binding = 0) uniform frameVars {\n%s};\n", ub_frameStr);
|
||||
}
|
||||
|
||||
WRITE(p, "layout (std140, set = 1, binding = 3) uniform baseVars {\n%s};\n", ub_baseStr);
|
||||
WRITE(p, "layout (std140, set = 1, binding = %d) uniform baseVars {\n%s};\n", DRAW_BINDING_DYNUBO_BASE, ub_baseStr);
|
||||
if (enableLighting || doShadeMapping)
|
||||
WRITE(p, "layout (std140, set = 1, binding = 4) uniform lightVars {\n%s};\n", ub_vs_lightsStr);
|
||||
if (enableBones)
|
||||
WRITE(p, "layout (std140, set = 1, binding = 5) uniform boneVars {\n%s};\n", ub_vs_bonesStr);
|
||||
|
||||
if (enableBones) {
|
||||
WRITE(p, "%s", boneWeightDecl[numBoneWeights]);
|
||||
}
|
||||
WRITE(p, "layout (std140, set = 1, binding = %d) uniform lightVars {\n%s};\n", DRAW_BINDING_DYNUBO_LIGHT, ub_vs_lightsStr);
|
||||
|
||||
if (useHWTransform)
|
||||
WRITE(p, "layout (location = %d) in vec3 position;\n", (int)PspAttributeLocation::POSITION);
|
||||
|
@ -304,7 +292,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
|||
if (compat.shaderLanguage == HLSL_D3D11) {
|
||||
WRITE(p, "cbuffer base : register(b0) {\n%s};\n", ub_baseStr);
|
||||
WRITE(p, "cbuffer lights: register(b1) {\n%s};\n", ub_vs_lightsStr);
|
||||
WRITE(p, "cbuffer bones : register(b2) {\n%s};\n", ub_vs_bonesStr);
|
||||
} else {
|
||||
WRITE(p, "#pragma warning( disable : 3571 )\n");
|
||||
if (isModeThrough) {
|
||||
|
@ -325,15 +312,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
|||
WRITE(p, "mat3x4 u_view : register(c%i);\n", CONST_VS_VIEW);
|
||||
if (doTextureTransform)
|
||||
WRITE(p, "mat3x4 u_texmtx : register(c%i);\n", CONST_VS_TEXMTX);
|
||||
if (enableBones) {
|
||||
#ifdef USE_BONE_ARRAY
|
||||
WRITE(p, "mat3x4 u_bone[%i] : register(c%i);\n", numBones, CONST_VS_BONE0);
|
||||
#else
|
||||
for (int i = 0; i < numBoneWeights; i++) {
|
||||
WRITE(p, "mat3x4 u_bone%i : register(c%i);\n", i, CONST_VS_BONE0 + i * 3);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (doTexture) {
|
||||
WRITE(p, "vec4 u_uvscaleoffset : register(c%i);\n", CONST_VS_UVSCALEOFFSET);
|
||||
}
|
||||
|
@ -385,9 +363,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
|||
if ((doSpline || doBezier) && compat.shaderLanguage == HLSL_D3D11) {
|
||||
WRITE(p, " uint instanceId : SV_InstanceID;\n");
|
||||
}
|
||||
if (enableBones) {
|
||||
WRITE(p, " %s", boneWeightAttrDeclHLSL[numBoneWeights]);
|
||||
}
|
||||
if (doTexture && hasTexcoord) {
|
||||
WRITE(p, " vec2 texcoord : TEXCOORD0;\n");
|
||||
}
|
||||
|
@ -450,17 +425,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
|||
}
|
||||
WRITE(p, "};\n");
|
||||
} else {
|
||||
if (enableBones) {
|
||||
const char * const * boneWeightDecl = boneWeightAttrDecl;
|
||||
if (!strcmp(compat.attribute, "in")) {
|
||||
boneWeightDecl = boneWeightInDecl;
|
||||
}
|
||||
WRITE(p, "%s", boneWeightDecl[numBoneWeights]);
|
||||
*attrMask |= 1 << ATTR_W1;
|
||||
if (numBoneWeights >= 5)
|
||||
*attrMask |= 1 << ATTR_W2;
|
||||
}
|
||||
|
||||
if (useHWTransform)
|
||||
WRITE(p, "%s vec3 position;\n", compat.attribute);
|
||||
else
|
||||
|
@ -523,12 +487,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
|||
WRITE(p, "uniform mediump mat4 u_texmtx;\n");
|
||||
*uniformMask |= DIRTY_TEXMATRIX;
|
||||
}
|
||||
if (enableBones) {
|
||||
for (int i = 0; i < numBoneWeights; i++) {
|
||||
WRITE(p, "uniform mat4 u_bone%i;\n", i);
|
||||
*uniformMask |= DIRTY_BONEMATRIX0 << i;
|
||||
}
|
||||
}
|
||||
if (doTexture) {
|
||||
WRITE(p, "uniform vec4 u_uvscaleoffset;\n");
|
||||
*uniformMask |= DIRTY_UVSCALEOFFSET;
|
||||
|
@ -636,7 +594,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
|||
WRITE(p, " vec4 tex;\n");
|
||||
WRITE(p, " vec4 col;\n");
|
||||
WRITE(p, "};\n");
|
||||
WRITE(p, "layout (std430, set = 1, binding = 6) readonly buffer s_tess_data {\n");
|
||||
WRITE(p, "layout (std430, set = 1, binding = %d) readonly buffer s_tess_data {\n", DRAW_BINDING_TESS_STORAGE_BUF);
|
||||
WRITE(p, " TessData tess_data[];\n");
|
||||
WRITE(p, "};\n");
|
||||
|
||||
|
@ -644,10 +602,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
|||
WRITE(p, " vec4 basis;\n");
|
||||
WRITE(p, " vec4 deriv;\n");
|
||||
WRITE(p, "};\n");
|
||||
WRITE(p, "layout (std430, set = 1, binding = 7) readonly buffer s_tess_weights_u {\n");
|
||||
WRITE(p, "layout (std430, set = 1, binding = %d) readonly buffer s_tess_weights_u {\n", DRAW_BINDING_TESS_STORAGE_BUF_WU);
|
||||
WRITE(p, " TessWeight tess_weights_u[];\n");
|
||||
WRITE(p, "};\n");
|
||||
WRITE(p, "layout (std430, set = 1, binding = 8) readonly buffer s_tess_weights_v {\n");
|
||||
WRITE(p, "layout (std430, set = 1, binding = %d) readonly buffer s_tess_weights_v {\n", DRAW_BINDING_TESS_STORAGE_BUF_WV);
|
||||
WRITE(p, " TessWeight tess_weights_v[];\n");
|
||||
WRITE(p, "};\n");
|
||||
} else if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
|
||||
|
@ -823,9 +781,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
|||
if (!useHWTransform) {
|
||||
WRITE(p, " float fog = In.fog;\n");
|
||||
}
|
||||
if (enableBones) {
|
||||
WRITE(p, "%s", boneWeightAttrInitHLSL[numBoneWeights]);
|
||||
}
|
||||
}
|
||||
|
||||
if (!useHWTransform) {
|
||||
|
@ -873,7 +828,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
|||
}
|
||||
} else {
|
||||
// Step 1: World Transform / Skinning
|
||||
if (!enableBones) {
|
||||
if (true) {
|
||||
if (doBezier || doSpline) {
|
||||
// Hardware tessellation
|
||||
WRITE(p, " Tess tess;\n");
|
||||
|
@ -897,44 +852,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
|
|||
else
|
||||
WRITE(p, " mediump vec3 worldnormal = normalizeOr001(mul(vec4(0.0, 0.0, %s1.0, 0.0), u_world).xyz);\n", flipNormal ? "-" : "");
|
||||
}
|
||||
} else {
|
||||
static const char *rescale[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
|
||||
const char *factor = rescale[boneWeightScale];
|
||||
|
||||
static const char * const boneWeightAttr[8] = {
|
||||
"w1.x", "w1.y", "w1.z", "w1.w",
|
||||
"w2.x", "w2.y", "w2.z", "w2.w",
|
||||
};
|
||||
|
||||
const char *boneMatrix = compat.forceMatrix4x4 ? "mat4" : "mat3x4";
|
||||
|
||||
// Uncomment this to screw up bone shaders to check the vertex shader software fallback
|
||||
// WRITE(p, "THIS SHOULD ERROR! #error");
|
||||
if (numBoneWeights == 1 && ShaderLanguageIsOpenGL(compat.shaderLanguage))
|
||||
WRITE(p, " %s skinMatrix = mul(w1, u_bone0)", boneMatrix);
|
||||
else
|
||||
WRITE(p, " %s skinMatrix = mul(w1.x, u_bone0)", boneMatrix);
|
||||
for (int i = 1; i < numBoneWeights; i++) {
|
||||
const char *weightAttr = boneWeightAttr[i];
|
||||
// workaround for "cant do .x of scalar" issue.
|
||||
if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
|
||||
if (numBoneWeights == 1 && i == 0) weightAttr = "w1";
|
||||
if (numBoneWeights == 5 && i == 4) weightAttr = "w2";
|
||||
}
|
||||
WRITE(p, " + mul(%s, u_bone%i)", weightAttr, i);
|
||||
}
|
||||
|
||||
WRITE(p, ";\n");
|
||||
|
||||
WRITE(p, " vec3 skinnedpos = mul(vec4(position, 1.0), skinMatrix).xyz%s;\n", factor);
|
||||
WRITE(p, " vec3 worldpos = mul(vec4(skinnedpos, 1.0), u_world).xyz;\n");
|
||||
|
||||
if (hasNormal) {
|
||||
WRITE(p, " mediump vec3 skinnednormal = mul(vec4(%snormal, 0.0), skinMatrix).xyz%s;\n", flipNormal ? "-" : "", factor);
|
||||
} else {
|
||||
WRITE(p, " mediump vec3 skinnednormal = mul(vec4(0.0, 0.0, %s1.0, 0.0), skinMatrix).xyz%s;\n", flipNormal ? "-" : "", factor);
|
||||
}
|
||||
WRITE(p, " mediump vec3 worldnormal = normalizeOr001(mul(vec4(skinnednormal, 0.0), u_world).xyz);\n");
|
||||
}
|
||||
|
||||
std::string matrixPostfix;
|
||||
|
|
|
@ -55,16 +55,7 @@ enum {
|
|||
CONST_VS_LIGHTSPECULAR = 44,
|
||||
CONST_VS_LIGHTAMBIENT = 48,
|
||||
CONST_VS_DEPTHRANGE = 52,
|
||||
CONST_VS_BONE0 = 53,
|
||||
CONST_VS_BONE1 = 56,
|
||||
CONST_VS_BONE2 = 59,
|
||||
CONST_VS_BONE3 = 62,
|
||||
CONST_VS_BONE4 = 65,
|
||||
CONST_VS_BONE5 = 68,
|
||||
CONST_VS_BONE6 = 71,
|
||||
CONST_VS_BONE7 = 74,
|
||||
CONST_VS_BONE8 = 77,
|
||||
CONST_VS_CULLRANGEMIN = 80,
|
||||
CONST_VS_CULLRANGEMAX = 81,
|
||||
CONST_VS_ROTATION = 82,
|
||||
CONST_VS_CULLRANGEMIN = 53,
|
||||
CONST_VS_CULLRANGEMAX = 54,
|
||||
CONST_VS_ROTATION = 55,
|
||||
};
|
||||
|
|
|
@ -364,8 +364,8 @@ void DrawEngineD3D11::DoFlush() {
|
|||
|
||||
// Cannot cache vertex data with morph enabled.
|
||||
bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
|
||||
// Also avoid caching when software skinning.
|
||||
if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK))
|
||||
// Also avoid caching when skinning.
|
||||
if (lastVType_ & GE_VTYPE_WEIGHT_MASK)
|
||||
useCache = false;
|
||||
|
||||
if (useCache) {
|
||||
|
@ -538,7 +538,7 @@ rotateVBO:
|
|||
|
||||
D3D11VertexShader *vshader;
|
||||
D3D11FragmentShader *fshader;
|
||||
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, pipelineState_, useHWTransform, useHWTessellation_, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode);
|
||||
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, pipelineState_, useHWTransform, useHWTessellation_, decOptions_.expandAllWeightsToFloat, true);
|
||||
ID3D11InputLayout *inputLayout = SetupDecFmtForDraw(vshader, dec_->GetDecVtxFmt(), dec_->VertexType());
|
||||
context_->PSSetShader(fshader->GetShader(), nullptr, 0);
|
||||
context_->VSSetShader(vshader->GetShader(), nullptr, 0);
|
||||
|
@ -581,7 +581,6 @@ rotateVBO:
|
|||
}
|
||||
} else {
|
||||
PROFILE_THIS_SCOPE("soft");
|
||||
decOptions_.applySkinInDecode = true;
|
||||
DecodeVerts(decoded);
|
||||
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
|
||||
if (gstate.isModeThrough()) {
|
||||
|
@ -649,7 +648,7 @@ rotateVBO:
|
|||
if (result.action == SW_DRAW_PRIMITIVES) {
|
||||
D3D11VertexShader *vshader;
|
||||
D3D11FragmentShader *fshader;
|
||||
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode);
|
||||
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat, true);
|
||||
context_->PSSetShader(fshader->GetShader(), nullptr, 0);
|
||||
context_->VSSetShader(vshader->GetShader(), nullptr, 0);
|
||||
shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
|
||||
|
@ -710,7 +709,6 @@ rotateVBO:
|
|||
framebufferManager_->ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, clearColor);
|
||||
}
|
||||
}
|
||||
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
|
||||
}
|
||||
|
||||
gpuStats.numDrawCalls += numDrawCalls;
|
||||
|
|
|
@ -95,24 +95,19 @@ ShaderManagerD3D11::ShaderManagerD3D11(Draw::DrawContext *draw, ID3D11Device *de
|
|||
codeBuffer_ = new char[CODE_BUFFER_SIZE];
|
||||
memset(&ub_base, 0, sizeof(ub_base));
|
||||
memset(&ub_lights, 0, sizeof(ub_lights));
|
||||
memset(&ub_bones, 0, sizeof(ub_bones));
|
||||
|
||||
static_assert(sizeof(ub_base) <= 512, "ub_base grew too big");
|
||||
static_assert(sizeof(ub_lights) <= 512, "ub_lights grew too big");
|
||||
static_assert(sizeof(ub_bones) <= 384, "ub_bones grew too big");
|
||||
|
||||
D3D11_BUFFER_DESC desc{sizeof(ub_base), D3D11_USAGE_DYNAMIC, D3D11_BIND_CONSTANT_BUFFER, D3D11_CPU_ACCESS_WRITE };
|
||||
ASSERT_SUCCESS(device_->CreateBuffer(&desc, nullptr, &push_base));
|
||||
desc.ByteWidth = sizeof(ub_lights);
|
||||
ASSERT_SUCCESS(device_->CreateBuffer(&desc, nullptr, &push_lights));
|
||||
desc.ByteWidth = sizeof(ub_bones);
|
||||
ASSERT_SUCCESS(device_->CreateBuffer(&desc, nullptr, &push_bones));
|
||||
}
|
||||
|
||||
ShaderManagerD3D11::~ShaderManagerD3D11() {
|
||||
push_base->Release();
|
||||
push_lights->Release();
|
||||
push_bones->Release();
|
||||
ClearShaders();
|
||||
delete[] codeBuffer_;
|
||||
}
|
||||
|
@ -161,19 +156,13 @@ uint64_t ShaderManagerD3D11::UpdateUniforms(bool useBufferedRendering) {
|
|||
memcpy(map.pData, &ub_lights, sizeof(ub_lights));
|
||||
context_->Unmap(push_lights, 0);
|
||||
}
|
||||
if (dirty & DIRTY_BONE_UNIFORMS) {
|
||||
BoneUpdateUniforms(&ub_bones, dirty);
|
||||
context_->Map(push_bones, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
|
||||
memcpy(map.pData, &ub_bones, sizeof(ub_bones));
|
||||
context_->Unmap(push_bones, 0);
|
||||
}
|
||||
}
|
||||
gstate_c.CleanUniforms();
|
||||
return dirty;
|
||||
}
|
||||
|
||||
void ShaderManagerD3D11::BindUniforms() {
|
||||
ID3D11Buffer *vs_cbs[3] = { push_base, push_lights, push_bones };
|
||||
ID3D11Buffer *vs_cbs[3] = { push_base, push_lights };
|
||||
ID3D11Buffer *ps_cbs[1] = { push_base };
|
||||
context_->VSSetConstantBuffers(0, 3, vs_cbs);
|
||||
context_->PSSetConstantBuffers(0, 1, ps_cbs);
|
||||
|
|
|
@ -85,8 +85,7 @@ public:
|
|||
ShaderManagerD3D11(Draw::DrawContext *draw, ID3D11Device *device, ID3D11DeviceContext *context, D3D_FEATURE_LEVEL featureLevel);
|
||||
~ShaderManagerD3D11();
|
||||
|
||||
void GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat, bool useSkinInDecode);
|
||||
void ClearShaders();
|
||||
void GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat, bool useSkinInDecode); void ClearShaders();
|
||||
void DirtyLastShader() override;
|
||||
|
||||
int GetNumVertexShaders() const { return (int)vsCache_.size(); }
|
||||
|
@ -122,12 +121,10 @@ private:
|
|||
// Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time.
|
||||
UB_VS_FS_Base ub_base;
|
||||
UB_VS_Lights ub_lights;
|
||||
UB_VS_Bones ub_bones;
|
||||
|
||||
// Not actual pushbuffers, requires D3D11.1, let's try to live without that first.
|
||||
ID3D11Buffer *push_base;
|
||||
ID3D11Buffer *push_lights;
|
||||
ID3D11Buffer *push_bones;
|
||||
|
||||
D3D11FragmentShader *lastFShader_ = nullptr;
|
||||
D3D11VertexShader *lastVShader_ = nullptr;
|
||||
|
|
|
@ -348,8 +348,8 @@ void DrawEngineDX9::DoFlush() {
|
|||
|
||||
// Cannot cache vertex data with morph enabled.
|
||||
bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
|
||||
// Also avoid caching when software skinning.
|
||||
if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK))
|
||||
// Also avoid caching when skinning.
|
||||
if (lastVType_ & GE_VTYPE_WEIGHT_MASK)
|
||||
useCache = false;
|
||||
|
||||
if (useCache) {
|
||||
|
@ -522,7 +522,7 @@ rotateVBO:
|
|||
ApplyDrawState(prim);
|
||||
ApplyDrawStateLate();
|
||||
|
||||
VSShader *vshader = shaderManager_->ApplyShader(true, useHWTessellation_, lastVType_, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode, pipelineState_);
|
||||
VSShader *vshader = shaderManager_->ApplyShader(true, useHWTessellation_, lastVType_, decOptions_.expandAllWeightsToFloat, true, pipelineState_);
|
||||
IDirect3DVertexDeclaration9 *pHardwareVertexDecl = SetupDecFmtForDraw(vshader, dec_->GetDecVtxFmt(), dec_->VertexType());
|
||||
|
||||
if (pHardwareVertexDecl) {
|
||||
|
@ -546,7 +546,6 @@ rotateVBO:
|
|||
}
|
||||
}
|
||||
} else {
|
||||
decOptions_.applySkinInDecode = true;
|
||||
DecodeVerts(decoded);
|
||||
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
|
||||
if (gstate.isModeThrough()) {
|
||||
|
@ -614,7 +613,7 @@ rotateVBO:
|
|||
|
||||
ApplyDrawStateLate();
|
||||
|
||||
VSShader *vshader = shaderManager_->ApplyShader(false, false, lastVType_, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode, pipelineState_);
|
||||
VSShader *vshader = shaderManager_->ApplyShader(false, false, lastVType_, decOptions_.expandAllWeightsToFloat, true, pipelineState_);
|
||||
|
||||
if (result.action == SW_DRAW_PRIMITIVES) {
|
||||
if (result.setStencil) {
|
||||
|
@ -654,7 +653,6 @@ rotateVBO:
|
|||
framebufferManager_->ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, clearColor);
|
||||
}
|
||||
}
|
||||
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
|
||||
}
|
||||
|
||||
gpuStats.numDrawCalls += numDrawCalls;
|
||||
|
|
|
@ -324,7 +324,7 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) {
|
|||
}
|
||||
|
||||
const uint64_t vsUniforms = DIRTY_PROJMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX |
|
||||
DIRTY_FOGCOEF | DIRTY_BONE_UNIFORMS | DIRTY_UVSCALEOFFSET | DIRTY_DEPTHRANGE | DIRTY_CULLRANGE |
|
||||
DIRTY_FOGCOEF | DIRTY_UVSCALEOFFSET | DIRTY_DEPTHRANGE | DIRTY_CULLRANGE |
|
||||
DIRTY_AMBIENT | DIRTY_MATAMBIENTALPHA | DIRTY_MATSPECULAR | DIRTY_MATDIFFUSE | DIRTY_MATEMISSIVE | DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3;
|
||||
|
||||
void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) {
|
||||
|
@ -387,38 +387,6 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) {
|
|||
}
|
||||
VSSetFloatArray(CONST_VS_FOGCOEF, fogcoef, 2);
|
||||
}
|
||||
// TODO: Could even set all bones in one go if they're all dirty.
|
||||
#ifdef USE_BONE_ARRAY
|
||||
if (u_bone != 0) {
|
||||
float allBones[8 * 16];
|
||||
|
||||
bool allDirty = true;
|
||||
for (int i = 0; i < numBones; i++) {
|
||||
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
|
||||
ConvertMatrix4x3To4x4(allBones + 16 * i, gstate.boneMatrix + 12 * i);
|
||||
} else {
|
||||
allDirty = false;
|
||||
}
|
||||
}
|
||||
if (allDirty) {
|
||||
// Set them all with one call
|
||||
//glUniformMatrix4fv(u_bone, numBones, GL_FALSE, allBones);
|
||||
} else {
|
||||
// Set them one by one. Could try to coalesce two in a row etc but too lazy.
|
||||
for (int i = 0; i < numBones; i++) {
|
||||
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
|
||||
//glUniformMatrix4fv(u_bone + i, 1, GL_FALSE, allBones + 16 * i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
for (int i = 0; i < 8; i++) {
|
||||
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
|
||||
VSSetMatrix4x3_3(CONST_VS_BONE0 + 3 * i, gstate.boneMatrix + 12 * i);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Texturing
|
||||
if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
|
||||
|
|
|
@ -267,7 +267,7 @@ void DrawEngineGLES::DoFlush() {
|
|||
GEPrimitiveType prim = prevPrim_;
|
||||
|
||||
VShaderID vsid;
|
||||
Shader *vshader = shaderManager_->ApplyVertexShader(CanUseHardwareTransform(prim), useHWTessellation_, lastVType_, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode, &vsid);
|
||||
Shader *vshader = shaderManager_->ApplyVertexShader(CanUseHardwareTransform(prim), useHWTessellation_, lastVType_, decOptions_.expandAllWeightsToFloat, true, &vsid);
|
||||
|
||||
GLRBuffer *vertexBuffer = nullptr;
|
||||
GLRBuffer *indexBuffer = nullptr;
|
||||
|
@ -278,8 +278,8 @@ void DrawEngineGLES::DoFlush() {
|
|||
int vertexCount = 0;
|
||||
bool useElements = true;
|
||||
|
||||
if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
|
||||
// If software skinning, we've already predecoded into "decoded". So push that content.
|
||||
if (lastVType_ & GE_VTYPE_WEIGHT_MASK) {
|
||||
// Since we're software skinning, we've already predecoded into "decoded". So push that content.
|
||||
size_t size = decodedVerts_ * dec_->GetDecVtxFmt().stride;
|
||||
u8 *dest = (u8 *)frameData.pushVertex->Push(size, &vertexBufferOffset, &vertexBuffer);
|
||||
memcpy(dest, decoded, size);
|
||||
|
@ -331,7 +331,6 @@ void DrawEngineGLES::DoFlush() {
|
|||
}
|
||||
} else {
|
||||
PROFILE_THIS_SCOPE("soft");
|
||||
decOptions_.applySkinInDecode = true;
|
||||
DecodeVerts(decoded);
|
||||
|
||||
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
|
||||
|
@ -448,7 +447,6 @@ void DrawEngineGLES::DoFlush() {
|
|||
}
|
||||
gstate_c.Dirty(DIRTY_BLEND_STATE); // Make sure the color mask gets re-applied.
|
||||
}
|
||||
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
|
||||
}
|
||||
|
||||
gpuStats.numDrawCalls += numDrawCalls;
|
||||
|
|
|
@ -121,10 +121,6 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
|
|||
queries.push_back({ &u_world, "u_world" });
|
||||
queries.push_back({ &u_texmtx, "u_texmtx" });
|
||||
|
||||
if (VSID.Bit(VS_BIT_ENABLE_BONES))
|
||||
numBones = TranslateNumBones(VSID.Bits(VS_BIT_BONES, 3) + 1);
|
||||
else
|
||||
numBones = 0;
|
||||
queries.push_back({ &u_depthRange, "u_depthRange" });
|
||||
queries.push_back({ &u_cullRangeMin, "u_cullRangeMin" });
|
||||
queries.push_back({ &u_cullRangeMax, "u_cullRangeMax" });
|
||||
|
@ -134,15 +130,6 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
|
|||
queries.push_back({ &u_scaleX, "u_scaleX" });
|
||||
queries.push_back({ &u_scaleY, "u_scaleY" });
|
||||
|
||||
#ifdef USE_BONE_ARRAY
|
||||
queries.push_back({ &u_bone, "u_bone" });
|
||||
#else
|
||||
static const char * const boneNames[8] = { "u_bone0", "u_bone1", "u_bone2", "u_bone3", "u_bone4", "u_bone5", "u_bone6", "u_bone7", };
|
||||
for (int i = 0; i < 8; i++) {
|
||||
queries.push_back({ &u_bone[i], boneNames[i] });
|
||||
}
|
||||
#endif
|
||||
|
||||
// Lighting, texturing
|
||||
queries.push_back({ &u_ambient, "u_ambient" });
|
||||
queries.push_back({ &u_matambientalpha, "u_matambientalpha" });
|
||||
|
@ -584,13 +571,6 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu
|
|||
float f = (float)gstate.getStencilTestRef() * (1.0f / 255.0f);
|
||||
render_->SetUniformF(&u_stencilReplaceValue, 1, &f);
|
||||
}
|
||||
float bonetemp[16];
|
||||
for (int i = 0; i < numBones; i++) {
|
||||
if (dirty & (DIRTY_BONEMATRIX0 << i)) {
|
||||
ConvertMatrix4x3To4x4Transposed(bonetemp, gstate.boneMatrix + 12 * i);
|
||||
render_->SetUniformM4x4(&u_bone[i], bonetemp);
|
||||
}
|
||||
}
|
||||
|
||||
if (dirty & DIRTY_SHADERBLEND) {
|
||||
if (u_blendFixA != -1) {
|
||||
|
|
|
@ -67,13 +67,6 @@ public:
|
|||
int u_scaleX;
|
||||
int u_scaleY;
|
||||
|
||||
#ifdef USE_BONE_ARRAY
|
||||
int u_bone; // array, size is numBones
|
||||
#else
|
||||
int u_bone[8];
|
||||
#endif
|
||||
int numBones;
|
||||
|
||||
// Shader blending.
|
||||
int u_fbotex;
|
||||
int u_blendFixA;
|
||||
|
|
|
@ -66,8 +66,8 @@ const CommonCommandTableEntry commonCommandTable[] = {
|
|||
{ GE_CMD_BEZIER, FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
|
||||
{ GE_CMD_SPLINE, FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
|
||||
|
||||
// Changing the vertex type requires us to flush.
|
||||
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
|
||||
// Changing the vertex type requires us to flush, unless we just change the weight count - need to handle in a func.
|
||||
{ GE_CMD_VERTEXTYPE, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
|
||||
|
||||
{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_LoadClut },
|
||||
|
||||
|
@ -436,14 +436,6 @@ GPUCommon::~GPUCommon() {
|
|||
}
|
||||
|
||||
void GPUCommon::UpdateCmdInfo() {
|
||||
if (g_Config.bSoftwareSkinning) {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning;
|
||||
} else {
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
|
||||
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType;
|
||||
}
|
||||
|
||||
if (g_Config.bFastMemory) {
|
||||
cmdInfo_[GE_CMD_JUMP].func = &GPUCommon::Execute_JumpFast;
|
||||
cmdInfo_[GE_CMD_CALL].func = &GPUCommon::Execute_CallFast;
|
||||
|
@ -782,7 +774,7 @@ void GPUCommon::ResetMatrices() {
|
|||
matrixVisible.tgen[i] = toFloat24(gstate.tgenMatrix[i]);
|
||||
|
||||
// Assume all the matrices changed, so dirty things related to them.
|
||||
gstate_c.Dirty(DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_PROJMATRIX | DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE | DIRTY_BONE_UNIFORMS);
|
||||
gstate_c.Dirty(DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_PROJMATRIX | DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE);
|
||||
}
|
||||
|
||||
u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer<PspGeListArgs> args, bool head) {
|
||||
|
@ -1677,22 +1669,12 @@ void GPUCommon::Execute_TexSize0(u32 op, u32 diff) {
|
|||
}
|
||||
}
|
||||
|
||||
void GPUCommon::Execute_VertexType(u32 op, u32 diff) {
|
||||
if (diff)
|
||||
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
|
||||
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) {
|
||||
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
|
||||
if (diff & GE_VTYPE_THROUGH_MASK)
|
||||
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
|
||||
}
|
||||
}
|
||||
|
||||
void GPUCommon::Execute_LoadClut(u32 op, u32 diff) {
|
||||
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
|
||||
textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
|
||||
}
|
||||
|
||||
void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) {
|
||||
void GPUCommon::Execute_VertexType(u32 op, u32 diff) {
|
||||
// Don't flush when weight count changes.
|
||||
if (diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) {
|
||||
// Restore and flush
|
||||
|
@ -1834,7 +1816,7 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
|
|||
// cull mode
|
||||
int cullMode = gstate.getCullMode();
|
||||
|
||||
uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);
|
||||
uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), true);
|
||||
drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, cullMode, &bytesRead);
|
||||
// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
|
||||
// Some games rely on this, they don't bother reloading VADDR and IADDR.
|
||||
|
@ -1853,8 +1835,6 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
|
|||
// between each prim, we just change the triangle winding right here to still be able to join draw calls.
|
||||
|
||||
uint32_t vtypeCheckMask = ~GE_VTYPE_WEIGHTCOUNT_MASK;
|
||||
if (!g_Config.bSoftwareSkinning)
|
||||
vtypeCheckMask = 0xFFFFFFFF;
|
||||
|
||||
if (debugRecording_)
|
||||
goto bail;
|
||||
|
@ -1892,7 +1872,7 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
|
|||
goto bail;
|
||||
} else {
|
||||
vertexType = data;
|
||||
vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);
|
||||
vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), true);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -2033,10 +2013,6 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
|
|||
indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
|
||||
}
|
||||
|
||||
if (vertTypeIsSkinningEnabled(gstate.vertType)) {
|
||||
DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
|
||||
}
|
||||
|
||||
// Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already.
|
||||
if (flushOnParams_)
|
||||
drawEngineCommon_->DispatchFlush();
|
||||
|
@ -2106,10 +2082,6 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) {
|
|||
indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
|
||||
}
|
||||
|
||||
if (vertTypeIsSkinningEnabled(gstate.vertType)) {
|
||||
DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
|
||||
}
|
||||
|
||||
// Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already.
|
||||
if (flushOnParams_)
|
||||
drawEngineCommon_->DispatchFlush();
|
||||
|
@ -2451,35 +2423,16 @@ void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) {
|
|||
}
|
||||
|
||||
if (fastLoad) {
|
||||
// If we can't use software skinning, we have to flush and dirty.
|
||||
if (!g_Config.bSoftwareSkinning) {
|
||||
while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
|
||||
const u32 newVal = src[i] << 8;
|
||||
if (dst[i] != newVal) {
|
||||
Flush();
|
||||
dst[i] = newVal;
|
||||
}
|
||||
if (++i >= end) {
|
||||
break;
|
||||
}
|
||||
while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
|
||||
dst[i] = src[i] << 8;
|
||||
if (++i >= end) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const unsigned int numPlusCount = (op & 0x7F) + i;
|
||||
for (unsigned int num = op & 0x7F; num < numPlusCount; num += 12) {
|
||||
gstate_c.Dirty(DIRTY_BONEMATRIX0 << (num / 12));
|
||||
}
|
||||
} else {
|
||||
while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
|
||||
dst[i] = src[i] << 8;
|
||||
if (++i >= end) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const unsigned int numPlusCount = (op & 0x7F) + i;
|
||||
for (unsigned int num = op & 0x7F; num < numPlusCount; num += 12) {
|
||||
gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12);
|
||||
}
|
||||
const unsigned int numPlusCount = (op & 0x7F) + i;
|
||||
for (unsigned int num = op & 0x7F; num < numPlusCount; num += 12) {
|
||||
gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2496,13 +2449,8 @@ void GPUCommon::Execute_BoneMtxData(u32 op, u32 diff) {
|
|||
int num = gstate.boneMatrixNumber & 0x00FFFFFF;
|
||||
u32 newVal = op << 8;
|
||||
if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) {
|
||||
// Bone matrices should NOT flush when software skinning is enabled!
|
||||
if (!g_Config.bSoftwareSkinning) {
|
||||
Flush();
|
||||
gstate_c.Dirty(DIRTY_BONEMATRIX0 << (num / 12));
|
||||
} else {
|
||||
gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12);
|
||||
}
|
||||
// Bone matrices should NOT flush since we're software skinning!
|
||||
gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12);
|
||||
((u32 *)gstate.boneMatrix)[num] = newVal;
|
||||
}
|
||||
num++;
|
||||
|
@ -2528,7 +2476,7 @@ void GPUCommon::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {
|
|||
|
||||
int prim = (op >> 8) & 0x7;
|
||||
if (prim != GE_PRIM_KEEP_PREVIOUS) {
|
||||
// Flush before changing the prim type. Only continue can be used to continue a prim.
|
||||
// Flush before changing the prim type. Only continue can be used to continue a prim.
|
||||
FlushImm();
|
||||
}
|
||||
|
||||
|
@ -2692,13 +2640,7 @@ void GPUCommon::FastLoadBoneMatrix(u32 target) {
|
|||
uniformsToDirty |= DIRTY_BONEMATRIX0 << ((mtxNum + 1) & 7);
|
||||
}
|
||||
|
||||
if (!g_Config.bSoftwareSkinning) {
|
||||
if (flushOnParams_)
|
||||
Flush();
|
||||
gstate_c.Dirty(uniformsToDirty);
|
||||
} else {
|
||||
gstate_c.deferredVertTypeDirty |= uniformsToDirty;
|
||||
}
|
||||
gstate_c.deferredVertTypeDirty |= uniformsToDirty;
|
||||
gstate.FastLoadBoneMatrix(target);
|
||||
}
|
||||
|
||||
|
|
|
@ -146,7 +146,6 @@ public:
|
|||
void Execute_End(u32 op, u32 diff);
|
||||
|
||||
void Execute_VertexType(u32 op, u32 diff);
|
||||
void Execute_VertexTypeSkinning(u32 op, u32 diff);
|
||||
|
||||
void Execute_Prim(u32 op, u32 diff);
|
||||
void Execute_Bezier(u32 op, u32 diff);
|
||||
|
|
|
@ -260,10 +260,6 @@ void GPUgstate::Restore(u32_le *ptr) {
|
|||
gpu->ResetMatrices();
|
||||
}
|
||||
|
||||
bool vertTypeIsSkinningEnabled(u32 vertType) {
|
||||
return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE);
|
||||
}
|
||||
|
||||
struct GPUStateCache_v0 {
|
||||
u32 vertexAddr;
|
||||
u32 indexAddr;
|
||||
|
|
|
@ -448,8 +448,6 @@ struct GPUgstate {
|
|||
void Restore(u32_le *ptr);
|
||||
};
|
||||
|
||||
bool vertTypeIsSkinningEnabled(u32 vertType);
|
||||
|
||||
inline int vertTypeGetNumBoneWeights(u32 vertType) { return 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); }
|
||||
inline int vertTypeGetWeightMask(u32 vertType) { return vertType & GE_VTYPE_WEIGHT_MASK; }
|
||||
|
||||
|
|
|
@ -889,7 +889,7 @@ void SoftGPU::Execute_Bezier(u32 op, u32 diff) {
|
|||
indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
|
||||
}
|
||||
|
||||
if ((gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) || vertTypeIsSkinningEnabled(gstate.vertType)) {
|
||||
if (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) {
|
||||
DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
|
||||
}
|
||||
|
||||
|
@ -941,7 +941,7 @@ void SoftGPU::Execute_Spline(u32 op, u32 diff) {
|
|||
indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
|
||||
}
|
||||
|
||||
if ((gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) || vertTypeIsSkinningEnabled(gstate.vertType)) {
|
||||
if (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) {
|
||||
DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
|
||||
}
|
||||
|
||||
|
|
|
@ -64,7 +64,6 @@ SoftwareDrawEngine::~SoftwareDrawEngine() {
|
|||
|
||||
void SoftwareDrawEngine::NotifyConfigChanged() {
|
||||
DrawEngineCommon::NotifyConfigChanged();
|
||||
decOptions_.applySkinInDecode = true;
|
||||
}
|
||||
|
||||
void SoftwareDrawEngine::DispatchFlush() {
|
||||
|
@ -940,7 +939,6 @@ bool TransformUnit::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVert
|
|||
|
||||
VertexDecoder vdecoder;
|
||||
VertexDecoderOptions options{};
|
||||
options.applySkinInDecode = true;
|
||||
vdecoder.SetVertexType(gstate.vertType, options);
|
||||
|
||||
if (!Memory::IsValidRange(gstate_c.vertexAddr, (indexUpperBound + 1) * vdecoder.VertexSize()))
|
||||
|
|
|
@ -62,19 +62,6 @@ enum {
|
|||
|
||||
enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MAX = 4 };
|
||||
|
||||
enum {
|
||||
DRAW_BINDING_TEXTURE = 0,
|
||||
DRAW_BINDING_2ND_TEXTURE = 1,
|
||||
DRAW_BINDING_DEPAL_TEXTURE = 2,
|
||||
DRAW_BINDING_DYNUBO_BASE = 3,
|
||||
DRAW_BINDING_DYNUBO_LIGHT = 4,
|
||||
DRAW_BINDING_DYNUBO_BONE = 5,
|
||||
DRAW_BINDING_TESS_STORAGE_BUF = 6,
|
||||
DRAW_BINDING_TESS_STORAGE_BUF_WU = 7,
|
||||
DRAW_BINDING_TESS_STORAGE_BUF_WV = 8,
|
||||
DRAW_BINDING_INPUT_ATTACHMENT = 9,
|
||||
};
|
||||
|
||||
enum {
|
||||
TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof(TransformedVertex)
|
||||
};
|
||||
|
@ -97,7 +84,7 @@ void DrawEngineVulkan::InitDeviceObjects() {
|
|||
|
||||
// TODO: Make things more flexible, so we at least have specialized layouts for input attachments and tess.
|
||||
// Note that it becomes a support matrix..
|
||||
VkDescriptorSetLayoutBinding bindings[10]{};
|
||||
VkDescriptorSetLayoutBinding bindings[DRAW_BINDING_COUNT]{};
|
||||
bindings[0].descriptorCount = 1;
|
||||
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
|
@ -120,27 +107,23 @@ void DrawEngineVulkan::InitDeviceObjects() {
|
|||
bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
|
||||
bindings[4].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[4].binding = DRAW_BINDING_DYNUBO_LIGHT;
|
||||
bindings[5].descriptorCount = 1;
|
||||
bindings[5].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
|
||||
bindings[5].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[5].binding = DRAW_BINDING_DYNUBO_BONE;
|
||||
// Used only for hardware tessellation.
|
||||
bindings[5].descriptorCount = 1;
|
||||
bindings[5].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
bindings[5].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[5].binding = DRAW_BINDING_TESS_STORAGE_BUF;
|
||||
bindings[6].descriptorCount = 1;
|
||||
bindings[6].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
bindings[6].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[6].binding = DRAW_BINDING_TESS_STORAGE_BUF;
|
||||
bindings[6].binding = DRAW_BINDING_TESS_STORAGE_BUF_WU;
|
||||
bindings[7].descriptorCount = 1;
|
||||
bindings[7].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
bindings[7].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[7].binding = DRAW_BINDING_TESS_STORAGE_BUF_WU;
|
||||
bindings[7].binding = DRAW_BINDING_TESS_STORAGE_BUF_WV;
|
||||
bindings[8].descriptorCount = 1;
|
||||
bindings[8].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
|
||||
bindings[8].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
|
||||
bindings[8].binding = DRAW_BINDING_TESS_STORAGE_BUF_WV;
|
||||
bindings[9].descriptorCount = 1;
|
||||
bindings[9].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
|
||||
bindings[9].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
bindings[9].binding = DRAW_BINDING_INPUT_ATTACHMENT;
|
||||
bindings[8].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
|
||||
bindings[8].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
|
||||
bindings[8].binding = DRAW_BINDING_INPUT_ATTACHMENT;
|
||||
|
||||
VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT);
|
||||
VkDevice device = vulkan->GetDevice();
|
||||
|
@ -155,7 +138,7 @@ void DrawEngineVulkan::InitDeviceObjects() {
|
|||
static constexpr int DEFAULT_DESC_POOL_SIZE = 512;
|
||||
std::vector<VkDescriptorPoolSize> dpTypes;
|
||||
dpTypes.resize(5);
|
||||
dpTypes[0].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3;
|
||||
dpTypes[0].descriptorCount = DEFAULT_DESC_POOL_SIZE * 2;
|
||||
dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
|
||||
dpTypes[1].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3; // Don't use these for tess anymore, need max three per set.
|
||||
dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
|
||||
|
@ -384,10 +367,9 @@ void DrawEngineVulkan::DecodeVertsToPushBuffer(VulkanPushBuffer *push, uint32_t
|
|||
DecodeVerts(dest);
|
||||
}
|
||||
|
||||
VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone, bool tess) {
|
||||
VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, bool tess) {
|
||||
_dbg_assert_(base != VK_NULL_HANDLE);
|
||||
_dbg_assert_(light != VK_NULL_HANDLE);
|
||||
_dbg_assert_(bone != VK_NULL_HANDLE);
|
||||
|
||||
DescriptorSetKey key{};
|
||||
key.imageView_ = imageView;
|
||||
|
@ -396,7 +378,6 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
|
|||
key.depalImageView_ = boundDepal_;
|
||||
key.base_ = base;
|
||||
key.light_ = light;
|
||||
key.bone_ = bone;
|
||||
key.secondaryIsInputAttachment = boundSecondaryIsInputAttachment_;
|
||||
|
||||
FrameData &frame = GetCurFrame();
|
||||
|
@ -506,10 +487,6 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
|
|||
buf[count].offset = 0;
|
||||
buf[count].range = sizeof(UB_VS_Lights);
|
||||
count++;
|
||||
buf[count].buffer = bone;
|
||||
buf[count].offset = 0;
|
||||
buf[count].range = sizeof(UB_VS_Bones);
|
||||
count++;
|
||||
for (int i = 0; i < count; i++) {
|
||||
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
|
||||
writes[n].pNext = nullptr;
|
||||
|
@ -533,11 +510,9 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
|
|||
void DrawEngineVulkan::DirtyAllUBOs() {
|
||||
baseUBOOffset = 0;
|
||||
lightUBOOffset = 0;
|
||||
boneUBOOffset = 0;
|
||||
baseBuf = VK_NULL_HANDLE;
|
||||
lightBuf = VK_NULL_HANDLE;
|
||||
boneBuf = VK_NULL_HANDLE;
|
||||
dirtyUniforms_ = DIRTY_BASE_UNIFORMS | DIRTY_LIGHT_UNIFORMS | DIRTY_BONE_UNIFORMS;
|
||||
dirtyUniforms_ = DIRTY_BASE_UNIFORMS | DIRTY_LIGHT_UNIFORMS;
|
||||
imageView = VK_NULL_HANDLE;
|
||||
sampler = VK_NULL_HANDLE;
|
||||
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
|
||||
|
@ -599,7 +574,7 @@ void DrawEngineVulkan::DoFlush() {
|
|||
// Also avoid caching when software skinning.
|
||||
VkBuffer vbuf = VK_NULL_HANDLE;
|
||||
VkBuffer ibuf = VK_NULL_HANDLE;
|
||||
if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
|
||||
if (lastVType_ & GE_VTYPE_WEIGHT_MASK) {
|
||||
useCache = false;
|
||||
}
|
||||
|
||||
|
@ -739,7 +714,7 @@ void DrawEngineVulkan::DoFlush() {
|
|||
break;
|
||||
}
|
||||
} else {
|
||||
if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
|
||||
if (lastVType_ & GE_VTYPE_WEIGHT_MASK) {
|
||||
// If software skinning, we've already predecoded into "decoded". So push that content.
|
||||
VkDeviceSize size = decodedVerts_ * dec_->GetDecVtxFmt().stride;
|
||||
u8 *dest = (u8 *)frameData.pushVertex->Push(size, &vbOffset, &vbuf);
|
||||
|
@ -784,7 +759,7 @@ void DrawEngineVulkan::DoFlush() {
|
|||
VulkanFragmentShader *fshader = nullptr;
|
||||
VulkanGeometryShader *gshader = nullptr;
|
||||
|
||||
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, &gshader, pipelineState_, true, useHWTessellation_, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode);
|
||||
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, &gshader, pipelineState_, true, useHWTessellation_, decOptions_.expandAllWeightsToFloat, true);
|
||||
if (!vshader) {
|
||||
// We're screwed.
|
||||
return;
|
||||
|
@ -826,10 +801,10 @@ void DrawEngineVulkan::DoFlush() {
|
|||
dirtyUniforms_ |= shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
|
||||
UpdateUBOs(&frameData);
|
||||
|
||||
VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf, tess);
|
||||
VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, tess);
|
||||
|
||||
const uint32_t dynamicUBOOffsets[3] = {
|
||||
baseUBOOffset, lightUBOOffset, boneUBOOffset,
|
||||
const uint32_t dynamicUBOOffsets[] = {
|
||||
baseUBOOffset, lightUBOOffset,
|
||||
};
|
||||
if (useElements) {
|
||||
if (!ibuf) {
|
||||
|
@ -841,7 +816,6 @@ void DrawEngineVulkan::DoFlush() {
|
|||
}
|
||||
} else {
|
||||
PROFILE_THIS_SCOPE("soft");
|
||||
decOptions_.applySkinInDecode = true;
|
||||
DecodeVerts(decoded);
|
||||
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
|
||||
if (gstate.isModeThrough()) {
|
||||
|
@ -920,7 +894,7 @@ void DrawEngineVulkan::DoFlush() {
|
|||
VulkanFragmentShader *fshader = nullptr;
|
||||
VulkanGeometryShader *gshader = nullptr;
|
||||
|
||||
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, &gshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode);
|
||||
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, &gshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat, true);
|
||||
_dbg_assert_msg_(!vshader->UseHWTransform(), "Bad vshader");
|
||||
VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(renderManager, pipelineLayout_, pipelineKey_, &dec_->decFmt, vshader, fshader, gshader, false, 0);
|
||||
if (!pipeline || !pipeline->pipeline) {
|
||||
|
@ -928,7 +902,6 @@ void DrawEngineVulkan::DoFlush() {
|
|||
decodedVerts_ = 0;
|
||||
numDrawCalls = 0;
|
||||
decodeCounter_ = 0;
|
||||
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
|
||||
return;
|
||||
}
|
||||
BindShaderBlendTex(); // This might cause copies so super important to do before BindPipeline.
|
||||
|
@ -964,9 +937,9 @@ void DrawEngineVulkan::DoFlush() {
|
|||
// Even if the first draw is through-mode, make sure we at least have one copy of these uniforms buffered
|
||||
UpdateUBOs(&frameData);
|
||||
|
||||
VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf, tess);
|
||||
const uint32_t dynamicUBOOffsets[3] = {
|
||||
baseUBOOffset, lightUBOOffset, boneUBOOffset,
|
||||
VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, tess);
|
||||
const uint32_t dynamicUBOOffsets[] = {
|
||||
baseUBOOffset, lightUBOOffset,
|
||||
};
|
||||
|
||||
PROFILE_THIS_SCOPE("renderman_q");
|
||||
|
@ -996,7 +969,6 @@ void DrawEngineVulkan::DoFlush() {
|
|||
framebufferManager_->ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, result.color);
|
||||
}
|
||||
}
|
||||
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
|
||||
}
|
||||
|
||||
gpuStats.numDrawCalls += numDrawCalls;
|
||||
|
@ -1056,10 +1028,6 @@ void DrawEngineVulkan::UpdateUBOs(FrameData *frame) {
|
|||
lightUBOOffset = shaderManager_->PushLightBuffer(frame->pushUBO, &lightBuf);
|
||||
dirtyUniforms_ &= ~DIRTY_LIGHT_UNIFORMS;
|
||||
}
|
||||
if ((dirtyUniforms_ & DIRTY_BONE_UNIFORMS) || boneBuf == VK_NULL_HANDLE) {
|
||||
boneUBOOffset = shaderManager_->PushBoneBuffer(frame->pushUBO, &boneBuf);
|
||||
dirtyUniforms_ &= ~DIRTY_BONE_UNIFORMS;
|
||||
}
|
||||
}
|
||||
|
||||
DrawEngineVulkan::FrameData &DrawEngineVulkan::GetCurFrame() {
|
||||
|
|
|
@ -121,6 +121,19 @@ private:
|
|||
VkDescriptorBufferInfo bufInfo_[3]{};
|
||||
};
|
||||
|
||||
enum {
|
||||
DRAW_BINDING_TEXTURE = 0,
|
||||
DRAW_BINDING_2ND_TEXTURE = 1,
|
||||
DRAW_BINDING_DEPAL_TEXTURE = 2,
|
||||
DRAW_BINDING_DYNUBO_BASE = 3,
|
||||
DRAW_BINDING_DYNUBO_LIGHT = 4,
|
||||
DRAW_BINDING_TESS_STORAGE_BUF = 5,
|
||||
DRAW_BINDING_TESS_STORAGE_BUF_WU = 6,
|
||||
DRAW_BINDING_TESS_STORAGE_BUF_WV = 7,
|
||||
DRAW_BINDING_INPUT_ATTACHMENT = 8,
|
||||
DRAW_BINDING_COUNT = 9,
|
||||
};
|
||||
|
||||
// Handles transform, lighting and drawing.
|
||||
class DrawEngineVulkan : public DrawEngineCommon {
|
||||
public:
|
||||
|
@ -207,7 +220,7 @@ private:
|
|||
void UpdateUBOs(FrameData *frame);
|
||||
FrameData &GetCurFrame();
|
||||
|
||||
VkDescriptorSet GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone, bool tess);
|
||||
VkDescriptorSet GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, bool tess);
|
||||
|
||||
Draw::DrawContext *draw_;
|
||||
|
||||
|
@ -238,8 +251,7 @@ private:
|
|||
VkImageView secondaryImageView_;
|
||||
VkImageView depalImageView_;
|
||||
VkSampler sampler_;
|
||||
VkBuffer base_, light_, bone_; // All three UBO slots will be set to this. This will usually be identical
|
||||
// for all draws in a frame, except when the buffer has to grow.
|
||||
VkBuffer base_, light_;
|
||||
bool secondaryIsInputAttachment;
|
||||
};
|
||||
|
||||
|
@ -276,8 +288,7 @@ private:
|
|||
uint64_t dirtyUniforms_;
|
||||
uint32_t baseUBOOffset;
|
||||
uint32_t lightUBOOffset;
|
||||
uint32_t boneUBOOffset;
|
||||
VkBuffer baseBuf, lightBuf, boneBuf;
|
||||
VkBuffer baseBuf, lightBuf;
|
||||
VkImageView imageView = VK_NULL_HANDLE;
|
||||
VkSampler sampler = VK_NULL_HANDLE;
|
||||
|
||||
|
|
|
@ -204,11 +204,9 @@ ShaderManagerVulkan::ShaderManagerVulkan(Draw::DrawContext *draw)
|
|||
uboAlignment_ = vulkan->GetPhysicalDeviceProperties().properties.limits.minUniformBufferOffsetAlignment;
|
||||
memset(&ub_base, 0, sizeof(ub_base));
|
||||
memset(&ub_lights, 0, sizeof(ub_lights));
|
||||
memset(&ub_bones, 0, sizeof(ub_bones));
|
||||
|
||||
static_assert(sizeof(ub_base) <= 512, "ub_base grew too big");
|
||||
static_assert(sizeof(ub_lights) <= 512, "ub_lights grew too big");
|
||||
static_assert(sizeof(ub_bones) <= 384, "ub_bones grew too big");
|
||||
}
|
||||
|
||||
ShaderManagerVulkan::~ShaderManagerVulkan() {
|
||||
|
@ -273,8 +271,6 @@ uint64_t ShaderManagerVulkan::UpdateUniforms(bool useBufferedRendering) {
|
|||
BaseUpdateUniforms(&ub_base, dirty, false, useBufferedRendering);
|
||||
if (dirty & DIRTY_LIGHT_UNIFORMS)
|
||||
LightUpdateUniforms(&ub_lights, dirty);
|
||||
if (dirty & DIRTY_BONE_UNIFORMS)
|
||||
BoneUpdateUniforms(&ub_bones, dirty);
|
||||
}
|
||||
gstate_c.CleanUniforms();
|
||||
return dirty;
|
||||
|
|
|
@ -149,10 +149,6 @@ public:
|
|||
uint32_t PushLightBuffer(VulkanPushBuffer *dest, VkBuffer *buf) {
|
||||
return dest->PushAligned(&ub_lights, sizeof(ub_lights), uboAlignment_, buf);
|
||||
}
|
||||
// TODO: Only push half the bone buffer if we only have four bones.
|
||||
uint32_t PushBoneBuffer(VulkanPushBuffer *dest, VkBuffer *buf) {
|
||||
return dest->PushAligned(&ub_bones, sizeof(ub_bones), uboAlignment_, buf);
|
||||
}
|
||||
|
||||
bool LoadCache(FILE *f);
|
||||
void SaveCache(FILE *f);
|
||||
|
@ -177,7 +173,6 @@ private:
|
|||
// Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time.
|
||||
UB_VS_FS_Base ub_base;
|
||||
UB_VS_Lights ub_lights;
|
||||
UB_VS_Bones ub_bones;
|
||||
|
||||
VulkanFragmentShader *lastFShader_ = nullptr;
|
||||
VulkanVertexShader *lastVShader_ = nullptr;
|
||||
|
|
|
@ -540,12 +540,6 @@ void GameSettingsScreen::CreateViews() {
|
|||
hwTransform->SetDisabledPtr(&g_Config.bSoftwareRendering);
|
||||
}
|
||||
|
||||
CheckBox *swSkin = graphicsSettings->Add(new CheckBox(&g_Config.bSoftwareSkinning, gr->T("Software Skinning")));
|
||||
swSkin->OnClick.Add([=](EventParams &e) {
|
||||
settingInfo_->Show(gr->T("SoftwareSkinning Tip", "Combine skinned model draws on the CPU, faster in most games"), e.v);
|
||||
return UI::EVENT_CONTINUE;
|
||||
});
|
||||
swSkin->SetDisabledPtr(&g_Config.bSoftwareRendering);
|
||||
|
||||
CheckBox *tessellationHW = graphicsSettings->Add(new CheckBox(&g_Config.bHardwareTessellation, gr->T("Hardware Tessellation")));
|
||||
tessellationHW->OnClick.Add([=](EventParams &e) {
|
||||
|
|
|
@ -306,8 +306,6 @@ int CtrlVertexList::GetRowCount() {
|
|||
rowCount_ = 0;
|
||||
}
|
||||
VertexDecoderOptions options{};
|
||||
// TODO: Maybe an option?
|
||||
options.applySkinInDecode = true;
|
||||
decoder->SetVertexType(state.vertType, options);
|
||||
return rowCount_;
|
||||
}
|
||||
|
|
|
@ -436,7 +436,6 @@ int main(int argc, const char* argv[])
|
|||
g_Config.iInternalResolution = 1;
|
||||
g_Config.iFastForwardMode = (int)FastForwardMode::CONTINUOUS;
|
||||
g_Config.bEnableLogging = fullLog;
|
||||
g_Config.bSoftwareSkinning = true;
|
||||
g_Config.bVertexDecoderJit = true;
|
||||
g_Config.bSoftwareRendering = coreParameter.gpuCore == GPUCORE_SOFTWARE;
|
||||
g_Config.bSoftwareRenderingJit = true;
|
||||
|
|
|
@ -823,15 +823,6 @@ static void check_variables(CoreParameter &coreParam)
|
|||
g_Config.bHardwareTransform = true;
|
||||
}
|
||||
|
||||
var.key = "ppsspp_software_skinning";
|
||||
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
|
||||
{
|
||||
if (!strcmp(var.value, "disabled"))
|
||||
g_Config.bSoftwareSkinning = false;
|
||||
else
|
||||
g_Config.bSoftwareSkinning = true;
|
||||
}
|
||||
|
||||
var.key = "ppsspp_vertex_cache";
|
||||
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
|
||||
{
|
||||
|
|
|
@ -407,15 +407,10 @@ bool TestVertexShaders() {
|
|||
|
||||
// The generated bits need some adjustment:
|
||||
|
||||
// We don't use these bits in the HLSL shader generator.
|
||||
id.SetBits(VS_BIT_WEIGHT_FMTSCALE, 2, 0);
|
||||
// If mode is through, we won't do hardware transform.
|
||||
if (id.Bit(VS_BIT_IS_THROUGH)) {
|
||||
id.SetBit(VS_BIT_USE_HW_TRANSFORM, 0);
|
||||
}
|
||||
if (!id.Bit(VS_BIT_USE_HW_TRANSFORM)) {
|
||||
id.SetBit(VS_BIT_ENABLE_BONES, 0);
|
||||
}
|
||||
if (id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) {
|
||||
continue;
|
||||
}
|
||||
|
|
|
@ -546,7 +546,6 @@ static bool TestVertexColor565() {
|
|||
static bool TestVertex8Skin() {
|
||||
VertexDecoderTestHarness dec;
|
||||
VertexDecoderOptions opts{};
|
||||
opts.applySkinInDecode = true;
|
||||
dec.SetOptions(opts);
|
||||
|
||||
for (int i = 0; i < 8 * 12; ++i) {
|
||||
|
@ -578,7 +577,6 @@ static bool TestVertex8Skin() {
|
|||
static bool TestVertex16Skin() {
|
||||
VertexDecoderTestHarness dec;
|
||||
VertexDecoderOptions opts{};
|
||||
opts.applySkinInDecode = true;
|
||||
dec.SetOptions(opts);
|
||||
|
||||
for (int i = 0; i < 8 * 12; ++i) {
|
||||
|
@ -610,7 +608,6 @@ static bool TestVertex16Skin() {
|
|||
static bool TestVertexFloatSkin() {
|
||||
VertexDecoderTestHarness dec;
|
||||
VertexDecoderOptions opts{};
|
||||
opts.applySkinInDecode = true;
|
||||
dec.SetOptions(opts);
|
||||
|
||||
for (int i = 0; i < 8 * 12; ++i) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue