Compare commits

...
Sign in to create a new pull request.

6 commits

Author SHA1 Message Date
Henrik Rydgård
64c2b84d5b Address feedback 2022-11-06 23:12:32 +01:00
Henrik Rydgård
c8d6d68aac Software transform cleanup 2022-11-06 20:33:50 +01:00
Henrik Rydgård
55b1c8919a Vertex decoder jit cleanup 2022-11-06 20:33:06 +01:00
Henrik Rydgård
8fab44902a GPUCommon cleanup 2022-11-06 20:33:06 +01:00
Henrik Rydgård
f8167d442f Second round of removals, and some minor refactoring in the Vulkan backend. 2022-11-06 20:33:06 +01:00
Henrik Rydgård
58454894eb Remove the option to use hardware skinning - software skinning is better. 2022-11-06 20:33:05 +01:00
41 changed files with 127 additions and 905 deletions

View file

@ -879,7 +879,6 @@ static ConfigSetting graphicsSettings[] = {
ConfigSetting("SoftwareRenderer", &g_Config.bSoftwareRendering, false, true, true),
ConfigSetting("SoftwareRendererJit", &g_Config.bSoftwareRenderingJit, true, true, true),
ReportedConfigSetting("HardwareTransform", &g_Config.bHardwareTransform, true, true, true),
ReportedConfigSetting("SoftwareSkinning", &g_Config.bSoftwareSkinning, true, true, true),
ReportedConfigSetting("TextureFiltering", &g_Config.iTexFiltering, 1, true, true),
ReportedConfigSetting("BufferFiltering", &g_Config.iBufFilter, SCALE_LINEAR, true, true),
ReportedConfigSetting("InternalResolution", &g_Config.iInternalResolution, &DefaultInternalResolution, true, true),

View file

@ -162,7 +162,6 @@ public:
bool bSoftwareRendering;
bool bSoftwareRenderingJit;
bool bHardwareTransform; // only used in the GLES backend
bool bSoftwareSkinning;
bool bVendorBugChecksEnabled;
bool bUseGeometryShader;

View file

@ -180,11 +180,10 @@ void DrawEngineCommon::NotifyConfigChanged() {
useHWTransform_ = g_Config.bHardwareTransform;
useHWTessellation_ = UpdateUseHWTessellation(g_Config.bHardwareTessellation);
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
}
u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, int lowerBound, int upperBound, u32 vertType, int *vertexSize) {
const u32 vertTypeID = GetVertTypeID(vertType, gstate.getUVGenMode(), decOptions_.applySkinInDecode);
const u32 vertTypeID = GetVertTypeID(vertType, gstate.getUVGenMode(), true);
VertexDecoder *dec = GetVertexDecoder(vertTypeID);
if (vertexSize)
*vertexSize = dec->VertexSize();
@ -232,7 +231,7 @@ void DrawEngineCommon::DispatchSubmitImm(GEPrimitiveType prim, TransformedVertex
}
int bytesRead;
uint32_t vertTypeID = GetVertTypeID(vtype, 0, decOptions_.applySkinInDecode);
uint32_t vertTypeID = GetVertTypeID(vtype, 0, true);
SubmitPrim(&temp[0], nullptr, prim, vertexCount, vertTypeID, cullMode, &bytesRead);
DispatchFlush();
@ -281,10 +280,7 @@ bool DrawEngineCommon::TestBoundingBox(const void *control_points, const void *i
}
// Force software skinning.
bool wasApplyingSkinInDecode = decOptions_.applySkinInDecode;
decOptions_.applySkinInDecode = true;
NormalizeVertices((u8 *)corners, temp_buffer, (const u8 *)control_points, indexLowerBound, indexUpperBound, vertType);
decOptions_.applySkinInDecode = wasApplyingSkinInDecode;
IndexConverter conv(vertType, inds);
for (int i = 0; i < vertexCount; i++) {
@ -499,8 +495,7 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
// implementation of the vertex decoder.
dec->DecodeVerts(bufPtr, inPtr, lowerBound, upperBound);
// OK, morphing eliminated but bones still remain to be taken care of.
// Let's do a partial software transform where we only do skinning.
// Morph and skin are both removed during decode now.
VertexReader reader(bufPtr, dec->GetDecVtxFmt(), vertType);
@ -513,80 +508,29 @@ u32 DrawEngineCommon::NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr,
(u8)gstate.getMaterialAmbientA(),
};
// Let's have two separate loops, one for non skinning and one for skinning.
if (!dec->skinInDecode && (vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE) {
int numBoneWeights = vertTypeGetNumBoneWeights(vertType);
for (int i = lowerBound; i <= upperBound; i++) {
reader.Goto(i - lowerBound);
SimpleVertex &sv = sverts[i];
if (vertType & GE_VTYPE_TC_MASK) {
reader.ReadUV(sv.uv);
}
if (vertType & GE_VTYPE_COL_MASK) {
reader.ReadColor0_8888(sv.color);
} else {
memcpy(sv.color, defaultColor, 4);
}
float nrm[3], pos[3];
float bnrm[3], bpos[3];
if (vertType & GE_VTYPE_NRM_MASK) {
// Normals are generated during tessellation anyway, not sure if any need to supply
reader.ReadNrm(nrm);
} else {
nrm[0] = 0;
nrm[1] = 0;
nrm[2] = 1.0f;
}
reader.ReadPos(pos);
// Apply skinning transform directly
float weights[8];
reader.ReadWeights(weights);
// Skinning
Vec3Packedf psum(0, 0, 0);
Vec3Packedf nsum(0, 0, 0);
for (int w = 0; w < numBoneWeights; w++) {
if (weights[w] != 0.0f) {
Vec3ByMatrix43(bpos, pos, gstate.boneMatrix + w * 12);
Vec3Packedf tpos(bpos);
psum += tpos * weights[w];
Norm3ByMatrix43(bnrm, nrm, gstate.boneMatrix + w * 12);
Vec3Packedf tnorm(bnrm);
nsum += tnorm * weights[w];
}
}
sv.pos = psum;
sv.nrm = nsum;
for (int i = lowerBound; i <= upperBound; i++) {
reader.Goto(i - lowerBound);
SimpleVertex &sv = sverts[i];
if (vertType & GE_VTYPE_TC_MASK) {
reader.ReadUV(sv.uv);
} else {
sv.uv[0] = 0.0f; // This will get filled in during tessellation
sv.uv[1] = 0.0f;
}
} else {
for (int i = lowerBound; i <= upperBound; i++) {
reader.Goto(i - lowerBound);
SimpleVertex &sv = sverts[i];
if (vertType & GE_VTYPE_TC_MASK) {
reader.ReadUV(sv.uv);
} else {
sv.uv[0] = 0.0f; // This will get filled in during tessellation
sv.uv[1] = 0.0f;
}
if (vertType & GE_VTYPE_COL_MASK) {
reader.ReadColor0_8888(sv.color);
} else {
memcpy(sv.color, defaultColor, 4);
}
if (vertType & GE_VTYPE_NRM_MASK) {
// Normals are generated during tessellation anyway, not sure if any need to supply
reader.ReadNrm((float *)&sv.nrm);
} else {
sv.nrm.x = 0.0f;
sv.nrm.y = 0.0f;
sv.nrm.z = 1.0f;
}
reader.ReadPos((float *)&sv.pos);
if (vertType & GE_VTYPE_COL_MASK) {
reader.ReadColor0_8888(sv.color);
} else {
memcpy(sv.color, defaultColor, 4);
}
if (vertType & GE_VTYPE_NRM_MASK) {
// Normals are generated during tessellation anyway, not sure if any need to supply
reader.ReadNrm((float *)&sv.nrm);
} else {
sv.nrm.x = 0.0f;
sv.nrm.y = 0.0f;
sv.nrm.z = 1.0f;
}
reader.ReadPos((float *)&sv.pos);
}
// Okay, there we are! Return the new type (but keep the index bits)
@ -836,7 +780,7 @@ void DrawEngineCommon::SubmitPrim(const void *verts, const void *inds, GEPrimiti
numDrawCalls++;
vertexCountInDrawCalls_ += vertexCount;
if (decOptions_.applySkinInDecode && (vertTypeID & GE_VTYPE_WEIGHT_MASK)) {
if (vertTypeID & GE_VTYPE_WEIGHT_MASK) {
DecodeVertsStep(decoded, decodeCounter_, decodedVerts_);
decodeCounter_++;
}

View file

@ -31,6 +31,7 @@
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/ShaderUniforms.h"
#include "GPU/Common/FragmentShaderGenerator.h"
#include "GPU/Vulkan/DrawEngineVulkan.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUState.h"
@ -185,23 +186,23 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n");
}
WRITE(p, "layout (std140, set = 1, binding = 3) uniform baseUBO {\n%s};\n", ub_baseStr);
WRITE(p, "layout (std140, set = 1, binding = %d) uniform baseUBO {\n%s};\n", DRAW_BINDING_DYNUBO_BASE, ub_baseStr);
if (doTexture) {
WRITE(p, "layout (set = 1, binding = 0) uniform %s%s tex;\n", texture3D ? "sampler3D" : "sampler2D", arrayTexture ? "Array" : "");
WRITE(p, "layout (set = 1, binding = %d) uniform %s%s tex;\n", DRAW_BINDING_TEXTURE, texture3D ? "sampler3D" : "sampler2D", arrayTexture ? "Array" : "");
}
if (readFramebufferTex) {
// The framebuffer texture is always bound as an array.
p.C("layout (set = 1, binding = 1) uniform sampler2DArray fbotex;\n");
p.F("layout (set = 1, binding = %d) uniform sampler2DArray fbotex;\n", DRAW_BINDING_2ND_TEXTURE);
} else if (fetchFramebuffer) {
p.C("layout (input_attachment_index = 0, set = 1, binding = 9) uniform subpassInput inputColor;\n");
p.F("layout (input_attachment_index = 0, set = 1, binding = %d) uniform subpassInput inputColor;\n", DRAW_BINDING_INPUT_ATTACHMENT);
if (fragmentShaderFlags) {
*fragmentShaderFlags |= FragmentShaderFlags::INPUT_ATTACHMENT;
}
}
if (shaderDepalMode != ShaderDepalMode::OFF) {
WRITE(p, "layout (set = 1, binding = 2) uniform sampler2D pal;\n");
WRITE(p, "layout (set = 1, binding = %d) uniform sampler2D pal;\n", DRAW_BINDING_DEPAL_TEXTURE);
}
// Note: the precision qualifiers must match the vertex shader!

View file

@ -90,11 +90,7 @@ enum : uint64_t {
DIRTY_MIPBIAS = 1ULL << 37,
DIRTY_LIGHT_CONTROL = 1ULL << 38,
// space for 1 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS.
DIRTY_BONE_UNIFORMS = 0xFF000000ULL,
DIRTY_ALL_UNIFORMS = 0x7FFFFFFFFFULL,
DIRTY_ALL_UNIFORMS = 0x7F00FFFFFFULL, // 00 is where bone uniforms used to be.
DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3,
// Other dirty elements that aren't uniforms!

View file

@ -35,7 +35,6 @@ std::string VertexShaderDesc(const VShaderID &id) {
int ls1 = id.Bits(VS_BIT_LS1, 2);
if (uvgMode) desc << uvgModes[uvgMode];
if (id.Bit(VS_BIT_ENABLE_BONES)) desc << "Bones:" << (id.Bits(VS_BIT_BONES, 3) + 1) << " ";
// Lights
if (id.Bit(VS_BIT_LIGHTING_ENABLE)) {
desc << "Light: ";
@ -51,7 +50,6 @@ std::string VertexShaderDesc(const VShaderID &id) {
}
}
if (id.Bits(VS_BIT_MATERIAL_UPDATE, 3)) desc << "MatUp:" << id.Bits(VS_BIT_MATERIAL_UPDATE, 3) << " ";
if (id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2)) desc << "WScale " << id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2) << " ";
if (id.Bit(VS_BIT_FLATSHADE)) desc << "Flat ";
if (id.Bit(VS_BIT_BEZIER)) desc << "Bezier ";
@ -117,16 +115,6 @@ void ComputeVertexShaderID(VShaderID *id_out, u32 vertType, bool useHWTransform,
id.SetBits(VS_BIT_LS1, 2, gstate.getUVLS1());
}
// Bones.
bool enableBones = !useSkinInDecode && vertTypeIsSkinningEnabled(vertType);
id.SetBit(VS_BIT_ENABLE_BONES, enableBones);
if (enableBones) {
id.SetBits(VS_BIT_BONES, 3, TranslateNumBones(vertTypeGetNumBoneWeights(vertType)) - 1);
// 2 bits. We should probably send in the weight scalefactor as a uniform instead,
// or simply preconvert all weights to floats.
id.SetBits(VS_BIT_WEIGHT_FMTSCALE, 2, weightsAsFloat ? 0 : (vertType & GE_VTYPE_WEIGHT_MASK) >> GE_VTYPE_WEIGHT_SHIFT);
}
if (gstate.isLightingEnabled()) {
// doShadeMapping is stored as UVGenMode, and light type doesn't matter for shade mapping.
id.SetBit(VS_BIT_LIGHTING_ENABLE);

View file

@ -29,9 +29,7 @@ enum VShaderBit : uint8_t {
VS_BIT_UVPROJ_MODE = 18, // 2, can overlap with LS0
VS_BIT_LS0 = 18, // 2
VS_BIT_LS1 = 20, // 2
VS_BIT_BONES = 22, // 3 should be enough, not 8
// 25 - 29 are free.
VS_BIT_ENABLE_BONES = 30,
// 21 - 30 are free.
// If this is set along with LIGHTING_ENABLE, all other lighting bits below
// are passed to the shader directly instead.
@ -52,8 +50,7 @@ enum VShaderBit : uint8_t {
VS_BIT_LIGHT2_ENABLE = 54,
VS_BIT_LIGHT3_ENABLE = 55,
VS_BIT_LIGHTING_ENABLE = 56,
VS_BIT_WEIGHT_FMTSCALE = 57, // only two bits
// 59 - 61 are free.
// 57 - 61 are free.
VS_BIT_FLATSHADE = 62, // 1 bit
VS_BIT_BEZIER = 63, // 1 bit
// No more free

View file

@ -337,11 +337,3 @@ void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {
}
}
}
// Refreshes the bone matrices stored in the bone uniform block.
// For each of the 8 bones, the bit (DIRTY_BONEMATRIX0 << index) in dirtyUniforms is tested;
// when it is set, the 12 floats at gstate.boneMatrix + 12 * index are converted with
// ConvertMatrix4x3To3x4Transposed and written to ub->bones[index]. Clean bones are left untouched.
void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms) {
	int bone = 0;
	while (bone < 8) {
		const bool isDirty = (dirtyUniforms & (DIRTY_BONEMATRIX0 << bone)) != 0;
		if (isDirty) {
			ConvertMatrix4x3To3x4Transposed(ub->bones[bone], gstate.boneMatrix + 12 * bone);
		}
		++bone;
	}
}

View file

@ -120,17 +120,6 @@ R"( vec4 u_ambient;
vec3 u_lightspecular3;
)";
// CPU-side mirror of the bone uniform block: 8 bone matrices of 12 floats each
// (8 * 12 * 4 = 384 bytes).
// With some cleverness, we could get away with uploading just half this when only the four or five first
// bones are being used.
struct alignas(16) UB_VS_Bones {
	float bones[8][12];
};
static_assert(sizeof(UB_VS_Bones) == 8 * 12 * sizeof(float), "UB_VS_Bones must hold exactly 8 bone matrices");

// Shader-side member list for the bone uniform block. It must declare exactly one
// mat3x4 per entry of UB_VS_Bones::bones (u_bone0..u_bone7); declaring more would make
// the shader's block larger than the buffer that backs it.
static const char * const ub_vs_bonesStr =
R"(	mat3x4 u_bone0; mat3x4 u_bone1; mat3x4 u_bone2; mat3x4 u_bone3; mat3x4 u_bone4; mat3x4 u_bone5; mat3x4 u_bone6; mat3x4 u_bone7;
)";
static const char * const ub_frameStr =
R"(
float u_rotation;
@ -145,7 +134,6 @@ void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bo
void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport, bool useBufferedRendering);
void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms);
void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms);
void FrameUpdateUniforms(UB_Frame *ub, bool useBufferedRendering);
uint32_t PackLightControlBits();

View file

@ -507,7 +507,7 @@ void DrawEngineCommon::SubmitCurve(const void *control_points, const void *indic
if (indices)
GetIndexBounds(indices, num_points, vertType, &index_lower_bound, &index_upper_bound);
VertexDecoder *origVDecoder = GetVertexDecoder(GetVertTypeID(vertType, gstate.getUVGenMode(), decOptions_.applySkinInDecode));
VertexDecoder *origVDecoder = GetVertexDecoder(GetVertTypeID(vertType, gstate.getUVGenMode(), true));
*bytesRead = num_points * origVDecoder->VertexSize();
// Simplify away bones and morph before proceeding
@ -572,7 +572,7 @@ void DrawEngineCommon::SubmitCurve(const void *control_points, const void *indic
gstate_c.uv.vOff = 0;
}
uint32_t vertTypeID = GetVertTypeID(vertTypeWithIndex16, gstate.getUVGenMode(), decOptions_.applySkinInDecode);
uint32_t vertTypeID = GetVertTypeID(vertTypeWithIndex16, gstate.getUVGenMode(), true);
int generatedBytesRead;
if (output.count)
DispatchSubmitPrim(output.vertices, output.indices, PatchPrimToPrim(surface.primType), output.count, vertTypeID, gstate.getCullMode(), &generatedBytesRead);

View file

@ -103,9 +103,6 @@ static const ARMReg srcNEON = Q2;
static const ARMReg accNEON = Q3;
static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_WeightsU8, &VertexDecoderJitCache::Jit_WeightsU8},
{&VertexDecoder::Step_WeightsU16, &VertexDecoderJitCache::Jit_WeightsU16},
{&VertexDecoder::Step_WeightsFloat, &VertexDecoderJitCache::Jit_WeightsFloat},
{&VertexDecoder::Step_WeightsU8Skin, &VertexDecoderJitCache::Jit_WeightsU8Skin},
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
@ -296,55 +293,6 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
return (JittedVertexDecoder)start;
}
// Emits a byte-by-byte copy of dec_->nweights 8-bit weights from
// srcReg + dec_->weightoff to dstReg + dec_->decFmt.w0off, then emits zero
// stores until the number of written weights is a multiple of 4.
void VertexDecoderJitCache::Jit_WeightsU8() {
	// Basic implementation - a byte at a time. TODO: Optimize
	int idx = 0;
	while (idx < dec_->nweights) {
		LDRB(tempReg1, srcReg, dec_->weightoff + idx);
		STRB(tempReg1, dstReg, dec_->decFmt.w0off + idx);
		++idx;
	}

	const bool needsPadding = (idx & 3) != 0;
	if (needsPadding) {
		// Create a zero register. Might want to make a fixed one.
		EOR(scratchReg, scratchReg, scratchReg);
	}
	for (; (idx & 3) != 0; ++idx) {
		STRB(scratchReg, dstReg, dec_->decFmt.w0off + idx);
	}
}
// Emits a halfword-by-halfword copy of dec_->nweights 16-bit weights from
// srcReg + dec_->weightoff to dstReg + dec_->decFmt.w0off, then emits zero
// halfword stores until the number of written weights is a multiple of 4.
void VertexDecoderJitCache::Jit_WeightsU16() {
	// Basic implementation - a short at a time. TODO: Optimize
	int idx = 0;
	while (idx < dec_->nweights) {
		LDRH(tempReg1, srcReg, dec_->weightoff + idx * 2);
		STRH(tempReg1, dstReg, dec_->decFmt.w0off + idx * 2);
		++idx;
	}

	const bool needsPadding = (idx & 3) != 0;
	if (needsPadding) {
		// Create a zero register. Might want to make a fixed one.
		EOR(scratchReg, scratchReg, scratchReg);
	}
	for (; (idx & 3) != 0; ++idx) {
		STRH(scratchReg, dstReg, dec_->decFmt.w0off + idx * 2);
	}
}
// Emits a word-by-word copy of dec_->nweights 32-bit weights from
// srcReg + dec_->weightoff to dstReg + dec_->decFmt.w0off. tempReg1 is then
// cleared (only when padding is needed) and stored repeatedly until the
// number of written weights is a multiple of 4.
void VertexDecoderJitCache::Jit_WeightsFloat() {
	int idx = 0;
	while (idx < dec_->nweights) {
		LDR(tempReg1, srcReg, dec_->weightoff + idx * 4);
		STR(tempReg1, dstReg, dec_->decFmt.w0off + idx * 4);
		++idx;
	}

	const bool needsPadding = (idx & 3) != 0;
	if (needsPadding) {
		EOR(tempReg1, tempReg1, tempReg1);
	}
	// Zero additional weights rounding up to 4.
	for (; (idx & 3) != 0; ++idx) {
		STR(tempReg1, dstReg, dec_->decFmt.w0off + idx * 4);
	}
}
static const ARMReg weightRegs[8] = { S8, S9, S10, S11, S12, S13, S14, S15 };
static const ARMReg neonWeightRegsD[4] = { D4, D5, D6, D7 };
static const ARMReg neonWeightRegsQ[2] = { Q2, Q3 };

View file

@ -80,9 +80,6 @@ static const ARM64Reg neonWeightRegsQ[2] = { Q3, Q2 }; // reverse order to prev
// Q16+ are free-for-all for matrices. In 16 registers, we can fit 4 4x4 matrices.
static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_WeightsU8, &VertexDecoderJitCache::Jit_WeightsU8},
{&VertexDecoder::Step_WeightsU16, &VertexDecoderJitCache::Jit_WeightsU16},
{&VertexDecoder::Step_WeightsFloat, &VertexDecoderJitCache::Jit_WeightsFloat},
{&VertexDecoder::Step_WeightsU8Skin, &VertexDecoderJitCache::Jit_WeightsU8Skin},
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
@ -350,44 +347,6 @@ void VertexDecoderJitCache::Jit_ApplyWeights() {
}
}
// Emits a byte-by-byte copy of dec_->nweights 8-bit weights from
// srcReg + dec_->weightoff to dstReg + dec_->decFmt.w0off, then stores WZR
// (zero) bytes until the number of written weights is a multiple of 4.
void VertexDecoderJitCache::Jit_WeightsU8() {
	// Basic implementation - a byte at a time. TODO: Optimize
	int idx = 0;
	while (idx < dec_->nweights) {
		LDRB(INDEX_UNSIGNED, tempReg1, srcReg, dec_->weightoff + idx);
		STRB(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.w0off + idx);
		++idx;
	}
	for (; (idx & 3) != 0; ++idx) {
		STRB(INDEX_UNSIGNED, WZR, dstReg, dec_->decFmt.w0off + idx);
	}
}
// Emits a halfword-by-halfword copy of dec_->nweights 16-bit weights from
// srcReg + dec_->weightoff to dstReg + dec_->decFmt.w0off, then stores WZR
// (zero) halfwords until the number of written weights is a multiple of 4.
void VertexDecoderJitCache::Jit_WeightsU16() {
	// Basic implementation - a short at a time. TODO: Optimize
	int idx = 0;
	while (idx < dec_->nweights) {
		LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->weightoff + idx * 2);
		STRH(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.w0off + idx * 2);
		++idx;
	}
	for (; (idx & 3) != 0; ++idx) {
		STRH(INDEX_UNSIGNED, WZR, dstReg, dec_->decFmt.w0off + idx * 2);
	}
}
// Emits a word-by-word copy of dec_->nweights 32-bit weights from
// srcReg + dec_->weightoff to dstReg + dec_->decFmt.w0off, then stores WZR
// (zero) words until the number of written weights is a multiple of 4.
void VertexDecoderJitCache::Jit_WeightsFloat() {
	int idx = 0;
	while (idx < dec_->nweights) {
		LDR(INDEX_UNSIGNED, tempReg1, srcReg, dec_->weightoff + idx * 4);
		STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.w0off + idx * 4);
		++idx;
	}
	// Zero additional weights rounding up to 4.
	for (; (idx & 3) != 0; ++idx) {
		STR(INDEX_UNSIGNED, WZR, dstReg, dec_->decFmt.w0off + idx * 4);
	}
}
void VertexDecoderJitCache::Jit_WeightsU8Skin() {
// Weight is first so srcReg is correct.
switch (dec_->nweights) {

View file

@ -176,67 +176,6 @@ void PrintDecodedVertex(VertexReader &vtx) {
printf("P: %f %f %f\n", pos[0], pos[1], pos[2]);
}
void VertexDecoder::Step_WeightsU8() const
{
u8 *wt = (u8 *)(decoded_ + decFmt.w0off);
const u8 *wdata = (const u8*)(ptr_);
int j;
for (j = 0; j < nweights; j++)
wt[j] = wdata[j];
while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0;
}
void VertexDecoder::Step_WeightsU16() const
{
u16 *wt = (u16 *)(decoded_ + decFmt.w0off);
const u16_le *wdata = (const u16_le *)(ptr_);
int j;
for (j = 0; j < nweights; j++)
wt[j] = wdata[j];
while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0;
}
void VertexDecoder::Step_WeightsU8ToFloat() const
{
float *wt = (float *)(decoded_ + decFmt.w0off);
const u8 *wdata = (const u8*)(ptr_);
int j;
for (j = 0; j < nweights; j++) {
wt[j] = (float)wdata[j] * (1.0f / 128.0f);
}
while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0;
}
void VertexDecoder::Step_WeightsU16ToFloat() const
{
float *wt = (float *)(decoded_ + decFmt.w0off);
const u16_le *wdata = (const u16_le *)(ptr_);
int j;
for (j = 0; j < nweights; j++) {
wt[j] = (float)wdata[j] * (1.0f / 32768.0f);
}
while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0;
}
// Float weights should be uncommon, we can live with having to multiply these by 2.0
// to avoid special checks in the vertex shader generator.
// (PSP uses 0.0-2.0 fixed point numbers for weights)
void VertexDecoder::Step_WeightsFloat() const
{
float *wt = (float *)(decoded_ + decFmt.w0off);
const float_le *wdata = (const float_le *)(ptr_);
int j;
for (j = 0; j < nweights; j++) {
wt[j] = wdata[j];
}
while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0.0f;
}
void VertexDecoder::ComputeSkinMatrix(const float weights[8]) const {
memset(skinMatrix, 0, sizeof(skinMatrix));
for (int j = 0; j < nweights; j++) {
@ -877,20 +816,6 @@ void VertexDecoder::Step_PosFloatMorphSkin() const {
Vec3ByMatrix43(v, pos, skinMatrix);
}
// Weight copy steps that keep the source format, indexed by the vertex weight
// type. Slot 0 is a placeholder: the table is only indexed when the weight
// type is non-zero.
static const StepFunction wtstep[4] = {
	nullptr,
	&VertexDecoder::Step_WeightsU8,
	&VertexDecoder::Step_WeightsU16,
	&VertexDecoder::Step_WeightsFloat,
};
// Weight steps that always produce float output, indexed by the vertex weight
// type: 8-bit and 16-bit inputs are converted, float input is copied as-is.
// Slot 0 is a placeholder: the table is only indexed when the weight type is
// non-zero.
static const StepFunction wtstepToFloat[4] = {
	nullptr,
	&VertexDecoder::Step_WeightsU8ToFloat,
	&VertexDecoder::Step_WeightsU16ToFloat,
	&VertexDecoder::Step_WeightsFloat,
};
// TODO: Morph weights correctly! This is missing. Not sure if any game actually
// use this functionality at all.
@ -1089,7 +1014,7 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
DEBUG_LOG(G3D, "VTYPE: THRU=%i TC=%i COL=%i POS=%i NRM=%i WT=%i NW=%i IDX=%i MC=%i", (int)throughmode, tc, col, pos, nrm, weighttype, nweights, idx, morphcount);
}
skinInDecode = weighttype != 0 && options.applySkinInDecode;
skinInDecode = weighttype != 0;
if (weighttype) { // && nweights?
weightoff = size;
@ -1098,43 +1023,11 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
if (wtalign[weighttype] > biggest)
biggest = wtalign[weighttype];
if (skinInDecode) {
// No visible output, computes a matrix that is passed through the skinMatrix variable
// to the "nrm" and "pos" steps.
// Technically we should support morphing the weights too, but I have a hard time
// imagining that any game would use that.. but you never know.
steps_[numSteps_++] = wtstep_skin[weighttype];
} else {
int fmtBase = DEC_FLOAT_1;
if (options.expandAllWeightsToFloat) {
steps_[numSteps_++] = wtstepToFloat[weighttype];
fmtBase = DEC_FLOAT_1;
} else {
steps_[numSteps_++] = wtstep[weighttype];
if (weighttype == GE_VTYPE_WEIGHT_8BIT >> GE_VTYPE_WEIGHT_SHIFT) {
fmtBase = DEC_U8_1;
} else if (weighttype == GE_VTYPE_WEIGHT_16BIT >> GE_VTYPE_WEIGHT_SHIFT) {
fmtBase = DEC_U16_1;
} else if (weighttype == GE_VTYPE_WEIGHT_FLOAT >> GE_VTYPE_WEIGHT_SHIFT) {
fmtBase = DEC_FLOAT_1;
}
}
int numWeights = TranslateNumBones(nweights);
if (numWeights <= 4) {
decFmt.w0off = decOff;
decFmt.w0fmt = fmtBase + numWeights - 1;
decOff += DecFmtSize(decFmt.w0fmt);
} else {
decFmt.w0off = decOff;
decFmt.w0fmt = fmtBase + 3;
decOff += DecFmtSize(decFmt.w0fmt);
decFmt.w1off = decOff;
decFmt.w1fmt = fmtBase + numWeights - 5;
decOff += DecFmtSize(decFmt.w1fmt);
}
}
// No visible output, computes a matrix that is passed through the skinMatrix variable
// to the "nrm" and "pos" steps.
// Technically we should support morphing the weights too, but I have a hard time
// imagining that any game would use that.. but you never know.
steps_[numSteps_++] = wtstep_skin[weighttype];
}
if (tc) {

View file

@ -333,7 +333,6 @@ typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count);
// Options passed to VertexDecoder::SetVertexType to select how a vertex format is decoded.
struct VertexDecoderOptions {
// When weights are emitted to the decoded vertex, selects the to-float weight steps
// instead of the steps that keep the source weight format.
bool expandAllWeightsToFloat;
// NOTE(review): presumably makes 8-bit normals decode to floats; its use is not visible here - confirm.
bool expand8BitNormalsToFloat;
// When set and the vertex type has weights, skinning is applied during decode
// rather than weights being written to the decoded vertex.
bool applySkinInDecode;
};
class VertexDecoder {
@ -353,12 +352,6 @@ public:
std::string GetString(DebugShaderStringType stringType);
void Step_WeightsU8() const;
void Step_WeightsU16() const;
void Step_WeightsU8ToFloat() const;
void Step_WeightsU16ToFloat() const;
void Step_WeightsFloat() const;
void ComputeSkinMatrix(const float weights[8]) const;
void Step_WeightsU8Skin() const;
@ -512,12 +505,6 @@ public:
JittedVertexDecoder Compile(const VertexDecoder &dec, int32_t *jittedSize);
void Clear();
void Jit_WeightsU8();
void Jit_WeightsU16();
void Jit_WeightsU8ToFloat();
void Jit_WeightsU16ToFloat();
void Jit_WeightsFloat();
void Jit_WeightsU8Skin();
void Jit_WeightsU16Skin();
void Jit_WeightsFloatSkin();

View file

@ -94,16 +94,10 @@ static const X64Reg fpScratchReg4 = XMM4;
// on the interpreter if the compiler fails.
static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_WeightsU8, &VertexDecoderJitCache::Jit_WeightsU8},
{&VertexDecoder::Step_WeightsU16, &VertexDecoderJitCache::Jit_WeightsU16},
{&VertexDecoder::Step_WeightsFloat, &VertexDecoderJitCache::Jit_WeightsFloat},
{&VertexDecoder::Step_WeightsU8Skin, &VertexDecoderJitCache::Jit_WeightsU8Skin},
{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
{&VertexDecoder::Step_WeightsU8ToFloat, &VertexDecoderJitCache::Jit_WeightsU8ToFloat},
{&VertexDecoder::Step_WeightsU16ToFloat, &VertexDecoderJitCache::Jit_WeightsU16ToFloat},
{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
{&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat},
{&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat},
@ -297,175 +291,6 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
return (JittedVertexDecoder)start;
}
// Emits x86 code that copies dec_->nweights 8-bit weights from the source vertex
// (srcReg + dec_->weightoff) to the decoded vertex. Weights 0-3 travel through
// tempReg1 and are stored as one 32-bit word at decFmt.w0off; weights 4-7, if
// any, travel through tempReg2 and are stored as one 32-bit word at decFmt.w1off.
// Unused bytes of each word end up zero.
void VertexDecoderJitCache::Jit_WeightsU8() {
// Load phase: pick the narrowest load(s) covering nweights bytes.
// There is no default case, so an nweights outside 1..8 emits no load.
switch (dec_->nweights) {
case 1:
// Single byte, zero-extended to 32 bits.
MOVZX(32, 8, tempReg1, MDisp(srcReg, dec_->weightoff));
break;
case 2:
// Two bytes, zero-extended to 32 bits.
MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->weightoff));
break;
case 3:
// Loads a full 32-bit word (one byte more than needed), then masks off the top byte.
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
AND(32, R(tempReg1), Imm32(0x00FFFFFF));
break;
case 4:
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
break;
case 5:
// First four weights as a word, fifth as a zero-extended byte.
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
MOVZX(32, 8, tempReg2, MDisp(srcReg, dec_->weightoff + 4));
break;
case 6:
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
MOVZX(32, 16, tempReg2, MDisp(srcReg, dec_->weightoff + 4));
break;
case 7:
// Second word is loaded in full, then its top byte is masked off.
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
MOV(32, R(tempReg2), MDisp(srcReg, dec_->weightoff + 4));
AND(32, R(tempReg2), Imm32(0x00FFFFFF));
break;
case 8:
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
MOV(32, R(tempReg2), MDisp(srcReg, dec_->weightoff + 4));
break;
}
// Store phase: one word for up to four weights, two words otherwise.
if (dec_->nweights <= 4) {
MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
} else {
MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
MOV(32, MDisp(dstReg, dec_->decFmt.w1off), R(tempReg2));
}
}
// Emits x86 code that copies dec_->nweights 16-bit weights from the source vertex
// (srcReg + dec_->weightoff) to the decoded vertex at decFmt.w0off.
// For 1-3 weights the emitted code writes exactly 8 bytes (unused slots zero)
// and the function returns early. For 4-8 weights the first four are copied as
// two 32-bit words, the rest one at a time, followed by zero padding up to a
// multiple of 4 weights.
void VertexDecoderJitCache::Jit_WeightsU16() {
switch (dec_->nweights) {
case 1:
// One weight zero-extended into the first word; second word zeroed.
MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->weightoff));
MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
MOV(32, MDisp(dstReg, dec_->decFmt.w0off + 4), Imm32(0));
return;
case 2:
// Two weights copied as one word; second word zeroed.
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
MOV(32, MDisp(dstReg, dec_->decFmt.w0off + 4), Imm32(0));
return;
case 3:
// Two weights as a word plus the third zero-extended, which also zeroes the fourth slot.
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
MOVZX(32, 16, tempReg2, MDisp(srcReg, dec_->weightoff + 4));
MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
MOV(32, MDisp(dstReg, dec_->decFmt.w0off + 4), R(tempReg2));
return;
case 4:
// Anything above 4 will do 4 here, and then the rest after.
case 5:
case 6:
case 7:
case 8:
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
MOV(32, R(tempReg2), MDisp(srcReg, dec_->weightoff + 4));
MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
MOV(32, MDisp(dstReg, dec_->decFmt.w0off + 4), R(tempReg2));
break;
}
// Only reached when the switch did not return (4..8 weights, or a value with no case).
// Basic implementation - a short at a time. TODO: Optimize
int j;
for (j = 4; j < dec_->nweights; j++) {
MOV(16, R(tempReg1), MDisp(srcReg, dec_->weightoff + j * 2));
MOV(16, MDisp(dstReg, dec_->decFmt.w0off + j * 2), R(tempReg1));
}
// Zero-pad up to a multiple of 4 weights (no stores when nweights is 4 or 8).
while (j & 3) {
MOV(16, MDisp(dstReg, dec_->decFmt.w0off + j * 2), Imm16(0));
j++;
}
}
// Emits x86 code that converts dec_->nweights 8-bit weights to floats and stores
// them 16 bytes at a time: the first group at decFmt.w0off and, when there are
// more than four weights, a second group at decFmt.w1off.
// NOTE(review): relies on Jit_AnyU8ToFloat leaving its converted result in XMM3
// and taking the number of source bits as its second argument - confirm against
// that helper, which is not visible here.
void VertexDecoderJitCache::Jit_WeightsU8ToFloat() {
if (dec_->nweights >= 4) {
// First four weights: 4 * 8 = 32 source bits.
Jit_AnyU8ToFloat(dec_->weightoff, 32);
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
if (dec_->nweights > 4) {
// Remaining weights start 4 bytes further into the source.
Jit_AnyU8ToFloat(dec_->weightoff + 4, (dec_->nweights - 4) * 8);
MOVUPS(MDisp(dstReg, dec_->decFmt.w1off), XMM3);
}
} else {
Jit_AnyU8ToFloat(dec_->weightoff, dec_->nweights * 8);
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
}
}
// Emits x86 code that converts dec_->nweights 16-bit weights to floats and stores
// them 16 bytes at a time: the first group at decFmt.w0off and, when there are
// more than four weights, a second group at decFmt.w1off.
// NOTE(review): relies on Jit_AnyU16ToFloat leaving its converted result in XMM3
// and taking the number of source bits as its second argument - confirm against
// that helper, which is not visible here.
void VertexDecoderJitCache::Jit_WeightsU16ToFloat() {
if (dec_->nweights >= 4) {
// First four weights: 4 * 16 = 64 source bits.
Jit_AnyU16ToFloat(dec_->weightoff, 64);
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
if (dec_->nweights > 4) {
// Remaining weights start 4 * 2 bytes further into the source.
Jit_AnyU16ToFloat(dec_->weightoff + 4 * 2, (dec_->nweights - 4) * 16);
MOVUPS(MDisp(dstReg, dec_->decFmt.w1off), XMM3);
}
} else {
Jit_AnyU16ToFloat(dec_->weightoff, dec_->nweights * 16);
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
}
}
// Emits x86 code that copies dec_->nweights float weights from the source vertex
// (srcReg + dec_->weightoff) to the decoded vertex at decFmt.w0off.
// Counts 1, 2, 4, 5, 6 and 8 use SSE loads and 16-byte stores through XMM3/XMM4;
// every other count takes the default path, a 32-bit-at-a-time copy followed by
// zero padding up to a multiple of 4 weights.
// The second group of four is written at decFmt.w0off + 16 (decFmt.w1off is not used here).
void VertexDecoderJitCache::Jit_WeightsFloat() {
int j;
switch (dec_->nweights) {
case 1:
// MOVSS: When the source operand is a memory location and destination operand is an XMM register, the three high-order doublewords of the destination operand are cleared to all 0s.
MOVSS(XMM3, MDisp(srcReg, dec_->weightoff));
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
break;
case 2:
// 64-bit load of two floats, then a full 16-byte store.
MOVQ_xmm(XMM3, MDisp(srcReg, dec_->weightoff));
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
break;
case 4:
MOVUPS(XMM3, MDisp(srcReg, dec_->weightoff));
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
break;
case 5:
// Four floats in XMM3, the fifth via MOVSS in XMM4.
MOVUPS(XMM3, MDisp(srcReg, dec_->weightoff));
MOVSS(XMM4, MDisp(srcReg, dec_->weightoff + 16));
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off + 16), XMM4);
break;
case 6:
// Four floats in XMM3, two more via a 64-bit load in XMM4.
MOVUPS(XMM3, MDisp(srcReg, dec_->weightoff));
MOVQ_xmm(XMM4, MDisp(srcReg, dec_->weightoff + 16));
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off + 16), XMM4);
break;
case 8:
MOVUPS(XMM3, MDisp(srcReg, dec_->weightoff));
MOVUPS(XMM4, MDisp(srcReg, dec_->weightoff + 16));
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
MOVUPS(MDisp(dstReg, dec_->decFmt.w0off + 16), XMM4);
break;
default:
// Generic path (e.g. 3 or 7 weights): copy one 32-bit value at a time.
for (j = 0; j < dec_->nweights; j++) {
MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff + j * 4));
MOV(32, MDisp(dstReg, dec_->decFmt.w0off + j * 4), R(tempReg1));
}
while (j & 3) { // Zero additional weights rounding up to 4.
MOV(32, MDisp(dstReg, dec_->decFmt.w0off + j * 4), Imm32(0));
j++;
}
break;
}
}
void VertexDecoderJitCache::Jit_WeightsU8Skin() {
MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));

View file

@ -30,6 +30,7 @@
#include "GPU/Common/ShaderUniforms.h"
#include "GPU/Common/VertexDecoderCommon.h"
#include "GPU/Common/VertexShaderGenerator.h"
#include "GPU/Vulkan/DrawEngineVulkan.h"
#undef WRITE
@ -190,7 +191,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
bool flipNormal = id.Bit(VS_BIT_NORM_REVERSE);
int ls0 = id.Bits(VS_BIT_LS0, 2);
int ls1 = id.Bits(VS_BIT_LS1, 2);
bool enableBones = id.Bit(VS_BIT_ENABLE_BONES) && useHWTransform;
bool enableLighting = id.Bit(VS_BIT_LIGHTING_ENABLE);
int matUpdate = id.Bits(VS_BIT_MATERIAL_UPDATE, 3);
@ -200,9 +200,8 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
return false;
}
// Apparently we don't support bezier/spline together with bones.
bool doBezier = id.Bit(VS_BIT_BEZIER) && !enableBones && useHWTransform;
bool doSpline = id.Bit(VS_BIT_SPLINE) && !enableBones && useHWTransform;
bool doBezier = id.Bit(VS_BIT_BEZIER) && useHWTransform;
bool doSpline = id.Bit(VS_BIT_SPLINE) && useHWTransform;
if (doBezier || doSpline) {
if (!hasNormal) {
// Bad usage.
@ -235,11 +234,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
}
}
int numBoneWeights = 0;
int boneWeightScale = id.Bits(VS_BIT_WEIGHT_FMTSCALE, 2);
if (enableBones) {
numBoneWeights = 1 + id.Bits(VS_BIT_BONES, 3);
}
bool texCoordInVec3 = false;
bool vertexRangeCulling = id.Bit(VS_BIT_VERTEX_RANGE_CULLING) && !isModeThrough;
@ -254,15 +248,9 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, "layout (std140, set = 0, binding = 0) uniform frameVars {\n%s};\n", ub_frameStr);
}
WRITE(p, "layout (std140, set = 1, binding = 3) uniform baseVars {\n%s};\n", ub_baseStr);
WRITE(p, "layout (std140, set = 1, binding = %d) uniform baseVars {\n%s};\n", DRAW_BINDING_DYNUBO_BASE, ub_baseStr);
if (enableLighting || doShadeMapping)
WRITE(p, "layout (std140, set = 1, binding = 4) uniform lightVars {\n%s};\n", ub_vs_lightsStr);
if (enableBones)
WRITE(p, "layout (std140, set = 1, binding = 5) uniform boneVars {\n%s};\n", ub_vs_bonesStr);
if (enableBones) {
WRITE(p, "%s", boneWeightDecl[numBoneWeights]);
}
WRITE(p, "layout (std140, set = 1, binding = %d) uniform lightVars {\n%s};\n", DRAW_BINDING_DYNUBO_LIGHT, ub_vs_lightsStr);
if (useHWTransform)
WRITE(p, "layout (location = %d) in vec3 position;\n", (int)PspAttributeLocation::POSITION);
@ -304,7 +292,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
if (compat.shaderLanguage == HLSL_D3D11) {
WRITE(p, "cbuffer base : register(b0) {\n%s};\n", ub_baseStr);
WRITE(p, "cbuffer lights: register(b1) {\n%s};\n", ub_vs_lightsStr);
WRITE(p, "cbuffer bones : register(b2) {\n%s};\n", ub_vs_bonesStr);
} else {
WRITE(p, "#pragma warning( disable : 3571 )\n");
if (isModeThrough) {
@ -325,15 +312,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, "mat3x4 u_view : register(c%i);\n", CONST_VS_VIEW);
if (doTextureTransform)
WRITE(p, "mat3x4 u_texmtx : register(c%i);\n", CONST_VS_TEXMTX);
if (enableBones) {
#ifdef USE_BONE_ARRAY
WRITE(p, "mat3x4 u_bone[%i] : register(c%i);\n", numBones, CONST_VS_BONE0);
#else
for (int i = 0; i < numBoneWeights; i++) {
WRITE(p, "mat3x4 u_bone%i : register(c%i);\n", i, CONST_VS_BONE0 + i * 3);
}
#endif
}
if (doTexture) {
WRITE(p, "vec4 u_uvscaleoffset : register(c%i);\n", CONST_VS_UVSCALEOFFSET);
}
@ -385,9 +363,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
if ((doSpline || doBezier) && compat.shaderLanguage == HLSL_D3D11) {
WRITE(p, " uint instanceId : SV_InstanceID;\n");
}
if (enableBones) {
WRITE(p, " %s", boneWeightAttrDeclHLSL[numBoneWeights]);
}
if (doTexture && hasTexcoord) {
WRITE(p, " vec2 texcoord : TEXCOORD0;\n");
}
@ -450,17 +425,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
}
WRITE(p, "};\n");
} else {
if (enableBones) {
const char * const * boneWeightDecl = boneWeightAttrDecl;
if (!strcmp(compat.attribute, "in")) {
boneWeightDecl = boneWeightInDecl;
}
WRITE(p, "%s", boneWeightDecl[numBoneWeights]);
*attrMask |= 1 << ATTR_W1;
if (numBoneWeights >= 5)
*attrMask |= 1 << ATTR_W2;
}
if (useHWTransform)
WRITE(p, "%s vec3 position;\n", compat.attribute);
else
@ -523,12 +487,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, "uniform mediump mat4 u_texmtx;\n");
*uniformMask |= DIRTY_TEXMATRIX;
}
if (enableBones) {
for (int i = 0; i < numBoneWeights; i++) {
WRITE(p, "uniform mat4 u_bone%i;\n", i);
*uniformMask |= DIRTY_BONEMATRIX0 << i;
}
}
if (doTexture) {
WRITE(p, "uniform vec4 u_uvscaleoffset;\n");
*uniformMask |= DIRTY_UVSCALEOFFSET;
@ -636,7 +594,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " vec4 tex;\n");
WRITE(p, " vec4 col;\n");
WRITE(p, "};\n");
WRITE(p, "layout (std430, set = 1, binding = 6) readonly buffer s_tess_data {\n");
WRITE(p, "layout (std430, set = 1, binding = %d) readonly buffer s_tess_data {\n", DRAW_BINDING_TESS_STORAGE_BUF);
WRITE(p, " TessData tess_data[];\n");
WRITE(p, "};\n");
@ -644,10 +602,10 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
WRITE(p, " vec4 basis;\n");
WRITE(p, " vec4 deriv;\n");
WRITE(p, "};\n");
WRITE(p, "layout (std430, set = 1, binding = 7) readonly buffer s_tess_weights_u {\n");
WRITE(p, "layout (std430, set = 1, binding = %d) readonly buffer s_tess_weights_u {\n", DRAW_BINDING_TESS_STORAGE_BUF_WU);
WRITE(p, " TessWeight tess_weights_u[];\n");
WRITE(p, "};\n");
WRITE(p, "layout (std430, set = 1, binding = 8) readonly buffer s_tess_weights_v {\n");
WRITE(p, "layout (std430, set = 1, binding = %d) readonly buffer s_tess_weights_v {\n", DRAW_BINDING_TESS_STORAGE_BUF_WV);
WRITE(p, " TessWeight tess_weights_v[];\n");
WRITE(p, "};\n");
} else if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
@ -823,9 +781,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
if (!useHWTransform) {
WRITE(p, " float fog = In.fog;\n");
}
if (enableBones) {
WRITE(p, "%s", boneWeightAttrInitHLSL[numBoneWeights]);
}
}
if (!useHWTransform) {
@ -873,7 +828,7 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
}
} else {
// Step 1: World Transform / Skinning
if (!enableBones) {
if (true) {
if (doBezier || doSpline) {
// Hardware tessellation
WRITE(p, " Tess tess;\n");
@ -897,44 +852,6 @@ bool GenerateVertexShader(const VShaderID &id, char *buffer, const ShaderLanguag
else
WRITE(p, " mediump vec3 worldnormal = normalizeOr001(mul(vec4(0.0, 0.0, %s1.0, 0.0), u_world).xyz);\n", flipNormal ? "-" : "");
}
} else {
static const char *rescale[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
const char *factor = rescale[boneWeightScale];
static const char * const boneWeightAttr[8] = {
"w1.x", "w1.y", "w1.z", "w1.w",
"w2.x", "w2.y", "w2.z", "w2.w",
};
const char *boneMatrix = compat.forceMatrix4x4 ? "mat4" : "mat3x4";
// Uncomment this to screw up bone shaders to check the vertex shader software fallback
// WRITE(p, "THIS SHOULD ERROR! #error");
if (numBoneWeights == 1 && ShaderLanguageIsOpenGL(compat.shaderLanguage))
WRITE(p, " %s skinMatrix = mul(w1, u_bone0)", boneMatrix);
else
WRITE(p, " %s skinMatrix = mul(w1.x, u_bone0)", boneMatrix);
for (int i = 1; i < numBoneWeights; i++) {
const char *weightAttr = boneWeightAttr[i];
// workaround for "cant do .x of scalar" issue.
if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
if (numBoneWeights == 1 && i == 0) weightAttr = "w1";
if (numBoneWeights == 5 && i == 4) weightAttr = "w2";
}
WRITE(p, " + mul(%s, u_bone%i)", weightAttr, i);
}
WRITE(p, ";\n");
WRITE(p, " vec3 skinnedpos = mul(vec4(position, 1.0), skinMatrix).xyz%s;\n", factor);
WRITE(p, " vec3 worldpos = mul(vec4(skinnedpos, 1.0), u_world).xyz;\n");
if (hasNormal) {
WRITE(p, " mediump vec3 skinnednormal = mul(vec4(%snormal, 0.0), skinMatrix).xyz%s;\n", flipNormal ? "-" : "", factor);
} else {
WRITE(p, " mediump vec3 skinnednormal = mul(vec4(0.0, 0.0, %s1.0, 0.0), skinMatrix).xyz%s;\n", flipNormal ? "-" : "", factor);
}
WRITE(p, " mediump vec3 worldnormal = normalizeOr001(mul(vec4(skinnednormal, 0.0), u_world).xyz);\n");
}
std::string matrixPostfix;

View file

@ -55,16 +55,7 @@ enum {
CONST_VS_LIGHTSPECULAR = 44,
CONST_VS_LIGHTAMBIENT = 48,
CONST_VS_DEPTHRANGE = 52,
CONST_VS_BONE0 = 53,
CONST_VS_BONE1 = 56,
CONST_VS_BONE2 = 59,
CONST_VS_BONE3 = 62,
CONST_VS_BONE4 = 65,
CONST_VS_BONE5 = 68,
CONST_VS_BONE6 = 71,
CONST_VS_BONE7 = 74,
CONST_VS_BONE8 = 77,
CONST_VS_CULLRANGEMIN = 80,
CONST_VS_CULLRANGEMAX = 81,
CONST_VS_ROTATION = 82,
CONST_VS_CULLRANGEMIN = 53,
CONST_VS_CULLRANGEMAX = 54,
CONST_VS_ROTATION = 55,
};

View file

@ -364,8 +364,8 @@ void DrawEngineD3D11::DoFlush() {
// Cannot cache vertex data with morph enabled.
bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
// Also avoid caching when software skinning.
if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK))
// Also avoid caching when skinning.
if (lastVType_ & GE_VTYPE_WEIGHT_MASK)
useCache = false;
if (useCache) {
@ -538,7 +538,7 @@ rotateVBO:
D3D11VertexShader *vshader;
D3D11FragmentShader *fshader;
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, pipelineState_, useHWTransform, useHWTessellation_, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode);
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, pipelineState_, useHWTransform, useHWTessellation_, decOptions_.expandAllWeightsToFloat, true);
ID3D11InputLayout *inputLayout = SetupDecFmtForDraw(vshader, dec_->GetDecVtxFmt(), dec_->VertexType());
context_->PSSetShader(fshader->GetShader(), nullptr, 0);
context_->VSSetShader(vshader->GetShader(), nullptr, 0);
@ -581,7 +581,6 @@ rotateVBO:
}
} else {
PROFILE_THIS_SCOPE("soft");
decOptions_.applySkinInDecode = true;
DecodeVerts(decoded);
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
@ -649,7 +648,7 @@ rotateVBO:
if (result.action == SW_DRAW_PRIMITIVES) {
D3D11VertexShader *vshader;
D3D11FragmentShader *fshader;
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode);
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat, true);
context_->PSSetShader(fshader->GetShader(), nullptr, 0);
context_->VSSetShader(vshader->GetShader(), nullptr, 0);
shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
@ -710,7 +709,6 @@ rotateVBO:
framebufferManager_->ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, clearColor);
}
}
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
}
gpuStats.numDrawCalls += numDrawCalls;

View file

@ -95,24 +95,19 @@ ShaderManagerD3D11::ShaderManagerD3D11(Draw::DrawContext *draw, ID3D11Device *de
codeBuffer_ = new char[CODE_BUFFER_SIZE];
memset(&ub_base, 0, sizeof(ub_base));
memset(&ub_lights, 0, sizeof(ub_lights));
memset(&ub_bones, 0, sizeof(ub_bones));
static_assert(sizeof(ub_base) <= 512, "ub_base grew too big");
static_assert(sizeof(ub_lights) <= 512, "ub_lights grew too big");
static_assert(sizeof(ub_bones) <= 384, "ub_bones grew too big");
D3D11_BUFFER_DESC desc{sizeof(ub_base), D3D11_USAGE_DYNAMIC, D3D11_BIND_CONSTANT_BUFFER, D3D11_CPU_ACCESS_WRITE };
ASSERT_SUCCESS(device_->CreateBuffer(&desc, nullptr, &push_base));
desc.ByteWidth = sizeof(ub_lights);
ASSERT_SUCCESS(device_->CreateBuffer(&desc, nullptr, &push_lights));
desc.ByteWidth = sizeof(ub_bones);
ASSERT_SUCCESS(device_->CreateBuffer(&desc, nullptr, &push_bones));
}
// Destructor: drops the references held on the D3D11 constant buffers, clears the
// cached shaders, and frees the shader code scratch buffer.
ShaderManagerD3D11::~ShaderManagerD3D11() {
// Each push_* buffer is a raw ID3D11Buffer pointer, so it is released by hand.
push_base->Release();
push_lights->Release();
push_bones->Release();
ClearShaders();
// codeBuffer_ was allocated with new char[], so it needs the array form of delete.
delete[] codeBuffer_;
}
@ -161,19 +156,13 @@ uint64_t ShaderManagerD3D11::UpdateUniforms(bool useBufferedRendering) {
memcpy(map.pData, &ub_lights, sizeof(ub_lights));
context_->Unmap(push_lights, 0);
}
if (dirty & DIRTY_BONE_UNIFORMS) {
BoneUpdateUniforms(&ub_bones, dirty);
context_->Map(push_bones, 0, D3D11_MAP_WRITE_DISCARD, 0, &map);
memcpy(map.pData, &ub_bones, sizeof(ub_bones));
context_->Unmap(push_bones, 0);
}
}
gstate_c.CleanUniforms();
return dirty;
}
void ShaderManagerD3D11::BindUniforms() {
ID3D11Buffer *vs_cbs[3] = { push_base, push_lights, push_bones };
ID3D11Buffer *vs_cbs[3] = { push_base, push_lights };
ID3D11Buffer *ps_cbs[1] = { push_base };
context_->VSSetConstantBuffers(0, 3, vs_cbs);
context_->PSSetConstantBuffers(0, 1, ps_cbs);

View file

@ -85,8 +85,7 @@ public:
ShaderManagerD3D11(Draw::DrawContext *draw, ID3D11Device *device, ID3D11DeviceContext *context, D3D_FEATURE_LEVEL featureLevel);
~ShaderManagerD3D11();
void GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat, bool useSkinInDecode);
void ClearShaders();
// Selects the vertex and fragment shader for a draw and returns them through
// the vshader / fshader out-parameters.
// NOTE(review): exact caching/compilation behavior lives in the .cpp - confirm there.
void GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat, bool useSkinInDecode);
// Discards the shaders held by this manager (also called from the destructor).
// NOTE(review): presumably empties the vertex/fragment shader caches - confirm in the .cpp.
void ClearShaders();
void DirtyLastShader() override;
int GetNumVertexShaders() const { return (int)vsCache_.size(); }
@ -122,12 +121,10 @@ private:
// Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time.
UB_VS_FS_Base ub_base;
UB_VS_Lights ub_lights;
UB_VS_Bones ub_bones;
// Not actual pushbuffers, requires D3D11.1, let's try to live without that first.
ID3D11Buffer *push_base;
ID3D11Buffer *push_lights;
ID3D11Buffer *push_bones;
D3D11FragmentShader *lastFShader_ = nullptr;
D3D11VertexShader *lastVShader_ = nullptr;

View file

@ -348,8 +348,8 @@ void DrawEngineDX9::DoFlush() {
// Cannot cache vertex data with morph enabled.
bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK);
// Also avoid caching when software skinning.
if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK))
// Also avoid caching when skinning.
if (lastVType_ & GE_VTYPE_WEIGHT_MASK)
useCache = false;
if (useCache) {
@ -522,7 +522,7 @@ rotateVBO:
ApplyDrawState(prim);
ApplyDrawStateLate();
VSShader *vshader = shaderManager_->ApplyShader(true, useHWTessellation_, lastVType_, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode, pipelineState_);
VSShader *vshader = shaderManager_->ApplyShader(true, useHWTessellation_, lastVType_, decOptions_.expandAllWeightsToFloat, true, pipelineState_);
IDirect3DVertexDeclaration9 *pHardwareVertexDecl = SetupDecFmtForDraw(vshader, dec_->GetDecVtxFmt(), dec_->VertexType());
if (pHardwareVertexDecl) {
@ -546,7 +546,6 @@ rotateVBO:
}
}
} else {
decOptions_.applySkinInDecode = true;
DecodeVerts(decoded);
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
@ -614,7 +613,7 @@ rotateVBO:
ApplyDrawStateLate();
VSShader *vshader = shaderManager_->ApplyShader(false, false, lastVType_, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode, pipelineState_);
VSShader *vshader = shaderManager_->ApplyShader(false, false, lastVType_, decOptions_.expandAllWeightsToFloat, true, pipelineState_);
if (result.action == SW_DRAW_PRIMITIVES) {
if (result.setStencil) {
@ -654,7 +653,6 @@ rotateVBO:
framebufferManager_->ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, clearColor);
}
}
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
}
gpuStats.numDrawCalls += numDrawCalls;

View file

@ -324,7 +324,7 @@ void ShaderManagerDX9::PSUpdateUniforms(u64 dirtyUniforms) {
}
const uint64_t vsUniforms = DIRTY_PROJMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX |
DIRTY_FOGCOEF | DIRTY_BONE_UNIFORMS | DIRTY_UVSCALEOFFSET | DIRTY_DEPTHRANGE | DIRTY_CULLRANGE |
DIRTY_FOGCOEF | DIRTY_UVSCALEOFFSET | DIRTY_DEPTHRANGE | DIRTY_CULLRANGE |
DIRTY_AMBIENT | DIRTY_MATAMBIENTALPHA | DIRTY_MATSPECULAR | DIRTY_MATDIFFUSE | DIRTY_MATEMISSIVE | DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3;
void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) {
@ -387,38 +387,6 @@ void ShaderManagerDX9::VSUpdateUniforms(u64 dirtyUniforms) {
}
VSSetFloatArray(CONST_VS_FOGCOEF, fogcoef, 2);
}
// TODO: Could even set all bones in one go if they're all dirty.
#ifdef USE_BONE_ARRAY
if (u_bone != 0) {
float allBones[8 * 16];
bool allDirty = true;
for (int i = 0; i < numBones; i++) {
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
ConvertMatrix4x3To4x4(allBones + 16 * i, gstate.boneMatrix + 12 * i);
} else {
allDirty = false;
}
}
if (allDirty) {
// Set them all with one call
//glUniformMatrix4fv(u_bone, numBones, GL_FALSE, allBones);
} else {
// Set them one by one. Could try to coalesce two in a row etc but too lazy.
for (int i = 0; i < numBones; i++) {
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
//glUniformMatrix4fv(u_bone + i, 1, GL_FALSE, allBones + 16 * i);
}
}
}
}
#else
for (int i = 0; i < 8; i++) {
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
VSSetMatrix4x3_3(CONST_VS_BONE0 + 3 * i, gstate.boneMatrix + 12 * i);
}
}
#endif
// Texturing
if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {

View file

@ -267,7 +267,7 @@ void DrawEngineGLES::DoFlush() {
GEPrimitiveType prim = prevPrim_;
VShaderID vsid;
Shader *vshader = shaderManager_->ApplyVertexShader(CanUseHardwareTransform(prim), useHWTessellation_, lastVType_, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode, &vsid);
Shader *vshader = shaderManager_->ApplyVertexShader(CanUseHardwareTransform(prim), useHWTessellation_, lastVType_, decOptions_.expandAllWeightsToFloat, true, &vsid);
GLRBuffer *vertexBuffer = nullptr;
GLRBuffer *indexBuffer = nullptr;
@ -278,8 +278,8 @@ void DrawEngineGLES::DoFlush() {
int vertexCount = 0;
bool useElements = true;
if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
// If software skinning, we've already predecoded into "decoded". So push that content.
if (lastVType_ & GE_VTYPE_WEIGHT_MASK) {
// Since we're software skinning, we've already predecoded into "decoded". So push that content.
size_t size = decodedVerts_ * dec_->GetDecVtxFmt().stride;
u8 *dest = (u8 *)frameData.pushVertex->Push(size, &vertexBufferOffset, &vertexBuffer);
memcpy(dest, decoded, size);
@ -331,7 +331,6 @@ void DrawEngineGLES::DoFlush() {
}
} else {
PROFILE_THIS_SCOPE("soft");
decOptions_.applySkinInDecode = true;
DecodeVerts(decoded);
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
@ -448,7 +447,6 @@ void DrawEngineGLES::DoFlush() {
}
gstate_c.Dirty(DIRTY_BLEND_STATE); // Make sure the color mask gets re-applied.
}
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
}
gpuStats.numDrawCalls += numDrawCalls;

View file

@ -121,10 +121,6 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
queries.push_back({ &u_world, "u_world" });
queries.push_back({ &u_texmtx, "u_texmtx" });
if (VSID.Bit(VS_BIT_ENABLE_BONES))
numBones = TranslateNumBones(VSID.Bits(VS_BIT_BONES, 3) + 1);
else
numBones = 0;
queries.push_back({ &u_depthRange, "u_depthRange" });
queries.push_back({ &u_cullRangeMin, "u_cullRangeMin" });
queries.push_back({ &u_cullRangeMax, "u_cullRangeMax" });
@ -134,15 +130,6 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
queries.push_back({ &u_scaleX, "u_scaleX" });
queries.push_back({ &u_scaleY, "u_scaleY" });
#ifdef USE_BONE_ARRAY
queries.push_back({ &u_bone, "u_bone" });
#else
static const char * const boneNames[8] = { "u_bone0", "u_bone1", "u_bone2", "u_bone3", "u_bone4", "u_bone5", "u_bone6", "u_bone7", };
for (int i = 0; i < 8; i++) {
queries.push_back({ &u_bone[i], boneNames[i] });
}
#endif
// Lighting, texturing
queries.push_back({ &u_ambient, "u_ambient" });
queries.push_back({ &u_matambientalpha, "u_matambientalpha" });
@ -584,13 +571,6 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu
float f = (float)gstate.getStencilTestRef() * (1.0f / 255.0f);
render_->SetUniformF(&u_stencilReplaceValue, 1, &f);
}
float bonetemp[16];
for (int i = 0; i < numBones; i++) {
if (dirty & (DIRTY_BONEMATRIX0 << i)) {
ConvertMatrix4x3To4x4Transposed(bonetemp, gstate.boneMatrix + 12 * i);
render_->SetUniformM4x4(&u_bone[i], bonetemp);
}
}
if (dirty & DIRTY_SHADERBLEND) {
if (u_blendFixA != -1) {

View file

@ -67,13 +67,6 @@ public:
int u_scaleX;
int u_scaleY;
#ifdef USE_BONE_ARRAY
int u_bone; // array, size is numBones
#else
int u_bone[8];
#endif
int numBones;
// Shader blending.
int u_fbotex;
int u_blendFixA;

View file

@ -66,8 +66,8 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_BEZIER, FLAG_EXECUTE, 0, &GPUCommon::Execute_Bezier },
{ GE_CMD_SPLINE, FLAG_EXECUTE, 0, &GPUCommon::Execute_Spline },
// Changing the vertex type requires us to flush.
{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
// Changing the vertex type requires us to flush, unless we just change the weight count - need to handle in a func.
{ GE_CMD_VERTEXTYPE, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_VertexType },
{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommon::Execute_LoadClut },
@ -436,14 +436,6 @@ GPUCommon::~GPUCommon() {
}
void GPUCommon::UpdateCmdInfo() {
if (g_Config.bSoftwareSkinning) {
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexTypeSkinning;
} else {
cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommon::Execute_VertexType;
}
if (g_Config.bFastMemory) {
cmdInfo_[GE_CMD_JUMP].func = &GPUCommon::Execute_JumpFast;
cmdInfo_[GE_CMD_CALL].func = &GPUCommon::Execute_CallFast;
@ -782,7 +774,7 @@ void GPUCommon::ResetMatrices() {
matrixVisible.tgen[i] = toFloat24(gstate.tgenMatrix[i]);
// Assume all the matrices changed, so dirty things related to them.
gstate_c.Dirty(DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_PROJMATRIX | DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE | DIRTY_BONE_UNIFORMS);
gstate_c.Dirty(DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_PROJMATRIX | DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE);
}
u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer<PspGeListArgs> args, bool head) {
@ -1677,22 +1669,12 @@ void GPUCommon::Execute_TexSize0(u32 op, u32 diff) {
}
}
// Command handler registered for GE_CMD_VERTEXTYPE in commonCommandTable.
// Marks the state that depends on the vertex type as dirty, based on which bits are set in diff.
// NOTE(review): diff is presumably the XOR of the old and new command value - confirm against the dispatcher.
void GPUCommon::Execute_VertexType(u32 op, u32 diff) {
// Any change at all invalidates the vertex shader state.
if (diff)
gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
// A change to the texcoord format bits or the through-mode bit also dirties the UV scale/offset.
if (diff & (GE_VTYPE_TC_MASK | GE_VTYPE_THROUGH_MASK)) {
gstate_c.Dirty(DIRTY_UVSCALEOFFSET);
// Toggling through mode additionally dirties raster, viewport/scissor, fragment and geometry shader state, and the cull range.
if (diff & GE_VTYPE_THROUGH_MASK)
gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
}
}
// Command handler registered for GE_CMD_LOADCLUT in commonCommandTable.
// Marks the texture parameters dirty and has the texture cache load the CLUT
// from the address and byte count currently held in gstate.
void GPUCommon::Execute_LoadClut(u32 op, u32 diff) {
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes());
}
void GPUCommon::Execute_VertexTypeSkinning(u32 op, u32 diff) {
void GPUCommon::Execute_VertexType(u32 op, u32 diff) {
// Don't flush when weight count changes.
if (diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) {
// Restore and flush
@ -1834,7 +1816,7 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
// cull mode
int cullMode = gstate.getCullMode();
uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);
uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), true);
drawEngineCommon_->SubmitPrim(verts, inds, prim, count, vertTypeID, cullMode, &bytesRead);
// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
// Some games rely on this, they don't bother reloading VADDR and IADDR.
@ -1853,8 +1835,6 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
// between each prim, we just change the triangle winding right here to still be able to join draw calls.
uint32_t vtypeCheckMask = ~GE_VTYPE_WEIGHTCOUNT_MASK;
if (!g_Config.bSoftwareSkinning)
vtypeCheckMask = 0xFFFFFFFF;
if (debugRecording_)
goto bail;
@ -1892,7 +1872,7 @@ void GPUCommon::Execute_Prim(u32 op, u32 diff) {
goto bail;
} else {
vertexType = data;
vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);
vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), true);
}
break;
}
@ -2033,10 +2013,6 @@ void GPUCommon::Execute_Bezier(u32 op, u32 diff) {
indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
}
if (vertTypeIsSkinningEnabled(gstate.vertType)) {
DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
}
// Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already.
if (flushOnParams_)
drawEngineCommon_->DispatchFlush();
@ -2106,10 +2082,6 @@ void GPUCommon::Execute_Spline(u32 op, u32 diff) {
indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
}
if (vertTypeIsSkinningEnabled(gstate.vertType)) {
DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
}
// Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already.
if (flushOnParams_)
drawEngineCommon_->DispatchFlush();
@ -2451,35 +2423,16 @@ void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) {
}
if (fastLoad) {
// If we can't use software skinning, we have to flush and dirty.
if (!g_Config.bSoftwareSkinning) {
while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
const u32 newVal = src[i] << 8;
if (dst[i] != newVal) {
Flush();
dst[i] = newVal;
}
if (++i >= end) {
break;
}
while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
dst[i] = src[i] << 8;
if (++i >= end) {
break;
}
}
const unsigned int numPlusCount = (op & 0x7F) + i;
for (unsigned int num = op & 0x7F; num < numPlusCount; num += 12) {
gstate_c.Dirty(DIRTY_BONEMATRIX0 << (num / 12));
}
} else {
while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
dst[i] = src[i] << 8;
if (++i >= end) {
break;
}
}
const unsigned int numPlusCount = (op & 0x7F) + i;
for (unsigned int num = op & 0x7F; num < numPlusCount; num += 12) {
gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12);
}
const unsigned int numPlusCount = (op & 0x7F) + i;
for (unsigned int num = op & 0x7F; num < numPlusCount; num += 12) {
gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12);
}
}
@ -2496,13 +2449,8 @@ void GPUCommon::Execute_BoneMtxData(u32 op, u32 diff) {
int num = gstate.boneMatrixNumber & 0x00FFFFFF;
u32 newVal = op << 8;
if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) {
// Bone matrices should NOT flush when software skinning is enabled!
if (!g_Config.bSoftwareSkinning) {
Flush();
gstate_c.Dirty(DIRTY_BONEMATRIX0 << (num / 12));
} else {
gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12);
}
// Bone matrices should NOT flush since we're software skinning!
gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12);
((u32 *)gstate.boneMatrix)[num] = newVal;
}
num++;
@ -2528,7 +2476,7 @@ void GPUCommon::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {
int prim = (op >> 8) & 0x7;
if (prim != GE_PRIM_KEEP_PREVIOUS) {
// Flush before changing the prim type. Only continue can be used to continue a prim.
// Flush before changing the prim type. Only continue can be used to continue a prim.
FlushImm();
}
@ -2692,13 +2640,7 @@ void GPUCommon::FastLoadBoneMatrix(u32 target) {
uniformsToDirty |= DIRTY_BONEMATRIX0 << ((mtxNum + 1) & 7);
}
if (!g_Config.bSoftwareSkinning) {
if (flushOnParams_)
Flush();
gstate_c.Dirty(uniformsToDirty);
} else {
gstate_c.deferredVertTypeDirty |= uniformsToDirty;
}
gstate_c.deferredVertTypeDirty |= uniformsToDirty;
gstate.FastLoadBoneMatrix(target);
}

View file

@ -146,7 +146,6 @@ public:
void Execute_End(u32 op, u32 diff);
void Execute_VertexType(u32 op, u32 diff);
void Execute_VertexTypeSkinning(u32 op, u32 diff);
void Execute_Prim(u32 op, u32 diff);
void Execute_Bezier(u32 op, u32 diff);

View file

@ -260,10 +260,6 @@ void GPUgstate::Restore(u32_le *ptr) {
gpu->ResetMatrices();
}
// Returns true when the weight-format bits of vertType (selected by GE_VTYPE_WEIGHT_MASK)
// are anything other than GE_VTYPE_WEIGHT_NONE, i.e. the vertex type carries bone weights.
bool vertTypeIsSkinningEnabled(u32 vertType) {
return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE);
}
struct GPUStateCache_v0 {
u32 vertexAddr;
u32 indexAddr;

View file

@ -448,8 +448,6 @@ struct GPUgstate {
void Restore(u32_le *ptr);
};
bool vertTypeIsSkinningEnabled(u32 vertType);
// Returns the number of bone weights encoded in vertType: the weight-count field
// (GE_VTYPE_WEIGHTCOUNT_MASK, shifted down) stores count - 1, so 1 is added back.
inline int vertTypeGetNumBoneWeights(u32 vertType) { return 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); }
// Returns the weight-format bits of vertType, still in place (not shifted down).
inline int vertTypeGetWeightMask(u32 vertType) { return vertType & GE_VTYPE_WEIGHT_MASK; }

View file

@ -889,7 +889,7 @@ void SoftGPU::Execute_Bezier(u32 op, u32 diff) {
indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
}
if ((gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) || vertTypeIsSkinningEnabled(gstate.vertType)) {
if (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) {
DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
}
@ -941,7 +941,7 @@ void SoftGPU::Execute_Spline(u32 op, u32 diff) {
indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
}
if ((gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) || vertTypeIsSkinningEnabled(gstate.vertType)) {
if (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) {
DEBUG_LOG_REPORT(G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
}

View file

@ -64,7 +64,6 @@ SoftwareDrawEngine::~SoftwareDrawEngine() {
// Re-reads configuration through the common draw engine, then forces skinning to be
// applied during vertex decode regardless of what the base implementation selected.
void SoftwareDrawEngine::NotifyConfigChanged() {
DrawEngineCommon::NotifyConfigChanged();
// Override whatever the base class set for this option.
decOptions_.applySkinInDecode = true;
}
void SoftwareDrawEngine::DispatchFlush() {
@ -940,7 +939,6 @@ bool TransformUnit::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVert
VertexDecoder vdecoder;
VertexDecoderOptions options{};
options.applySkinInDecode = true;
vdecoder.SetVertexType(gstate.vertType, options);
if (!Memory::IsValidRange(gstate_c.vertexAddr, (indexUpperBound + 1) * vdecoder.VertexSize()))

View file

@ -62,19 +62,6 @@ enum {
// Tuning constants for expiring cached vertex array info (VAI) entries.
// NOTE(review): the ages are presumably counted in frames and KILL_MAX is presumably a per-pass limit - confirm at the use sites.
enum { VAI_KILL_AGE = 120, VAI_UNRELIABLE_KILL_AGE = 240, VAI_UNRELIABLE_KILL_MAX = 4 };
// Binding slot numbers of the draw descriptor set. They are written into
// VkDescriptorSetLayoutBinding::binding in DrawEngineVulkan::InitDeviceObjects and must
// agree with the "binding = N" layout qualifiers in the generated Vulkan shaders.
enum {
DRAW_BINDING_TEXTURE = 0,
DRAW_BINDING_2ND_TEXTURE = 1,
DRAW_BINDING_DEPAL_TEXTURE = 2,
// Dynamic uniform buffers read by the vertex shader.
DRAW_BINDING_DYNUBO_BASE = 3,   // baseVars
DRAW_BINDING_DYNUBO_LIGHT = 4,  // lightVars
DRAW_BINDING_DYNUBO_BONE = 5,   // boneVars
// Storage buffers used only for hardware tessellation.
DRAW_BINDING_TESS_STORAGE_BUF = 6,     // s_tess_data
DRAW_BINDING_TESS_STORAGE_BUF_WU = 7,  // s_tess_weights_u
DRAW_BINDING_TESS_STORAGE_BUF_WV = 8,  // s_tess_weights_v
// Input attachment, visible to the fragment stage.
DRAW_BINDING_INPUT_ATTACHMENT = 9,
};
// Size in bytes of a buffer holding VERTEX_BUFFER_MAX TransformedVertex entries.
enum {
TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof(TransformedVertex)
};
@ -97,7 +84,7 @@ void DrawEngineVulkan::InitDeviceObjects() {
// TODO: Make things more flexible, so we at least have specialized layouts for input attachments and tess.
// Note that it becomes a support matrix..
VkDescriptorSetLayoutBinding bindings[10]{};
VkDescriptorSetLayoutBinding bindings[DRAW_BINDING_COUNT]{};
bindings[0].descriptorCount = 1;
bindings[0].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
bindings[0].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
@ -120,27 +107,23 @@ void DrawEngineVulkan::InitDeviceObjects() {
bindings[4].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
bindings[4].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[4].binding = DRAW_BINDING_DYNUBO_LIGHT;
bindings[5].descriptorCount = 1;
bindings[5].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
bindings[5].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[5].binding = DRAW_BINDING_DYNUBO_BONE;
// Used only for hardware tessellation.
bindings[5].descriptorCount = 1;
bindings[5].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[5].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[5].binding = DRAW_BINDING_TESS_STORAGE_BUF;
bindings[6].descriptorCount = 1;
bindings[6].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[6].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[6].binding = DRAW_BINDING_TESS_STORAGE_BUF;
bindings[6].binding = DRAW_BINDING_TESS_STORAGE_BUF_WU;
bindings[7].descriptorCount = 1;
bindings[7].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[7].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[7].binding = DRAW_BINDING_TESS_STORAGE_BUF_WU;
bindings[7].binding = DRAW_BINDING_TESS_STORAGE_BUF_WV;
bindings[8].descriptorCount = 1;
bindings[8].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
bindings[8].stageFlags = VK_SHADER_STAGE_VERTEX_BIT;
bindings[8].binding = DRAW_BINDING_TESS_STORAGE_BUF_WV;
bindings[9].descriptorCount = 1;
bindings[9].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
bindings[9].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[9].binding = DRAW_BINDING_INPUT_ATTACHMENT;
bindings[8].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
bindings[8].stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT;
bindings[8].binding = DRAW_BINDING_INPUT_ATTACHMENT;
VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT);
VkDevice device = vulkan->GetDevice();
@ -155,7 +138,7 @@ void DrawEngineVulkan::InitDeviceObjects() {
static constexpr int DEFAULT_DESC_POOL_SIZE = 512;
std::vector<VkDescriptorPoolSize> dpTypes;
dpTypes.resize(5);
dpTypes[0].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3;
dpTypes[0].descriptorCount = DEFAULT_DESC_POOL_SIZE * 2;
dpTypes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
dpTypes[1].descriptorCount = DEFAULT_DESC_POOL_SIZE * 3; // Don't use these for tess anymore, need max three per set.
dpTypes[1].type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
@ -384,10 +367,9 @@ void DrawEngineVulkan::DecodeVertsToPushBuffer(VulkanPushBuffer *push, uint32_t
DecodeVerts(dest);
}
VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone, bool tess) {
VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, bool tess) {
_dbg_assert_(base != VK_NULL_HANDLE);
_dbg_assert_(light != VK_NULL_HANDLE);
_dbg_assert_(bone != VK_NULL_HANDLE);
DescriptorSetKey key{};
key.imageView_ = imageView;
@ -396,7 +378,6 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
key.depalImageView_ = boundDepal_;
key.base_ = base;
key.light_ = light;
key.bone_ = bone;
key.secondaryIsInputAttachment = boundSecondaryIsInputAttachment_;
FrameData &frame = GetCurFrame();
@ -506,10 +487,6 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
buf[count].offset = 0;
buf[count].range = sizeof(UB_VS_Lights);
count++;
buf[count].buffer = bone;
buf[count].offset = 0;
buf[count].range = sizeof(UB_VS_Bones);
count++;
for (int i = 0; i < count; i++) {
writes[n].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
writes[n].pNext = nullptr;
@ -533,11 +510,9 @@ VkDescriptorSet DrawEngineVulkan::GetOrCreateDescriptorSet(VkImageView imageView
void DrawEngineVulkan::DirtyAllUBOs() {
baseUBOOffset = 0;
lightUBOOffset = 0;
boneUBOOffset = 0;
baseBuf = VK_NULL_HANDLE;
lightBuf = VK_NULL_HANDLE;
boneBuf = VK_NULL_HANDLE;
dirtyUniforms_ = DIRTY_BASE_UNIFORMS | DIRTY_LIGHT_UNIFORMS | DIRTY_BONE_UNIFORMS;
dirtyUniforms_ = DIRTY_BASE_UNIFORMS | DIRTY_LIGHT_UNIFORMS;
imageView = VK_NULL_HANDLE;
sampler = VK_NULL_HANDLE;
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
@ -599,7 +574,7 @@ void DrawEngineVulkan::DoFlush() {
// Also avoid caching when software skinning.
VkBuffer vbuf = VK_NULL_HANDLE;
VkBuffer ibuf = VK_NULL_HANDLE;
if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
if (lastVType_ & GE_VTYPE_WEIGHT_MASK) {
useCache = false;
}
@ -739,7 +714,7 @@ void DrawEngineVulkan::DoFlush() {
break;
}
} else {
if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
if (lastVType_ & GE_VTYPE_WEIGHT_MASK) {
// If software skinning, we've already predecoded into "decoded". So push that content.
VkDeviceSize size = decodedVerts_ * dec_->GetDecVtxFmt().stride;
u8 *dest = (u8 *)frameData.pushVertex->Push(size, &vbOffset, &vbuf);
@ -784,7 +759,7 @@ void DrawEngineVulkan::DoFlush() {
VulkanFragmentShader *fshader = nullptr;
VulkanGeometryShader *gshader = nullptr;
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, &gshader, pipelineState_, true, useHWTessellation_, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode);
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, &gshader, pipelineState_, true, useHWTessellation_, decOptions_.expandAllWeightsToFloat, true);
if (!vshader) {
// We're screwed.
return;
@ -826,10 +801,10 @@ void DrawEngineVulkan::DoFlush() {
dirtyUniforms_ |= shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
UpdateUBOs(&frameData);
VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf, tess);
VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, tess);
const uint32_t dynamicUBOOffsets[3] = {
baseUBOOffset, lightUBOOffset, boneUBOOffset,
const uint32_t dynamicUBOOffsets[] = {
baseUBOOffset, lightUBOOffset,
};
if (useElements) {
if (!ibuf) {
@ -841,7 +816,6 @@ void DrawEngineVulkan::DoFlush() {
}
} else {
PROFILE_THIS_SCOPE("soft");
decOptions_.applySkinInDecode = true;
DecodeVerts(decoded);
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
if (gstate.isModeThrough()) {
@ -920,7 +894,7 @@ void DrawEngineVulkan::DoFlush() {
VulkanFragmentShader *fshader = nullptr;
VulkanGeometryShader *gshader = nullptr;
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, &gshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode);
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, &gshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat, true);
_dbg_assert_msg_(!vshader->UseHWTransform(), "Bad vshader");
VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(renderManager, pipelineLayout_, pipelineKey_, &dec_->decFmt, vshader, fshader, gshader, false, 0);
if (!pipeline || !pipeline->pipeline) {
@ -928,7 +902,6 @@ void DrawEngineVulkan::DoFlush() {
decodedVerts_ = 0;
numDrawCalls = 0;
decodeCounter_ = 0;
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
return;
}
BindShaderBlendTex(); // This might cause copies so super important to do before BindPipeline.
@ -964,9 +937,9 @@ void DrawEngineVulkan::DoFlush() {
// Even if the first draw is through-mode, make sure we at least have one copy of these uniforms buffered
UpdateUBOs(&frameData);
VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, boneBuf, tess);
const uint32_t dynamicUBOOffsets[3] = {
baseUBOOffset, lightUBOOffset, boneUBOOffset,
VkDescriptorSet ds = GetOrCreateDescriptorSet(imageView, sampler, baseBuf, lightBuf, tess);
const uint32_t dynamicUBOOffsets[] = {
baseUBOOffset, lightUBOOffset,
};
PROFILE_THIS_SCOPE("renderman_q");
@ -996,7 +969,6 @@ void DrawEngineVulkan::DoFlush() {
framebufferManager_->ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, result.color);
}
}
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
}
gpuStats.numDrawCalls += numDrawCalls;
@ -1056,10 +1028,6 @@ void DrawEngineVulkan::UpdateUBOs(FrameData *frame) {
lightUBOOffset = shaderManager_->PushLightBuffer(frame->pushUBO, &lightBuf);
dirtyUniforms_ &= ~DIRTY_LIGHT_UNIFORMS;
}
if ((dirtyUniforms_ & DIRTY_BONE_UNIFORMS) || boneBuf == VK_NULL_HANDLE) {
boneUBOOffset = shaderManager_->PushBoneBuffer(frame->pushUBO, &boneBuf);
dirtyUniforms_ &= ~DIRTY_BONE_UNIFORMS;
}
}
DrawEngineVulkan::FrameData &DrawEngineVulkan::GetCurFrame() {

View file

@ -121,6 +121,19 @@ private:
VkDescriptorBufferInfo bufInfo_[3]{};
};
// Binding slot numbers for the draw engine's descriptor set layout.
// Slot values are spelled out explicitly; only the trailing count is derived.
enum {
	// Texture slots.
	DRAW_BINDING_TEXTURE          = 0,
	DRAW_BINDING_2ND_TEXTURE      = 1,
	DRAW_BINDING_DEPAL_TEXTURE    = 2,

	// Uniform buffers addressed through dynamic offsets (base, then light).
	DRAW_BINDING_DYNUBO_BASE      = 3,
	DRAW_BINDING_DYNUBO_LIGHT     = 4,

	// Vertex-stage storage buffers, used only for hardware tessellation.
	DRAW_BINDING_TESS_STORAGE_BUF    = 5,
	DRAW_BINDING_TESS_STORAGE_BUF_WU = 6,
	DRAW_BINDING_TESS_STORAGE_BUF_WV = 7,

	// Fragment-stage input attachment.
	DRAW_BINDING_INPUT_ATTACHMENT = 8,

	// One past the last slot (== 9); follows automatically from the entry above.
	DRAW_BINDING_COUNT,
};
// Handles transform, lighting and drawing.
class DrawEngineVulkan : public DrawEngineCommon {
public:
@ -207,7 +220,7 @@ private:
void UpdateUBOs(FrameData *frame);
FrameData &GetCurFrame();
VkDescriptorSet GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, VkBuffer bone, bool tess);
VkDescriptorSet GetOrCreateDescriptorSet(VkImageView imageView, VkSampler sampler, VkBuffer base, VkBuffer light, bool tess);
Draw::DrawContext *draw_;
@ -238,8 +251,7 @@ private:
VkImageView secondaryImageView_;
VkImageView depalImageView_;
VkSampler sampler_;
VkBuffer base_, light_, bone_; // All three UBO slots will be set to this. This will usually be identical
// for all draws in a frame, except when the buffer has to grow.
VkBuffer base_, light_;
bool secondaryIsInputAttachment;
};
@ -276,8 +288,7 @@ private:
uint64_t dirtyUniforms_;
uint32_t baseUBOOffset;
uint32_t lightUBOOffset;
uint32_t boneUBOOffset;
VkBuffer baseBuf, lightBuf, boneBuf;
VkBuffer baseBuf, lightBuf;
VkImageView imageView = VK_NULL_HANDLE;
VkSampler sampler = VK_NULL_HANDLE;

View file

@ -204,11 +204,9 @@ ShaderManagerVulkan::ShaderManagerVulkan(Draw::DrawContext *draw)
uboAlignment_ = vulkan->GetPhysicalDeviceProperties().properties.limits.minUniformBufferOffsetAlignment;
memset(&ub_base, 0, sizeof(ub_base));
memset(&ub_lights, 0, sizeof(ub_lights));
memset(&ub_bones, 0, sizeof(ub_bones));
static_assert(sizeof(ub_base) <= 512, "ub_base grew too big");
static_assert(sizeof(ub_lights) <= 512, "ub_lights grew too big");
static_assert(sizeof(ub_bones) <= 384, "ub_bones grew too big");
}
ShaderManagerVulkan::~ShaderManagerVulkan() {
@ -273,8 +271,6 @@ uint64_t ShaderManagerVulkan::UpdateUniforms(bool useBufferedRendering) {
BaseUpdateUniforms(&ub_base, dirty, false, useBufferedRendering);
if (dirty & DIRTY_LIGHT_UNIFORMS)
LightUpdateUniforms(&ub_lights, dirty);
if (dirty & DIRTY_BONE_UNIFORMS)
BoneUpdateUniforms(&ub_bones, dirty);
}
gstate_c.CleanUniforms();
return dirty;

View file

@ -149,10 +149,6 @@ public:
// Pushes the light uniform scratchpad (ub_lights) into dest, aligned to uboAlignment_.
// buf is forwarded to PushAligned; the value PushAligned returns is handed back unchanged
// (DrawEngineVulkan::UpdateUBOs stores it as the light UBO offset).
uint32_t PushLightBuffer(VulkanPushBuffer *dest, VkBuffer *buf) {
	const uint32_t lightOffset = dest->PushAligned(&ub_lights, sizeof(ub_lights), uboAlignment_, buf);
	return lightOffset;
}
// TODO: Only push half the bone buffer if we only have four bones.
uint32_t PushBoneBuffer(VulkanPushBuffer *dest, VkBuffer *buf) {
return dest->PushAligned(&ub_bones, sizeof(ub_bones), uboAlignment_, buf);
}
bool LoadCache(FILE *f);
void SaveCache(FILE *f);
@ -177,7 +173,6 @@ private:
// Uniform block scratchpad. These (the relevant ones) are copied to the current pushbuffer at draw time.
UB_VS_FS_Base ub_base;
UB_VS_Lights ub_lights;
UB_VS_Bones ub_bones;
VulkanFragmentShader *lastFShader_ = nullptr;
VulkanVertexShader *lastVShader_ = nullptr;

View file

@ -540,12 +540,6 @@ void GameSettingsScreen::CreateViews() {
hwTransform->SetDisabledPtr(&g_Config.bSoftwareRendering);
}
CheckBox *swSkin = graphicsSettings->Add(new CheckBox(&g_Config.bSoftwareSkinning, gr->T("Software Skinning")));
swSkin->OnClick.Add([=](EventParams &e) {
settingInfo_->Show(gr->T("SoftwareSkinning Tip", "Combine skinned model draws on the CPU, faster in most games"), e.v);
return UI::EVENT_CONTINUE;
});
swSkin->SetDisabledPtr(&g_Config.bSoftwareRendering);
CheckBox *tessellationHW = graphicsSettings->Add(new CheckBox(&g_Config.bHardwareTessellation, gr->T("Hardware Tessellation")));
tessellationHW->OnClick.Add([=](EventParams &e) {

View file

@ -306,8 +306,6 @@ int CtrlVertexList::GetRowCount() {
rowCount_ = 0;
}
VertexDecoderOptions options{};
// TODO: Maybe an option?
options.applySkinInDecode = true;
decoder->SetVertexType(state.vertType, options);
return rowCount_;
}

View file

@ -436,7 +436,6 @@ int main(int argc, const char* argv[])
g_Config.iInternalResolution = 1;
g_Config.iFastForwardMode = (int)FastForwardMode::CONTINUOUS;
g_Config.bEnableLogging = fullLog;
g_Config.bSoftwareSkinning = true;
g_Config.bVertexDecoderJit = true;
g_Config.bSoftwareRendering = coreParameter.gpuCore == GPUCORE_SOFTWARE;
g_Config.bSoftwareRenderingJit = true;

View file

@ -823,15 +823,6 @@ static void check_variables(CoreParameter &coreParam)
g_Config.bHardwareTransform = true;
}
var.key = "ppsspp_software_skinning";
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
{
if (!strcmp(var.value, "disabled"))
g_Config.bSoftwareSkinning = false;
else
g_Config.bSoftwareSkinning = true;
}
var.key = "ppsspp_vertex_cache";
if (environ_cb(RETRO_ENVIRONMENT_GET_VARIABLE, &var) && var.value)
{

View file

@ -407,15 +407,10 @@ bool TestVertexShaders() {
// The generated bits need some adjustment:
// We don't use these bits in the HLSL shader generator.
id.SetBits(VS_BIT_WEIGHT_FMTSCALE, 2, 0);
// If mode is through, we won't do hardware transform.
if (id.Bit(VS_BIT_IS_THROUGH)) {
id.SetBit(VS_BIT_USE_HW_TRANSFORM, 0);
}
if (!id.Bit(VS_BIT_USE_HW_TRANSFORM)) {
id.SetBit(VS_BIT_ENABLE_BONES, 0);
}
if (id.Bit(VS_BIT_VERTEX_RANGE_CULLING)) {
continue;
}

View file

@ -546,7 +546,6 @@ static bool TestVertexColor565() {
static bool TestVertex8Skin() {
VertexDecoderTestHarness dec;
VertexDecoderOptions opts{};
opts.applySkinInDecode = true;
dec.SetOptions(opts);
for (int i = 0; i < 8 * 12; ++i) {
@ -578,7 +577,6 @@ static bool TestVertex8Skin() {
static bool TestVertex16Skin() {
VertexDecoderTestHarness dec;
VertexDecoderOptions opts{};
opts.applySkinInDecode = true;
dec.SetOptions(opts);
for (int i = 0; i < 8 * 12; ++i) {
@ -610,7 +608,6 @@ static bool TestVertex16Skin() {
static bool TestVertexFloatSkin() {
VertexDecoderTestHarness dec;
VertexDecoderOptions opts{};
opts.applySkinInDecode = true;
dec.SetOptions(opts);
for (int i = 0; i < 8 * 12; ++i) {