diff --git a/GPU/Common/VertexDecoderArm.cpp b/GPU/Common/VertexDecoderArm.cpp
index 1f7d65a83..79e6584ae 100644
--- a/GPU/Common/VertexDecoderArm.cpp
+++ b/GPU/Common/VertexDecoderArm.cpp
@@ -111,9 +111,6 @@ static const ARMReg srcNEON = Q2;
 static const ARMReg accNEON = Q3;
 
 static const JitLookup jitLookup[] = {
-	{&VertexDecoder::Step_WeightsU8, &VertexDecoderJitCache::Jit_WeightsU8},
-	{&VertexDecoder::Step_WeightsU16, &VertexDecoderJitCache::Jit_WeightsU16},
-	{&VertexDecoder::Step_WeightsFloat, &VertexDecoderJitCache::Jit_WeightsFloat},
 	{&VertexDecoder::Step_WeightsU8Skin, &VertexDecoderJitCache::Jit_WeightsU8Skin},
 	{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
 	{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
@@ -326,55 +323,6 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 	return (JittedVertexDecoder)start;
 }
 
-void VertexDecoderJitCache::Jit_WeightsU8() {
-	// Basic implementation - a byte at a time. TODO: Optimize
-	int j;
-	for (j = 0; j < dec_->nweights; j++) {
-		LDRB(tempReg1, srcReg, dec_->weightoff + j);
-		STRB(tempReg1, dstReg, dec_->decFmt.w0off + j);
-	}
-	if (j & 3) {
-		// Create a zero register. Might want to make a fixed one.
-		EOR(scratchReg, scratchReg, scratchReg);
-	}
-	while (j & 3) {
-		STRB(scratchReg, dstReg, dec_->decFmt.w0off + j);
-		j++;
-	}
-}
-
-void VertexDecoderJitCache::Jit_WeightsU16() {
-	// Basic implementation - a short at a time. TODO: Optimize
-	int j;
-	for (j = 0; j < dec_->nweights; j++) {
-		LDRH(tempReg1, srcReg, dec_->weightoff + j * 2);
-		STRH(tempReg1, dstReg, dec_->decFmt.w0off + j * 2);
-	}
-	if (j & 3) {
-		// Create a zero register. Might want to make a fixed one.
-		EOR(scratchReg, scratchReg, scratchReg);
-	}
-	while (j & 3) {
-		STRH(scratchReg, dstReg, dec_->decFmt.w0off + j * 2);
-		j++;
-	}
-}
-
-void VertexDecoderJitCache::Jit_WeightsFloat() {
-	int j;
-	for (j = 0; j < dec_->nweights; j++) {
-		LDR(tempReg1, srcReg, dec_->weightoff + j * 4);
-		STR(tempReg1, dstReg, dec_->decFmt.w0off + j * 4);
-	}
-	if (j & 3) {
-		EOR(tempReg1, tempReg1, tempReg1);
-	}
-	while (j & 3) {  // Zero additional weights rounding up to 4.
-		STR(tempReg1, dstReg, dec_->decFmt.w0off + j * 4);
-		j++;
-	}
-}
-
 static const ARMReg weightRegs[8] = { S8, S9, S10, S11, S12, S13, S14, S15 };
 static const ARMReg neonWeightRegsD[4] = { D4, D5, D6, D7 };
 static const ARMReg neonWeightRegsQ[2] = { Q2, Q3 };
diff --git a/GPU/Common/VertexDecoderArm64.cpp b/GPU/Common/VertexDecoderArm64.cpp
index aab6b4ca3..acfa230ea 100644
--- a/GPU/Common/VertexDecoderArm64.cpp
+++ b/GPU/Common/VertexDecoderArm64.cpp
@@ -85,9 +85,6 @@ static const ARM64Reg neonWeightRegsQ[2] = { Q3, Q2 };  // reverse order to prev
 // Q16+ are free-for-all for matrices. In 16 registers, we can fit 4 4x4 matrices.
 
 static const JitLookup jitLookup[] = {
-	{&VertexDecoder::Step_WeightsU8, &VertexDecoderJitCache::Jit_WeightsU8},
-	{&VertexDecoder::Step_WeightsU16, &VertexDecoderJitCache::Jit_WeightsU16},
-	{&VertexDecoder::Step_WeightsFloat, &VertexDecoderJitCache::Jit_WeightsFloat},
 	{&VertexDecoder::Step_WeightsU8Skin, &VertexDecoderJitCache::Jit_WeightsU8Skin},
 	{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
 	{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
@@ -356,44 +353,6 @@ void VertexDecoderJitCache::Jit_ApplyWeights() {
 	}
 }
 
-void VertexDecoderJitCache::Jit_WeightsU8() {
-	// Basic implementation - a byte at a time. TODO: Optimize
-	int j;
-	for (j = 0; j < dec_->nweights; j++) {
-		LDRB(INDEX_UNSIGNED, tempReg1, srcReg, dec_->weightoff + j);
-		STRB(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.w0off + j);
-	}
-	while (j & 3) {
-		STRB(INDEX_UNSIGNED, WZR, dstReg, dec_->decFmt.w0off + j);
-		j++;
-	}
-}
-
-void VertexDecoderJitCache::Jit_WeightsU16() {
-	// Basic implementation - a short at a time. TODO: Optimize
-	int j;
-	for (j = 0; j < dec_->nweights; j++) {
-		LDRH(INDEX_UNSIGNED, tempReg1, srcReg, dec_->weightoff + j * 2);
-		STRH(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.w0off + j * 2);
-	}
-	while (j & 3) {
-		STRH(INDEX_UNSIGNED, WZR, dstReg, dec_->decFmt.w0off + j * 2);
-		j++;
-	}
-}
-
-void VertexDecoderJitCache::Jit_WeightsFloat() {
-	int j;
-	for (j = 0; j < dec_->nweights; j++) {
-		LDR(INDEX_UNSIGNED, tempReg1, srcReg, dec_->weightoff + j * 4);
-		STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.w0off + j * 4);
-	}
-	while (j & 3) {  // Zero additional weights rounding up to 4.
-		STR(INDEX_UNSIGNED, WZR, dstReg, dec_->decFmt.w0off + j * 4);
-		j++;
-	}
-}
-
 void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 	// Weight is first so srcReg is correct.
 	switch (dec_->nweights) {
diff --git a/GPU/Common/VertexDecoderCommon.cpp b/GPU/Common/VertexDecoderCommon.cpp
index 53c327e19..af3f63d5a 100644
--- a/GPU/Common/VertexDecoderCommon.cpp
+++ b/GPU/Common/VertexDecoderCommon.cpp
@@ -158,67 +158,6 @@ void PrintDecodedVertex(VertexReader &vtx) {
 VertexDecoder::VertexDecoder() : decoded_(nullptr), ptr_(nullptr), jitted_(0), jittedSize_(0) {
 }
 
-void VertexDecoder::Step_WeightsU8() const
-{
-	u8 *wt = (u8 *)(decoded_ + decFmt.w0off);
-	const u8 *wdata = (const u8*)(ptr_);
-	int j;
-	for (j = 0; j < nweights; j++)
-		wt[j] = wdata[j];
-	while (j & 3)   // Zero additional weights rounding up to 4.
-		wt[j++] = 0;
-}
-
-void VertexDecoder::Step_WeightsU16() const
-{
-	u16 *wt = (u16 *)(decoded_ + decFmt.w0off);
-	const u16 *wdata = (const u16*)(ptr_);
-	int j;
-	for (j = 0; j < nweights; j++)
-		wt[j] = wdata[j];
-	while (j & 3)   // Zero additional weights rounding up to 4.
-		wt[j++] = 0;
-}
-
-void VertexDecoder::Step_WeightsU8ToFloat() const
-{
-	float *wt = (float *)(decoded_ + decFmt.w0off);
-	const u8 *wdata = (const u8*)(ptr_);
-	int j;
-	for (j = 0; j < nweights; j++) {
-		wt[j] = (float)wdata[j] * (1.0f / 128.0f);
-	}
-	while (j & 3)   // Zero additional weights rounding up to 4.
-		wt[j++] = 0;
-}
-
-void VertexDecoder::Step_WeightsU16ToFloat() const
-{
-	float *wt = (float *)(decoded_ + decFmt.w0off);
-	const u16 *wdata = (const u16*)(ptr_);
-	int j;
-	for (j = 0; j < nweights; j++) {
-		wt[j] = (float)wdata[j] * (1.0f / 32768.0f);
-	}
-	while (j & 3)   // Zero additional weights rounding up to 4.
-		wt[j++] = 0;
-}
-
-// Float weights should be uncommon, we can live with having to multiply these by 2.0
-// to avoid special checks in the vertex shader generator.
-// (PSP uses 0.0-2.0 fixed point numbers for weights)
-void VertexDecoder::Step_WeightsFloat() const
-{
-	float *wt = (float *)(decoded_ + decFmt.w0off);
-	const float *wdata = (const float*)(ptr_);
-	int j;
-	for (j = 0; j < nweights; j++) {
-		wt[j] = wdata[j];
-	}
-	while (j & 3)   // Zero additional weights rounding up to 4.
-		wt[j++] = 0.0f;
-}
-
 void VertexDecoder::ComputeSkinMatrix(const float weights[8]) const {
 	memset(skinMatrix, 0, sizeof(skinMatrix));
 	for (int j = 0; j < nweights; j++) {
@@ -851,20 +790,6 @@ void VertexDecoder::Step_PosFloatMorphSkin() const {
 	Vec3ByMatrix43(v, pos, skinMatrix);
 }
 
-static const StepFunction wtstep[4] = {
-	0,
-	&VertexDecoder::Step_WeightsU8,
-	&VertexDecoder::Step_WeightsU16,
-	&VertexDecoder::Step_WeightsFloat,
-};
-
-static const StepFunction wtstepToFloat[4] = {
-	0,
-	&VertexDecoder::Step_WeightsU8ToFloat,
-	&VertexDecoder::Step_WeightsU16ToFloat,
-	&VertexDecoder::Step_WeightsFloat,
-};
-
 // TODO: Morph weights correctly! This is missing. Not sure if any game actually
 // use this functionality at all.
 
diff --git a/GPU/Common/VertexDecoderCommon.h b/GPU/Common/VertexDecoderCommon.h
index cabfc5138..d5395704f 100644
--- a/GPU/Common/VertexDecoderCommon.h
+++ b/GPU/Common/VertexDecoderCommon.h
@@ -460,12 +460,6 @@ public:
 
 	std::string GetString(DebugShaderStringType stringType);
 
-	void Step_WeightsU8() const;
-	void Step_WeightsU16() const;
-	void Step_WeightsU8ToFloat() const;
-	void Step_WeightsU16ToFloat() const;
-	void Step_WeightsFloat() const;
-
 	void ComputeSkinMatrix(const float weights[8]) const;
 
 	void Step_WeightsU8Skin() const;
@@ -618,12 +612,6 @@ public:
 	JittedVertexDecoder Compile(const VertexDecoder &dec, int32_t *jittedSize);
 	void Clear();
 
-	void Jit_WeightsU8();
-	void Jit_WeightsU16();
-	void Jit_WeightsU8ToFloat();
-	void Jit_WeightsU16ToFloat();
-	void Jit_WeightsFloat();
-
 	void Jit_WeightsU8Skin();
 	void Jit_WeightsU16Skin();
 	void Jit_WeightsFloatSkin();
diff --git a/GPU/Common/VertexDecoderX86.cpp b/GPU/Common/VertexDecoderX86.cpp
index 5098b2a7d..70c5131e8 100644
--- a/GPU/Common/VertexDecoderX86.cpp
+++ b/GPU/Common/VertexDecoderX86.cpp
@@ -87,16 +87,10 @@ static const X64Reg fpScratchReg4 = XMM4;
 // on the interpreter if the compiler fails.
 
 static const JitLookup jitLookup[] = {
-	{&VertexDecoder::Step_WeightsU8, &VertexDecoderJitCache::Jit_WeightsU8},
-	{&VertexDecoder::Step_WeightsU16, &VertexDecoderJitCache::Jit_WeightsU16},
-	{&VertexDecoder::Step_WeightsFloat, &VertexDecoderJitCache::Jit_WeightsFloat},
 	{&VertexDecoder::Step_WeightsU8Skin, &VertexDecoderJitCache::Jit_WeightsU8Skin},
 	{&VertexDecoder::Step_WeightsU16Skin, &VertexDecoderJitCache::Jit_WeightsU16Skin},
 	{&VertexDecoder::Step_WeightsFloatSkin, &VertexDecoderJitCache::Jit_WeightsFloatSkin},
 
-	{&VertexDecoder::Step_WeightsU8ToFloat, &VertexDecoderJitCache::Jit_WeightsU8ToFloat},
-	{&VertexDecoder::Step_WeightsU16ToFloat, &VertexDecoderJitCache::Jit_WeightsU16ToFloat},
-
 	{&VertexDecoder::Step_TcFloat, &VertexDecoderJitCache::Jit_TcFloat},
 	{&VertexDecoder::Step_TcU8ToFloat, &VertexDecoderJitCache::Jit_TcU8ToFloat},
 	{&VertexDecoder::Step_TcU16ToFloat, &VertexDecoderJitCache::Jit_TcU16ToFloat},
@@ -281,175 +275,6 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
 	return (JittedVertexDecoder)start;
 }
 
-void VertexDecoderJitCache::Jit_WeightsU8() {
-	switch (dec_->nweights) {
-	case 1:
-		MOVZX(32, 8, tempReg1, MDisp(srcReg, dec_->weightoff));
-		break;
-	case 2:
-		MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->weightoff));
-		break;
-	case 3:
-		MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
-		AND(32, R(tempReg1), Imm32(0x00FFFFFF));
-		break;
-	case 4:
-		MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
-		break;
-	case 5:
-		MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
-		MOVZX(32, 8, tempReg2, MDisp(srcReg, dec_->weightoff + 4));
-		break;
-	case 6:
-		MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
-		MOVZX(32, 16, tempReg2, MDisp(srcReg, dec_->weightoff + 4));
-		break;
-	case 7:
-		MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
-		MOV(32, R(tempReg2), MDisp(srcReg, dec_->weightoff + 4));
-		AND(32, R(tempReg2), Imm32(0x00FFFFFF));
-		break;
-	case 8:
-		MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
-		MOV(32, R(tempReg2), MDisp(srcReg, dec_->weightoff + 4));
-		break;
-	}
-
-	if (dec_->nweights <= 4) {
-		MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
-	} else {
-		MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
-		MOV(32, MDisp(dstReg, dec_->decFmt.w1off), R(tempReg2));
-	}
-}
-
-void VertexDecoderJitCache::Jit_WeightsU16() {
-	switch (dec_->nweights) {
-	case 1:
-		MOVZX(32, 16, tempReg1, MDisp(srcReg, dec_->weightoff));
-		MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
-		MOV(32, MDisp(dstReg, dec_->decFmt.w0off + 4), Imm32(0));
-		return;
-
-	case 2:
-		MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
-		MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
-		MOV(32, MDisp(dstReg, dec_->decFmt.w0off + 4), Imm32(0));
-		return;
-
-	case 3:
-		MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
-		MOVZX(32, 16, tempReg2, MDisp(srcReg, dec_->weightoff + 4));
-		MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
-		MOV(32, MDisp(dstReg, dec_->decFmt.w0off + 4), R(tempReg2));
-		return;
-
-	case 4:
-		// Anything above 4 will do 4 here, and then the rest after.
-	case 5:
-	case 6:
-	case 7:
-	case 8:
-		MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff));
-		MOV(32, R(tempReg2), MDisp(srcReg, dec_->weightoff + 4));
-		MOV(32, MDisp(dstReg, dec_->decFmt.w0off), R(tempReg1));
-		MOV(32, MDisp(dstReg, dec_->decFmt.w0off + 4), R(tempReg2));
-		break;
-	}
-
-	// Basic implementation - a short at a time. TODO: Optimize
-	int j;
-	for (j = 4; j < dec_->nweights; j++) {
-		MOV(16, R(tempReg1), MDisp(srcReg, dec_->weightoff + j * 2));
-		MOV(16, MDisp(dstReg, dec_->decFmt.w0off + j * 2), R(tempReg1));
-	}
-	while (j & 3) {
-		MOV(16, MDisp(dstReg, dec_->decFmt.w0off + j * 2), Imm16(0));
-		j++;
-	}
-}
-
-void VertexDecoderJitCache::Jit_WeightsU8ToFloat() {
-	if (dec_->nweights >= 4) {
-		Jit_AnyU8ToFloat(dec_->weightoff, 32);
-		MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
-		if (dec_->nweights > 4) {
-			Jit_AnyU8ToFloat(dec_->weightoff + 4, (dec_->nweights - 4) * 8);
-			MOVUPS(MDisp(dstReg, dec_->decFmt.w1off), XMM3);
-		}
-	} else {
-		Jit_AnyU8ToFloat(dec_->weightoff, dec_->nweights * 8);
-		MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
-	}
-}
-
-void VertexDecoderJitCache::Jit_WeightsU16ToFloat() {
-	if (dec_->nweights >= 4) {
-		Jit_AnyU16ToFloat(dec_->weightoff, 64);
-		MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
-		if (dec_->nweights > 4) {
-			Jit_AnyU16ToFloat(dec_->weightoff + 4 * 2, (dec_->nweights - 4) * 16);
-			MOVUPS(MDisp(dstReg, dec_->decFmt.w1off), XMM3);
-		}
-	} else {
-		Jit_AnyU16ToFloat(dec_->weightoff, dec_->nweights * 16);
-		MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
-	}
-}
-
-void VertexDecoderJitCache::Jit_WeightsFloat() {
-	int j;
-	switch (dec_->nweights) {
-	case 1:
-		// MOVSS: When the source operand is a memory location and destination operand is an XMM register, the three high-order doublewords of the destination operand are cleared to all 0s.
-		MOVSS(XMM3, MDisp(srcReg, dec_->weightoff));
-		MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
-		break;
-
-	case 2:
-		MOVQ_xmm(XMM3, MDisp(srcReg, dec_->weightoff));
-		MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
-		break;
-
-	case 4:
-		MOVUPS(XMM3, MDisp(srcReg, dec_->weightoff));
-		MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
-		break;
-
-	case 5:
-		MOVUPS(XMM3, MDisp(srcReg, dec_->weightoff));
-		MOVSS(XMM4, MDisp(srcReg, dec_->weightoff + 16));
-		MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
-		MOVUPS(MDisp(dstReg, dec_->decFmt.w0off + 16), XMM4);
-		break;
-
-	case 6:
-		MOVUPS(XMM3, MDisp(srcReg, dec_->weightoff));
-		MOVQ_xmm(XMM4, MDisp(srcReg, dec_->weightoff + 16));
-		MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
-		MOVUPS(MDisp(dstReg, dec_->decFmt.w0off + 16), XMM4);
-		break;
-
-	case 8:
-		MOVUPS(XMM3, MDisp(srcReg, dec_->weightoff));
-		MOVUPS(XMM4, MDisp(srcReg, dec_->weightoff + 16));
-		MOVUPS(MDisp(dstReg, dec_->decFmt.w0off), XMM3);
-		MOVUPS(MDisp(dstReg, dec_->decFmt.w0off + 16), XMM4);
-		break;
-
-	default:
-		for (j = 0; j < dec_->nweights; j++) {
-			MOV(32, R(tempReg1), MDisp(srcReg, dec_->weightoff + j * 4));
-			MOV(32, MDisp(dstReg, dec_->decFmt.w0off + j * 4), R(tempReg1));
-		}
-		while (j & 3) {  // Zero additional weights rounding up to 4.
-			MOV(32, MDisp(dstReg, dec_->decFmt.w0off + j * 4), Imm32(0));
-			j++;
-		}
-		break;
-	}
-}
-
 void VertexDecoderJitCache::Jit_WeightsU8Skin() {
 	MOV(PTRBITS, R(tempReg2), ImmPtr(&bones));