From 7cf05e44a28b51f36d4a6d53ff71c2278c01b55a Mon Sep 17 00:00:00 2001
From: Henrik Rydgard <hrydgard@gmail.com>
Date: Wed, 10 Sep 2014 10:16:42 +0200
Subject: [PATCH] Add option to vertexdecoder to expand UVs to floats

---
 GPU/Directx9/VertexDecoderDX9.cpp  |   1 -
 GPU/GLES/TransformPipeline.cpp     |   4 +-
 GPU/GLES/TransformPipeline.h       |   1 +
 GPU/GLES/VertexDecoder.cpp         | 109 ++++++++++++++++++++++++-----
 GPU/GLES/VertexDecoder.h           |  15 ++--
 GPU/Software/TransformUnit.cpp     |  15 +++-
 Windows/GEDebugger/TabVertices.cpp |   5 +-
 7 files changed, 123 insertions(+), 27 deletions(-)

diff --git a/GPU/Directx9/VertexDecoderDX9.cpp b/GPU/Directx9/VertexDecoderDX9.cpp
index 5cd7a9aa1..f0209e0d5 100644
--- a/GPU/Directx9/VertexDecoderDX9.cpp
+++ b/GPU/Directx9/VertexDecoderDX9.cpp
@@ -27,7 +27,6 @@
 
 namespace DX9 {
 
-
 // Always use float for decoding data
 #define USE_WEIGHT_HACK
 #define USE_TC_HACK
diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp
index 270780e4e..937d7797d 100644
--- a/GPU/GLES/TransformPipeline.cpp
+++ b/GPU/GLES/TransformPipeline.cpp
@@ -131,6 +131,8 @@ TransformDrawEngine::TransformDrawEngine()
 		uvScale(0),
 		fboTexBound_(false) {
 	decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
+	memset(&decOptions_, 0, sizeof(decOptions_));
+	decOptions_.expandAllUVtoFloat = false;
 	// Allocate nicely aligned memory. Maybe graphics drivers will
 	// appreciate it.
 	// All this is a LOT of memory, need to see if we can cut down somehow.
@@ -249,7 +251,7 @@ VertexDecoder *TransformDrawEngine::GetVertexDecoder(u32 vtype) {
 	if (iter != decoderMap_.end())
 		return iter->second;
 	VertexDecoder *dec = new VertexDecoder();
-	dec->SetVertexType(vtype, decJitCache_);
+	dec->SetVertexType(vtype, decOptions_, decJitCache_);
 	decoderMap_[vtype] = dec;
 	return dec;
 }
diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h
index 811679d3c..3aec29590 100644
--- a/GPU/GLES/TransformPipeline.h
+++ b/GPU/GLES/TransformPipeline.h
@@ -255,4 +255,5 @@ private:
 	UVScale *uvScale;
 
 	bool fboTexBound_;
+	VertexDecoderOptions decOptions_;
 };
diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp
index fcf6215ec..37cb28fd1 100644
--- a/GPU/GLES/VertexDecoder.cpp
+++ b/GPU/GLES/VertexDecoder.cpp
@@ -144,6 +144,15 @@ void VertexDecoder::Step_TcU8() const
 	*uv = *uvdata;
 }
 
+void VertexDecoder::Step_TcU8ToFloat() const
+{
+	// Expands the two u8 texcoords to floats, scaling each by 1/128.
+	float *uv = (float *)(decoded_ + decFmt.uvoff);
+	const u8 *uvdata = (const u8*)(ptr_ + tcoff);
+	uv[0] = uvdata[0] * (1.0f / 128.0f);
+	uv[1] = uvdata[1] * (1.0f / 128.0f);
+}
+
 void VertexDecoder::Step_TcU16() const
 {
 	u32 *uv = (u32 *)(decoded_ + decFmt.uvoff);
@@ -151,11 +160,18 @@ void VertexDecoder::Step_TcU16() const
 	*uv = *uvdata;
 }
 
+void VertexDecoder::Step_TcU16ToFloat() const
+{	// Expands the two u16 texcoords to floats, scaling each by 1/32768.
+	float *uv = (float *)(decoded_ + decFmt.uvoff);  // must be float: a u32 destination truncates the scaled value to 0 or 1
+	const u16 *uvdata = (const u16*)(ptr_ + tcoff);
+	uv[0] = uvdata[0] * (1.0f / 32768.0f);
+	uv[1] = uvdata[1] * (1.0f / 32768.0f);
+}
+
 void VertexDecoder::Step_TcU16Double() const
 {
 	u16 *uv = (u16*)(decoded_ + decFmt.uvoff);
 	const u16 *uvdata = (const u16*)(ptr_ + tcoff);
-	*uv = *uvdata;
 	uv[0] = uvdata[0] * 2;
 	uv[1] = uvdata[1] * 2;
 }
@@ -176,6 +192,30 @@ void VertexDecoder::Step_TcU16ThroughDouble() const
 	uv[1] = uvdata[1] * 2;
 }
 
+void VertexDecoder::Step_TcU16DoubleToFloat() const
+{	// Expands the two u16 texcoords to floats with a 1/16384 scale.
+	const u16 *src = (const u16*)(ptr_ + tcoff);
+	float *dst = (float*)(decoded_ + decFmt.uvoff);
+	for (int i = 0; i < 2; i++)
+		dst[i] = src[i] * (1.0f / 16384.0f);  // 1/16384 is twice the 1/32768 used by Step_TcU16ToFloat
+}
+
+void VertexDecoder::Step_TcU16ThroughToFloat() const
+{	// Converts the two u16 texcoords to floats without any scaling.
+	const u16 *src = (const u16*)(ptr_ + tcoff);
+	float *dst = (float *)(decoded_ + decFmt.uvoff);
+	for (int i = 0; i < 2; i++)
+		dst[i] = src[i];  // plain integer-to-float conversion, value unchanged
+}
+
+void VertexDecoder::Step_TcU16ThroughDoubleToFloat() const
+{	// Converts the two u16 texcoords to floats, doubling each value.
+	const u16 *src = (const u16*)(ptr_ + tcoff);
+	float *dst = (float *)(decoded_ + decFmt.uvoff);
+	for (int i = 0; i < 2; i++)
+		dst[i] = src[i] * 2;  // doubled in integer arithmetic, then converted to float on store
+}
+
 void VertexDecoder::Step_TcFloat() const
 {
 	float *uv = (float *)(decoded_ + decFmt.uvoff);
@@ -540,6 +580,13 @@ static const StepFunction tcstep[4] = {
 	&VertexDecoder::Step_TcFloat,
 };
 
+static const StepFunction tcstepToFloat[4] = {  // indexed by tc; chosen in SetVertexType when expandAllUVtoFloat is set, not through mode, no doubling
+	0,  // null entry; presumably tc == 0 means no texcoords - confirm against SetVertexType
+	&VertexDecoder::Step_TcU8ToFloat,
+	&VertexDecoder::Step_TcU16ToFloat,
+	&VertexDecoder::Step_TcFloat,
+};
+
 static const StepFunction tcstep_prescale[4] = {
 	0,
 	&VertexDecoder::Step_TcU8Prescale,
@@ -554,6 +601,13 @@ static const StepFunction tcstep_through[4] = {
 	&VertexDecoder::Step_TcFloatThrough,
 };
 
+static const StepFunction tcstep_throughToFloat[4] = {  // indexed by tc; chosen in SetVertexType for through mode when expandAllUVtoFloat is set
+	0,  // null entry; presumably tc == 0 means no texcoords - confirm against SetVertexType
+	&VertexDecoder::Step_TcU8ToFloat,  // NOTE(review): this step scales by 1/128, while the U16 through step stores unscaled values - confirm intended
+	&VertexDecoder::Step_TcU16ThroughToFloat,
+	&VertexDecoder::Step_TcFloatThrough,
+};
+
 // Some HD Remaster games double the u16 texture coordinates.
 static const StepFunction tcstep_Remaster[4] = {
 	0,
@@ -562,6 +616,13 @@ static const StepFunction tcstep_Remaster[4] = {
 	&VertexDecoder::Step_TcFloat,
 };
 
+static const StepFunction tcstep_RemasterToFloat[4] = {  // indexed by tc; chosen when g_DoubleTextureCoordinates and expandAllUVtoFloat are set, not through mode
+	0,  // null entry; presumably tc == 0 means no texcoords - confirm against SetVertexType
+	&VertexDecoder::Step_TcU8ToFloat,
+	&VertexDecoder::Step_TcU16DoubleToFloat,  // only the u16 entry differs from tcstepToFloat
+	&VertexDecoder::Step_TcFloat,
+};
+
 static const StepFunction tcstep_through_Remaster[4] = {
 	0,
 	&VertexDecoder::Step_TcU8,
@@ -569,6 +630,14 @@ static const StepFunction tcstep_through_Remaster[4] = {
 	&VertexDecoder::Step_TcFloatThrough,
 };
 
+static const StepFunction tcstep_through_RemasterToFloat[4] = {  // indexed by tc; chosen for through mode when g_DoubleTextureCoordinates and expandAllUVtoFloat are set
+	0,  // null entry; presumably tc == 0 means no texcoords - confirm against SetVertexType
+	&VertexDecoder::Step_TcU8ToFloat,  // NOTE(review): this step scales by 1/128, while the U16 through step does not normalize - confirm intended
+	&VertexDecoder::Step_TcU16ThroughDoubleToFloat,
+	&VertexDecoder::Step_TcFloatThrough,
+};
+
+
 // TODO: Tc Morph
 
 static const StepFunction colstep[8] = {
@@ -636,7 +705,7 @@ static const StepFunction posstep_through[4] = {
 	&VertexDecoder::Step_PosFloatThrough,
 };
 
-void VertexDecoder::SetVertexType(u32 fmt, VertexDecoderJitCache *jitCache) {
+void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache) {
 	fmt_ = fmt;
 	throughmode = (fmt & GE_VTYPE_THROUGH) != 0;
 	numSteps_ = 0;
@@ -715,21 +784,29 @@ void VertexDecoder::SetVertexType(u32 fmt, VertexDecoderJitCache *jitCache) {
 			steps_[numSteps_++] = tcstep_prescale[tc];
 			decFmt.uvfmt = DEC_FLOAT_2;
 		} else {
-			if (g_DoubleTextureCoordinates)
-				steps_[numSteps_++] = throughmode ? tcstep_through_Remaster[tc] : tcstep_Remaster[tc];
-			else
-				steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc];
-
-			switch (tc) {
-			case GE_VTYPE_TC_8BIT >> GE_VTYPE_TC_SHIFT:
-				decFmt.uvfmt = throughmode ? DEC_U8A_2 : DEC_U8_2;
-				break;
-			case GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT:
-				decFmt.uvfmt = throughmode ? DEC_U16A_2 : DEC_U16_2;
-				break;
-			case GE_VTYPE_TC_FLOAT >> GE_VTYPE_TC_SHIFT:
+			if (options.expandAllUVtoFloat) {
+				if (g_DoubleTextureCoordinates)
+					steps_[numSteps_++] = throughmode ? tcstep_through_RemasterToFloat[tc] : tcstep_RemasterToFloat[tc];
+				else
+					steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc];
 				decFmt.uvfmt = DEC_FLOAT_2;
-				break;
+			} else {
+				if (g_DoubleTextureCoordinates)
+					steps_[numSteps_++] = throughmode ? tcstep_through_Remaster[tc] : tcstep_Remaster[tc];
+				else
+					steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc];
+
+				switch (tc) {
+				case GE_VTYPE_TC_8BIT >> GE_VTYPE_TC_SHIFT:
+					decFmt.uvfmt = throughmode ? DEC_U8A_2 : DEC_U8_2;
+					break;
+				case GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT:
+					decFmt.uvfmt = throughmode ? DEC_U16A_2 : DEC_U16_2;
+					break;
+				case GE_VTYPE_TC_FLOAT >> GE_VTYPE_TC_SHIFT:
+					decFmt.uvfmt = DEC_FLOAT_2;
+					break;
+				}
 			}
 		}
 
diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h
index f98728240..de741b357 100644
--- a/GPU/GLES/VertexDecoder.h
+++ b/GPU/GLES/VertexDecoder.h
@@ -41,17 +41,17 @@ struct JitLookup {
 
 typedef void (*JittedVertexDecoder)(const u8 *src, u8 *dst, int count);
 
-// Right now
-//   - compiles into list of called functions
-// Future TODO
-//   - will compile into lighting fast specialized x86 and ARM
+struct VertexDecoderOptions {  // options passed to VertexDecoder::SetVertexType
+	bool expandAllUVtoFloat;  // when true, SetVertexType picks the *ToFloat texcoord steps and sets decFmt.uvfmt to DEC_FLOAT_2
+};
+
 class VertexDecoder
 {
 public:
 	VertexDecoder();
 
 	// A jit cache is not mandatory, we don't use it in the sw renderer
-	void SetVertexType(u32 vtype, VertexDecoderJitCache *jitCache = 0);
+	void SetVertexType(u32 vtype, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache = 0);
 
 	u32 VertexType() const { return fmt_; }
 
@@ -73,6 +73,8 @@ public:
 
 	void Step_TcU8() const;
 	void Step_TcU16() const;
+	void Step_TcU8ToFloat() const;
+	void Step_TcU16ToFloat() const;
 	void Step_TcFloat() const;
 
 	void Step_TcU8Prescale() const;
@@ -82,6 +84,9 @@ public:
 	void Step_TcU16Double() const;
 	void Step_TcU16Through() const;
 	void Step_TcU16ThroughDouble() const;
+	void Step_TcU16DoubleToFloat() const;
+	void Step_TcU16ThroughToFloat() const;
+	void Step_TcU16ThroughDoubleToFloat() const;
 	void Step_TcFloatThrough() const;
 
 	void Step_Color4444() const;
diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp
index 501709db7..8dff82ca3 100644
--- a/GPU/Software/TransformUnit.cpp
+++ b/GPU/Software/TransformUnit.cpp
@@ -209,7 +209,10 @@ int TransformUnit::patchBufferSize_ = 0;
 void TransformUnit::SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertex_type)
 {
 	VertexDecoder vdecoder;
-	vdecoder.SetVertexType(vertex_type);
+	VertexDecoderOptions options;
+	memset(&options, 0, sizeof(options));
+	options.expandAllUVtoFloat = false;
+	vdecoder.SetVertexType(vertex_type, options);
 	const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt();
 
 	static u8 buf[65536 * 48]; // yolo
@@ -290,7 +293,10 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type
 {
 	// TODO: Cache VertexDecoder objects
 	VertexDecoder vdecoder;
-	vdecoder.SetVertexType(vertex_type);
+	VertexDecoderOptions options;
+	memset(&options, 0, sizeof(options));
+	options.expandAllUVtoFloat = false;
+	vdecoder.SetVertexType(vertex_type, options);
 	const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt();
 
 	if (bytesRead)
@@ -528,7 +534,10 @@ bool TransformUnit::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVert
 	simpleVertices.resize(indexUpperBound + 1);
 
 	VertexDecoder vdecoder;
-	vdecoder.SetVertexType(gstate.vertType);
+	VertexDecoderOptions options;
+	memset(&options, 0, sizeof(options));
+	options.expandAllUVtoFloat = false;  // TODO: True should be fine here
+	vdecoder.SetVertexType(gstate.vertType, options);
 	TransformDrawEngine::NormalizeVertices((u8 *)(&simpleVertices[0]), (u8 *)(&temp_buffer[0]), Memory::GetPointer(gstate_c.vertexAddr), &vdecoder, indexLowerBound, indexUpperBound, gstate.vertType);
 
 	float world[16];
diff --git a/Windows/GEDebugger/TabVertices.cpp b/Windows/GEDebugger/TabVertices.cpp
index 7b66a2ecf..597a005a1 100644
--- a/Windows/GEDebugger/TabVertices.cpp
+++ b/Windows/GEDebugger/TabVertices.cpp
@@ -288,7 +288,10 @@ int CtrlVertexList::GetRowCount() {
 	if (!gpuDebug->GetCurrentSimpleVertices(rowCount_, vertices, indices)) {
 		rowCount_ = 0;
 	}
-	decoder->SetVertexType(state.vertType);
+	VertexDecoderOptions options;
+	memset(&options, 0, sizeof(options));
+	options.expandAllUVtoFloat = false;
+	decoder->SetVertexType(state.vertType, options);
 	return rowCount_;
 }