From 7cf05e44a28b51f36d4a6d53ff71c2278c01b55a Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Wed, 10 Sep 2014 10:16:42 +0200 Subject: [PATCH] Add option to vertexdecoder to expand UVs to floats --- GPU/Directx9/VertexDecoderDX9.cpp | 1 - GPU/GLES/TransformPipeline.cpp | 4 +- GPU/GLES/TransformPipeline.h | 1 + GPU/GLES/VertexDecoder.cpp | 109 ++++++++++++++++++++++++----- GPU/GLES/VertexDecoder.h | 15 ++-- GPU/Software/TransformUnit.cpp | 15 +++- Windows/GEDebugger/TabVertices.cpp | 5 +- 7 files changed, 123 insertions(+), 27 deletions(-) diff --git a/GPU/Directx9/VertexDecoderDX9.cpp b/GPU/Directx9/VertexDecoderDX9.cpp index 5cd7a9aa1..f0209e0d5 100644 --- a/GPU/Directx9/VertexDecoderDX9.cpp +++ b/GPU/Directx9/VertexDecoderDX9.cpp @@ -27,7 +27,6 @@ namespace DX9 { - // Always use float for decoding data #define USE_WEIGHT_HACK #define USE_TC_HACK diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 270780e4e..937d7797d 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -131,6 +131,8 @@ TransformDrawEngine::TransformDrawEngine() uvScale(0), fboTexBound_(false) { decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL; + memset(&decOptions_, 0, sizeof(decOptions_)); + decOptions_.expandAllUVtoFloat = false; // Allocate nicely aligned memory. Maybe graphics drivers will // appreciate it. // All this is a LOT of memory, need to see if we can cut down somehow. 
@@ -249,7 +251,7 @@ VertexDecoder *TransformDrawEngine::GetVertexDecoder(u32 vtype) { if (iter != decoderMap_.end()) return iter->second; VertexDecoder *dec = new VertexDecoder(); - dec->SetVertexType(vtype, decJitCache_); + dec->SetVertexType(vtype, decOptions_, decJitCache_); decoderMap_[vtype] = dec; return dec; } diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index 811679d3c..3aec29590 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -255,4 +255,5 @@ private: UVScale *uvScale; bool fboTexBound_; + VertexDecoderOptions decOptions_; }; diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index fcf6215ec..37cb28fd1 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -144,6 +144,15 @@ void VertexDecoder::Step_TcU8() const *uv = *uvdata; } +void VertexDecoder::Step_TcU8ToFloat() const +{ + // Expand the two u8 UV components to floats, scaled by 1/128. + float *uv = (float *)(decoded_ + decFmt.uvoff); + const u8 *uvdata = (const u8*)(ptr_ + tcoff); + uv[0] = uvdata[0] * (1.0f / 128.0f); + uv[1] = uvdata[1] * (1.0f / 128.0f); +} + void VertexDecoder::Step_TcU16() const { u32 *uv = (u32 *)(decoded_ + decFmt.uvoff); @@ -151,11 +160,18 @@ void VertexDecoder::Step_TcU16() const *uv = *uvdata; } +void VertexDecoder::Step_TcU16ToFloat() const +{ + float *uv = (float *)(decoded_ + decFmt.uvoff); // must be float (uvfmt is DEC_FLOAT_2); a u32 store would truncate the scaled value + const u16 *uvdata = (const u16*)(ptr_ + tcoff); + uv[0] = uvdata[0] * (1.0f / 32768.0f); + uv[1] = uvdata[1] * (1.0f / 32768.0f); +} + void VertexDecoder::Step_TcU16Double() const { u16 *uv = (u16*)(decoded_ + decFmt.uvoff); const u16 *uvdata = (const u16*)(ptr_ + tcoff); - *uv = *uvdata; uv[0] = uvdata[0] * 2; uv[1] = uvdata[1] * 2; } @@ -176,6 +192,30 @@ void VertexDecoder::Step_TcU16ThroughDouble() const uv[1] = uvdata[1] * 2; } +void VertexDecoder::Step_TcU16DoubleToFloat() const +{ + float *uv = (float*)(decoded_ + decFmt.uvoff); + const u16 *uvdata = (const u16*)(ptr_ + tcoff); + uv[0] = uvdata[0] * 
(1.0f / 16384.0f); + uv[1] = uvdata[1] * (1.0f / 16384.0f); +} + +void VertexDecoder::Step_TcU16ThroughToFloat() const +{ + float *uv = (float *)(decoded_ + decFmt.uvoff); + const u16 *uvdata = (const u16*)(ptr_ + tcoff); + uv[0] = uvdata[0]; + uv[1] = uvdata[1]; +} + +void VertexDecoder::Step_TcU16ThroughDoubleToFloat() const +{ + float *uv = (float *)(decoded_ + decFmt.uvoff); + const u16 *uvdata = (const u16*)(ptr_ + tcoff); + uv[0] = uvdata[0] * 2; + uv[1] = uvdata[1] * 2; +} + void VertexDecoder::Step_TcFloat() const { float *uv = (float *)(decoded_ + decFmt.uvoff); @@ -540,6 +580,13 @@ static const StepFunction tcstep[4] = { &VertexDecoder::Step_TcFloat, }; +static const StepFunction tcstepToFloat[4] = { + 0, + &VertexDecoder::Step_TcU8ToFloat, + &VertexDecoder::Step_TcU16ToFloat, + &VertexDecoder::Step_TcFloat, +}; + static const StepFunction tcstep_prescale[4] = { 0, &VertexDecoder::Step_TcU8Prescale, @@ -554,6 +601,13 @@ static const StepFunction tcstep_through[4] = { &VertexDecoder::Step_TcFloatThrough, }; +static const StepFunction tcstep_throughToFloat[4] = { + 0, + &VertexDecoder::Step_TcU8ToFloat, + &VertexDecoder::Step_TcU16ThroughToFloat, + &VertexDecoder::Step_TcFloatThrough, +}; + // Some HD Remaster games double the u16 texture coordinates. 
static const StepFunction tcstep_Remaster[4] = { 0, @@ -562,6 +616,13 @@ static const StepFunction tcstep_Remaster[4] = { &VertexDecoder::Step_TcFloat, }; +static const StepFunction tcstep_RemasterToFloat[4] = { + 0, + &VertexDecoder::Step_TcU8ToFloat, + &VertexDecoder::Step_TcU16DoubleToFloat, + &VertexDecoder::Step_TcFloat, +}; + static const StepFunction tcstep_through_Remaster[4] = { 0, &VertexDecoder::Step_TcU8, @@ -569,6 +630,14 @@ static const StepFunction tcstep_through_Remaster[4] = { &VertexDecoder::Step_TcFloatThrough, }; +static const StepFunction tcstep_through_RemasterToFloat[4] = { + 0, + &VertexDecoder::Step_TcU8ToFloat, + &VertexDecoder::Step_TcU16ThroughDoubleToFloat, + &VertexDecoder::Step_TcFloatThrough, +}; + + // TODO: Tc Morph static const StepFunction colstep[8] = { @@ -636,7 +705,7 @@ static const StepFunction posstep_through[4] = { &VertexDecoder::Step_PosFloatThrough, }; -void VertexDecoder::SetVertexType(u32 fmt, VertexDecoderJitCache *jitCache) { +void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache) { fmt_ = fmt; throughmode = (fmt & GE_VTYPE_THROUGH) != 0; numSteps_ = 0; @@ -715,21 +784,29 @@ void VertexDecoder::SetVertexType(u32 fmt, VertexDecoderJitCache *jitCache) { steps_[numSteps_++] = tcstep_prescale[tc]; decFmt.uvfmt = DEC_FLOAT_2; } else { - if (g_DoubleTextureCoordinates) - steps_[numSteps_++] = throughmode ? tcstep_through_Remaster[tc] : tcstep_Remaster[tc]; - else - steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc]; - - switch (tc) { - case GE_VTYPE_TC_8BIT >> GE_VTYPE_TC_SHIFT: - decFmt.uvfmt = throughmode ? DEC_U8A_2 : DEC_U8_2; - break; - case GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT: - decFmt.uvfmt = throughmode ? DEC_U16A_2 : DEC_U16_2; - break; - case GE_VTYPE_TC_FLOAT >> GE_VTYPE_TC_SHIFT: + if (options.expandAllUVtoFloat) { + if (g_DoubleTextureCoordinates) + steps_[numSteps_++] = throughmode ? 
tcstep_through_RemasterToFloat[tc] : tcstep_RemasterToFloat[tc]; + else + steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc]; decFmt.uvfmt = DEC_FLOAT_2; - break; + } else { + if (g_DoubleTextureCoordinates) + steps_[numSteps_++] = throughmode ? tcstep_through_Remaster[tc] : tcstep_Remaster[tc]; + else + steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc]; + + switch (tc) { + case GE_VTYPE_TC_8BIT >> GE_VTYPE_TC_SHIFT: + decFmt.uvfmt = throughmode ? DEC_U8A_2 : DEC_U8_2; + break; + case GE_VTYPE_TC_16BIT >> GE_VTYPE_TC_SHIFT: + decFmt.uvfmt = throughmode ? DEC_U16A_2 : DEC_U16_2; + break; + case GE_VTYPE_TC_FLOAT >> GE_VTYPE_TC_SHIFT: + decFmt.uvfmt = DEC_FLOAT_2; + break; + } } } diff --git a/GPU/GLES/VertexDecoder.h b/GPU/GLES/VertexDecoder.h index f98728240..de741b357 100644 --- a/GPU/GLES/VertexDecoder.h +++ b/GPU/GLES/VertexDecoder.h @@ -41,17 +41,17 @@ struct JitLookup { typedef void (*JittedVertexDecoder)(const u8 *src, u8 *dst, int count); -// Right now -// - compiles into list of called functions -// Future TODO -// - will compile into lighting fast specialized x86 and ARM +struct VertexDecoderOptions { + bool expandAllUVtoFloat; +}; + class VertexDecoder { public: VertexDecoder(); // A jit cache is not mandatory, we don't use it in the sw renderer - void SetVertexType(u32 vtype, VertexDecoderJitCache *jitCache = 0); + void SetVertexType(u32 vtype, const VertexDecoderOptions &options, VertexDecoderJitCache *jitCache = 0); u32 VertexType() const { return fmt_; } @@ -73,6 +73,8 @@ public: void Step_TcU8() const; void Step_TcU16() const; + void Step_TcU8ToFloat() const; + void Step_TcU16ToFloat() const; void Step_TcFloat() const; void Step_TcU8Prescale() const; @@ -82,6 +84,9 @@ public: void Step_TcU16Double() const; void Step_TcU16Through() const; void Step_TcU16ThroughDouble() const; + void Step_TcU16DoubleToFloat() const; + void Step_TcU16ThroughToFloat() const; + void Step_TcU16ThroughDoubleToFloat() const; 
void Step_TcFloatThrough() const; void Step_Color4444() const; diff --git a/GPU/Software/TransformUnit.cpp b/GPU/Software/TransformUnit.cpp index 501709db7..8dff82ca3 100644 --- a/GPU/Software/TransformUnit.cpp +++ b/GPU/Software/TransformUnit.cpp @@ -209,7 +209,10 @@ int TransformUnit::patchBufferSize_ = 0; void TransformUnit::SubmitSpline(void* control_points, void* indices, int count_u, int count_v, int type_u, int type_v, GEPatchPrimType prim_type, u32 vertex_type) { VertexDecoder vdecoder; - vdecoder.SetVertexType(vertex_type); + VertexDecoderOptions options; + memset(&options, 0, sizeof(options)); + options.expandAllUVtoFloat = false; + vdecoder.SetVertexType(vertex_type, options); const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt(); static u8 buf[65536 * 48]; // yolo @@ -290,7 +293,10 @@ void TransformUnit::SubmitPrimitive(void* vertices, void* indices, u32 prim_type { // TODO: Cache VertexDecoder objects VertexDecoder vdecoder; - vdecoder.SetVertexType(vertex_type); + VertexDecoderOptions options; + memset(&options, 0, sizeof(options)); + options.expandAllUVtoFloat = false; + vdecoder.SetVertexType(vertex_type, options); const DecVtxFormat& vtxfmt = vdecoder.GetDecVtxFmt(); if (bytesRead) @@ -528,7 +534,10 @@ bool TransformUnit::GetCurrentSimpleVertices(int count, std::vectorGetCurrentSimpleVertices(rowCount_, vertices, indices)) { rowCount_ = 0; } - decoder->SetVertexType(state.vertType); + VertexDecoderOptions options; + memset(&options, 0, sizeof(options)); + options.expandAllUVtoFloat = false; + decoder->SetVertexType(state.vertType, options); return rowCount_; }