Correct uv bound pointers in arm vertexjit.

2015-09-13 14:52:10 -07:00 · 2015-09-13 14:52:10 -07:00 · dff4aeb30f
commit dff4aeb30f
parent 93e9d6cd56
11 changed files with 66 additions and 62 deletions
--- a/GPU/Common/VertexDecoderArm.cpp
+++ b/GPU/Common/VertexDecoderArm.cpp
@ -572,7 +572,7 @@ void VertexDecoderJitCache::Jit_TcU16Through() {
 	LDRH(tempReg2, srcReg, dec_->tcoff + 2);
 	// TODO: Cleanup.
-	MOVP2R(scratchReg, &gstate_c.vertMinU);
+	MOVP2R(scratchReg, &gstate_c.vertBounds.minU);
 	auto updateSide = [&](ARMReg r, CCFlags cc, u32 off) {
 		LDRH(tempReg3, scratchReg, off);
@ -583,10 +583,10 @@ void VertexDecoderJitCache::Jit_TcU16Through() {
 	};
 	// TODO: Can this actually be fast?  Hmm, floats aren't better.
-	updateSide(tempReg1, CC_LT, 0);
+	updateSide(tempReg1, CC_LT, offsetof(KnownVertexBounds, minU));
-	updateSide(tempReg1, CC_GT, 2);
+	updateSide(tempReg1, CC_GT, offsetof(KnownVertexBounds, maxU));
-	updateSide(tempReg2, CC_LT, 4);
+	updateSide(tempReg2, CC_LT, offsetof(KnownVertexBounds, minV));
-	updateSide(tempReg2, CC_GT, 6);
+	updateSide(tempReg2, CC_GT, offsetof(KnownVertexBounds, maxV));
 	ORR(tempReg1, tempReg1, Operand2(tempReg2, ST_LSL, 16));
 	STR(tempReg1, dstReg, dec_->decFmt.uvoff);
--- a/GPU/Common/VertexDecoderArm64.cpp
+++ b/GPU/Common/VertexDecoderArm64.cpp
@ -563,7 +563,7 @@ void VertexDecoderJitCache::Jit_TcU16Through() {
 	LDRH(INDEX_UNSIGNED, tempReg2, srcReg, dec_->tcoff + 2);
 	// TODO: Cleanup.
-	MOVP2R(scratchReg64, &gstate_c.vertMinU);
+	MOVP2R(scratchReg64, &gstate_c.vertBounds.minU);
 	auto updateSide = [&](ARM64Reg r, CCFlags cc, u32 off) {
 		LDRH(INDEX_UNSIGNED, tempReg3, scratchReg64, off);
@ -574,10 +574,10 @@ void VertexDecoderJitCache::Jit_TcU16Through() {
 	};
 	// TODO: Can this actually be fast?  Hmm, floats aren't better.
-	updateSide(tempReg1, CC_LT, 0);
+	updateSide(tempReg1, CC_LT, offsetof(KnownVertexBounds, minU));
-	updateSide(tempReg1, CC_GT, 2);
+	updateSide(tempReg1, CC_GT, offsetof(KnownVertexBounds, maxU));
-	updateSide(tempReg2, CC_LT, 4);
+	updateSide(tempReg2, CC_LT, offsetof(KnownVertexBounds, minV));
-	updateSide(tempReg2, CC_GT, 6);
+	updateSide(tempReg2, CC_GT, offsetof(KnownVertexBounds, maxV));
 	ORR(tempReg1, tempReg1, tempReg2, ArithOption(tempReg2, ST_LSL, 16));
 	STR(INDEX_UNSIGNED, tempReg1, dstReg, dec_->decFmt.uvoff);
--- a/GPU/Common/VertexDecoderCommon.cpp
+++ b/GPU/Common/VertexDecoderCommon.cpp
@ -296,10 +296,10 @@ void VertexDecoder::Step_TcU16Through() const
 	uv[0] = uvdata[0];
 	uv[1] = uvdata[1];
-	gstate_c.vertMinU = std::min(gstate_c.vertMinU, uvdata[0]);
+	gstate_c.vertBounds.minU = std::min(gstate_c.vertBounds.minU, uvdata[0]);
-	gstate_c.vertMaxU = std::max(gstate_c.vertMaxU, uvdata[0]);
+	gstate_c.vertBounds.maxU = std::max(gstate_c.vertBounds.maxU, uvdata[0]);
-	gstate_c.vertMinV = std::min(gstate_c.vertMinV, uvdata[1]);
+	gstate_c.vertBounds.minV = std::min(gstate_c.vertBounds.minV, uvdata[1]);
-	gstate_c.vertMaxV = std::max(gstate_c.vertMaxV, uvdata[1]);
+	gstate_c.vertBounds.maxV = std::max(gstate_c.vertBounds.maxV, uvdata[1]);
 }
 void VertexDecoder::Step_TcU16ThroughDouble() const
@ -325,10 +325,10 @@ void VertexDecoder::Step_TcU16ThroughToFloat() const
 	uv[0] = uvdata[0];
 	uv[1] = uvdata[1];
-	gstate_c.vertMinU = std::min(gstate_c.vertMinU, uvdata[0]);
+	gstate_c.vertBounds.minU = std::min(gstate_c.vertBounds.minU, uvdata[0]);
-	gstate_c.vertMaxU = std::max(gstate_c.vertMaxU, uvdata[0]);
+	gstate_c.vertBounds.maxU = std::max(gstate_c.vertBounds.maxU, uvdata[0]);
-	gstate_c.vertMinV = std::min(gstate_c.vertMinV, uvdata[1]);
+	gstate_c.vertBounds.minV = std::min(gstate_c.vertBounds.minV, uvdata[1]);
-	gstate_c.vertMaxV = std::max(gstate_c.vertMaxV, uvdata[1]);
+	gstate_c.vertBounds.maxV = std::max(gstate_c.vertBounds.maxV, uvdata[1]);
 }
 void VertexDecoder::Step_TcU16ThroughDoubleToFloat() const
@ -354,10 +354,10 @@ void VertexDecoder::Step_TcFloatThrough() const
 	uv[0] = uvdata[0];
 	uv[1] = uvdata[1];
-	gstate_c.vertMinU = std::min(gstate_c.vertMinU, (u16)uvdata[0]);
+	gstate_c.vertBounds.minU = std::min(gstate_c.vertBounds.minU, (u16)uvdata[0]);
-	gstate_c.vertMaxU = std::max(gstate_c.vertMaxU, (u16)uvdata[0]);
+	gstate_c.vertBounds.maxU = std::max(gstate_c.vertBounds.maxU, (u16)uvdata[0]);
-	gstate_c.vertMinV = std::min(gstate_c.vertMinV, (u16)uvdata[1]);
+	gstate_c.vertBounds.minV = std::min(gstate_c.vertBounds.minV, (u16)uvdata[1]);
-	gstate_c.vertMaxV = std::max(gstate_c.vertMaxV, (u16)uvdata[1]);
+	gstate_c.vertBounds.maxV = std::max(gstate_c.vertBounds.maxV, (u16)uvdata[1]);
 }
 void VertexDecoder::Step_TcU8Prescale() const {
--- a/GPU/Common/VertexDecoderX86.cpp
+++ b/GPU/Common/VertexDecoderX86.cpp
@ -721,10 +721,10 @@ void VertexDecoderJitCache::Jit_TcU16Through() {
 	};
 	// TODO: Can this actually be fast?  Hmm, floats aren't better.
-	updateSide(tempReg1, CC_GE, &gstate_c.vertMinU);
+	updateSide(tempReg1, CC_GE, &gstate_c.vertBounds.minU);
-	updateSide(tempReg1, CC_LE, &gstate_c.vertMaxU);
+	updateSide(tempReg1, CC_LE, &gstate_c.vertBounds.maxU);
-	updateSide(tempReg2, CC_GE, &gstate_c.vertMinV);
+	updateSide(tempReg2, CC_GE, &gstate_c.vertBounds.minV);
-	updateSide(tempReg2, CC_LE, &gstate_c.vertMaxV);
+	updateSide(tempReg2, CC_LE, &gstate_c.vertBounds.maxV);
 }
 void VertexDecoderJitCache::Jit_TcU16ThroughToFloat() {
--- a/GPU/Directx9/FramebufferDX9.cpp
+++ b/GPU/Directx9/FramebufferDX9.cpp
@ -639,11 +639,11 @@ namespace DX9 {
 				// If max is not > min, we probably could not detect it.  Skip.
 				// See the vertex decoder, where this is updated.
-				if ((flags & BINDFBCOLOR_MAY_COPY_WITH_UV) != 0 && gstate_c.vertMaxU > gstate_c.vertMinU) {
+				if ((flags & BINDFBCOLOR_MAY_COPY_WITH_UV) != 0 && gstate_c.vertBounds.maxU > gstate_c.vertBounds.minU) {
-					x = gstate_c.vertMinU;
+					x = gstate_c.vertBounds.minU;
-					y = gstate_c.vertMinV;
+					y = gstate_c.vertBounds.minV;
-					w = gstate_c.vertMaxU - x;
+					w = gstate_c.vertBounds.maxU - x;
-					h = gstate_c.vertMaxV - y;
+					h = gstate_c.vertBounds.maxV - y;
 					// If we bound a framebuffer, apply the byte offset as pixels to the copy too.
 					if (flags & BINDFBCOLOR_APPLY_TEX_OFFSET) {
--- a/GPU/Directx9/TextureCacheDX9.cpp
+++ b/GPU/Directx9/TextureCacheDX9.cpp
@ -926,7 +926,7 @@ void TextureCacheDX9::ApplyTexture() {
 			// Texture scale/offset and gen modes don't apply in through.
 			// So we can optimize how much of the texture we look at.
 			if (gstate.isModeThrough()) {
-				nextTexture_->maxSeenV = std::max(nextTexture_->maxSeenV, gstate_c.vertMaxV);
+				nextTexture_->maxSeenV = std::max(nextTexture_->maxSeenV, gstate_c.vertBounds.maxV);
 			} else {
 				// Otherwise, we need to reset to ensure we use the whole thing.
 				// Can't tell how much is used.
@ -987,17 +987,17 @@ void TextureCacheDX9::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFrame
 		};
 		// If min is not < max, then we don't have values (wasn't set during decode.)
-		if (gstate_c.vertMinV < gstate_c.vertMaxV) {
+		if (gstate_c.vertBounds.minV < gstate_c.vertBounds.maxV) {
 			const float invWidth = 1.0f / (float)framebuffer->bufferWidth;
 			const float invHeight = 1.0f / (float)framebuffer->bufferHeight;
 			// Inverse of half = double.
 			const float invHalfWidth = invWidth * 2.0f;
 			const float invHalfHeight = invHeight * 2.0f;
-			const int u1 = gstate_c.vertMinU + gstate_c.curTextureXOffset;
+			const int u1 = gstate_c.vertBounds.minU + gstate_c.curTextureXOffset;
-			const int v1 = gstate_c.vertMinV + gstate_c.curTextureYOffset;
+			const int v1 = gstate_c.vertBounds.minV + gstate_c.curTextureYOffset;
-			const int u2 = gstate_c.vertMaxU + gstate_c.curTextureXOffset;
+			const int u2 = gstate_c.vertBounds.maxU + gstate_c.curTextureXOffset;
-			const int v2 = gstate_c.vertMaxV + gstate_c.curTextureYOffset;
+			const int v2 = gstate_c.vertBounds.maxV + gstate_c.curTextureYOffset;
 			const float left = u1 * invHalfWidth - 1.0f + xoff;
 			const float right = u2 * invHalfWidth - 1.0f + xoff;
--- a/GPU/Directx9/TransformPipelineDX9.cpp
+++ b/GPU/Directx9/TransformPipelineDX9.cpp
@ -695,7 +695,7 @@ void TransformDrawEngineDX9::DoFlush() {
 							vai->numVerts = indexGen.PureCount();
 						}
-						_dbg_assert_msg_(G3D, gstate_c.vertMinV >= gstate_c.vertMaxV, "Should not have checked UVs when caching.");
+						_dbg_assert_msg_(G3D, gstate_c.vertBounds.minV >= gstate_c.vertBounds.maxV, "Should not have checked UVs when caching.");
 						void * pVb;
 						u32 size = dec_->GetDecVtxFmt().stride * indexGen.MaxIndex();
@ -888,10 +888,10 @@ rotateVBO:
 	framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
 	// Now seems as good a time as any to reset the min/max coords, which we may examine later.
-	gstate_c.vertMinU = 512;
+	gstate_c.vertBounds.minU = 512;
-	gstate_c.vertMinV = 512;
+	gstate_c.vertBounds.minV = 512;
-	gstate_c.vertMaxU = 0;
+	gstate_c.vertBounds.maxU = 0;
-	gstate_c.vertMaxV = 0;
+	gstate_c.vertBounds.maxV = 0;
 	host->GPUNotifyDraw();
 }
--- a/GPU/GLES/Framebuffer.cpp
+++ b/GPU/GLES/Framebuffer.cpp
@ -874,11 +874,11 @@ void FramebufferManager::BindFramebufferColor(int stage, u32 fbRawAddress, Virtu
 			// If max is not > min, we probably could not detect it.  Skip.
 			// See the vertex decoder, where this is updated.
-			if ((flags & BINDFBCOLOR_MAY_COPY_WITH_UV) != 0 && gstate_c.vertMaxU > gstate_c.vertMinU) {
+			if ((flags & BINDFBCOLOR_MAY_COPY_WITH_UV) != 0 && gstate_c.vertBounds.maxU > gstate_c.vertBounds.minU) {
-				x = gstate_c.vertMinU;
+				x = gstate_c.vertBounds.minU;
-				y = gstate_c.vertMinV;
+				y = gstate_c.vertBounds.minV;
-				w = gstate_c.vertMaxU - x;
+				w = gstate_c.vertBounds.maxU - x;
-				h = gstate_c.vertMaxV - y;
+				h = gstate_c.vertBounds.maxV - y;
 				// If we bound a framebuffer, apply the byte offset as pixels to the copy too.
 				if (flags & BINDFBCOLOR_APPLY_TEX_OFFSET) {
--- a/GPU/GLES/TextureCache.cpp
+++ b/GPU/GLES/TextureCache.cpp
@ -996,7 +996,7 @@ void TextureCache::ApplyTexture() {
 			// Texture scale/offset and gen modes don't apply in through.
 			// So we can optimize how much of the texture we look at.
 			if (gstate.isModeThrough()) {
-				nextTexture_->maxSeenV = std::max(nextTexture_->maxSeenV, gstate_c.vertMaxV);
+				nextTexture_->maxSeenV = std::max(nextTexture_->maxSeenV, gstate_c.vertBounds.maxV);
 			} else {
 				// Otherwise, we need to reset to ensure we use the whole thing.
 				// Can't tell how much is used.
@ -1053,17 +1053,17 @@ void TextureCache::ApplyTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuf
 		static const GLubyte indices[4] = { 0, 1, 3, 2 };
 		// If min is not < max, then we don't have values (wasn't set during decode.)
-		if (gstate_c.vertMinV < gstate_c.vertMaxV) {
+		if (gstate_c.vertBounds.minV < gstate_c.vertBounds.maxV) {
 			const float invWidth = 1.0f / (float)framebuffer->bufferWidth;
 			const float invHeight = 1.0f / (float)framebuffer->bufferHeight;
 			// Inverse of half = double.
 			const float invHalfWidth = invWidth * 2.0f;
 			const float invHalfHeight = invHeight * 2.0f;
-			const int u1 = gstate_c.vertMinU + gstate_c.curTextureXOffset;
+			const int u1 = gstate_c.vertBounds.minU + gstate_c.curTextureXOffset;
-			const int v1 = gstate_c.vertMinV + gstate_c.curTextureYOffset;
+			const int v1 = gstate_c.vertBounds.minV + gstate_c.curTextureYOffset;
-			const int u2 = gstate_c.vertMaxU + gstate_c.curTextureXOffset;
+			const int u2 = gstate_c.vertBounds.maxU + gstate_c.curTextureXOffset;
-			const int v2 = gstate_c.vertMaxV + gstate_c.curTextureYOffset;
+			const int v2 = gstate_c.vertBounds.maxV + gstate_c.curTextureYOffset;
 			const float left = u1 * invHalfWidth - 1.0f;
 			const float right = u2 * invHalfWidth - 1.0f;
--- a/GPU/GLES/TransformPipeline.cpp
+++ b/GPU/GLES/TransformPipeline.cpp
@ -684,7 +684,7 @@ void TransformDrawEngine::DoFlush() {
 							vai->numVerts = indexGen.PureCount();
 						}
-						_dbg_assert_msg_(G3D, gstate_c.vertMinV >= gstate_c.vertMaxV, "Should not have checked UVs when caching.");
+						_dbg_assert_msg_(G3D, gstate_c.vertBounds.minV >= gstate_c.vertBounds.maxV, "Should not have checked UVs when caching.");
 						vai->vbo = AllocateBuffer();
 						glstate.arrayBuffer.bind(vai->vbo);
@ -885,10 +885,10 @@ rotateVBO:
 	framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
 	// Now seems as good a time as any to reset the min/max coords, which we may examine later.
-	gstate_c.vertMinU = 512;
+	gstate_c.vertBounds.minU = 512;
-	gstate_c.vertMinV = 512;
+	gstate_c.vertBounds.minV = 512;
-	gstate_c.vertMaxU = 0;
+	gstate_c.vertBounds.maxU = 0;
-	gstate_c.vertMaxV = 0;
+	gstate_c.vertBounds.maxV = 0;
 #ifndef MOBILE_DEVICE
 	host->GPUNotifyDraw();
--- a/GPU/GPUState.h
+++ b/GPU/GPUState.h
@ -464,6 +464,13 @@ enum {
 	GPU_PREFER_REVERSE_COLOR_ORDER = FLAG_BIT(31),
 };
+struct KnownVertexBounds {
+	u16 minU;
+	u16 minV;
+	u16 maxU;
+	u16 maxV;
+};
 struct GPUStateCache {
 	bool Supports(int flag) { return (featureFlags & flag) != 0; }
@ -507,10 +514,7 @@ struct GPUStateCache {
 	float vpWidthScale;
 	float vpHeightScale;
-	u16 vertMinU;
+	KnownVertexBounds vertBounds;
-	u16 vertMinV;
-	u16 vertMaxU;
-	u16 vertMaxV;
 	// TODO: These should be accessed from the current VFB object directly.
 	u32 curRTWidth;