diff --git a/GPU/Common/SoftwareTransformCommon.cpp b/GPU/Common/SoftwareTransformCommon.cpp index 9aff2c301..de4695954 100644 --- a/GPU/Common/SoftwareTransformCommon.cpp +++ b/GPU/Common/SoftwareTransformCommon.cpp @@ -547,7 +547,7 @@ void SoftwareTransform::DetectOffsetTexture(int maxIndex) { } // NOTE: The viewport must be up to date! -void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *&inds, int &maxIndex, SoftwareTransformResult *result) { +void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *inds, int &indsOffset, int indexBufferSize, int &maxIndex, SoftwareTransformResult *result) { TransformedVertex *transformed = params_.transformed; TransformedVertex *transformedExpanded = params_.transformedExpanded; bool throughmode = (vertType & GE_VTYPE_THROUGH_MASK) != 0; @@ -560,7 +560,11 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy bool useBufferedRendering = fbman->UseBufferedRendering(); if (prim == GE_PRIM_RECTANGLES) { - ExpandRectangles(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode); + if (!ExpandRectangles(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) { + result->drawIndexed = false; + result->drawNumTrans = 0; + return; + } result->drawBuffer = transformedExpanded; result->drawIndexed = true; @@ -578,11 +582,19 @@ void SoftwareTransform::BuildDrawingParams(int prim, int vertexCount, u32 vertTy } } } else if (prim == GE_PRIM_POINTS) { - ExpandPoints(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode); + if (!ExpandPoints(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) { + result->drawIndexed = false; + result->drawNumTrans = 0; + return; + } result->drawBuffer = transformedExpanded; result->drawIndexed = true; } else if (prim == 
GE_PRIM_LINES) { - ExpandLines(vertexCount, maxIndex, inds, transformed, transformedExpanded, numTrans, throughmode); + if (!ExpandLines(vertexCount, maxIndex, inds, indsOffset, indexBufferSize, transformed, transformedExpanded, numTrans, throughmode)) { + result->drawIndexed = false; + result->drawNumTrans = 0; + return; + } result->drawBuffer = transformedExpanded; result->drawIndexed = true; } else { @@ -674,15 +686,21 @@ void SoftwareTransform::CalcCullParams(float &minZValue, float &maxZValue) { std::swap(minZValue, maxZValue); } -void SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { +bool SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { + // Before we start, do a sanity check - does the output fit? + if ((vertexCount / 2) * 6 > indexBufferSize - indsOffset) { + // Won't fit, kill the draw. + return false; + } + // Rectangles always need 2 vertices, disregard the last one if there's an odd number. 
vertexCount = vertexCount & ~1; numTrans = 0; TransformedVertex *trans = &transformedExpanded[0]; - const u16 *indsIn = (const u16 *)inds; - u16 *newInds = inds + vertexCount; - u16 *indsOut = newInds; + const u16 *indsIn = (const u16 *)(inds + indsOffset); + int newIndsOffset = indsOffset + vertexCount; + u16 *indsOut = inds + newIndsOffset; maxIndex = 4 * (vertexCount / 2); for (int i = 0; i < vertexCount; i += 2) { @@ -727,23 +745,33 @@ void SoftwareTransform::ExpandRectangles(int vertexCount, int &maxIndex, u16 *&i indsOut[3] = i * 2 + 3; indsOut[4] = i * 2 + 0; indsOut[5] = i * 2 + 2; + trans += 4; indsOut += 6; numTrans += 6; } - inds = newInds; + + indsOffset = newIndsOffset; + return true; } -void SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { +bool SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { + // Before we start, do a sanity check - does the output fit? + if ((vertexCount / 2) * 6 > indexBufferSize - indsOffset) { + // Won't fit, kill the draw. + return false; + } + // Lines always need 2 vertices, disregard the last one if there's an odd number. 
vertexCount = vertexCount & ~1; numTrans = 0; TransformedVertex *trans = &transformedExpanded[0]; - const u16 *indsIn = (const u16 *)inds; - u16 *newInds = inds + vertexCount; - u16 *indsOut = newInds; + + const u16 *indsIn = (const u16 *)(inds + indsOffset); + int newIndsOffset = indsOffset + vertexCount; + u16 *indsOut = inds + newIndsOffset; float dx = 1.0f * gstate_c.vpWidthScale * (1.0f / fabsf(gstate.getViewportXScale())); float dy = 1.0f * gstate_c.vpHeightScale * (1.0f / fabsf(gstate.getViewportYScale())); @@ -856,16 +884,23 @@ void SoftwareTransform::ExpandLines(int vertexCount, int &maxIndex, u16 *&inds, } } - inds = newInds; + indsOffset = newIndsOffset; + return true; } -void SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { +bool SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode) { + // Before we start, do a sanity check - does the output fit? + if (vertexCount * 6 > indexBufferSize - indsOffset) { + // Won't fit, kill the draw. 
+ return false; + } + numTrans = 0; TransformedVertex *trans = &transformedExpanded[0]; - const u16 *indsIn = (const u16 *)inds; - u16 *newInds = inds + vertexCount; - u16 *indsOut = newInds; + const u16 *indsIn = (const u16 *)(inds + indsOffset); + int newIndsOffset = indsOffset + vertexCount; + u16 *indsOut = inds + newIndsOffset; float dx = 1.0f * gstate_c.vpWidthScale * (1.0f / gstate.getViewportXScale()); float dy = 1.0f * gstate_c.vpHeightScale * (1.0f / gstate.getViewportYScale()); @@ -924,5 +959,7 @@ void SoftwareTransform::ExpandPoints(int vertexCount, int &maxIndex, u16 *&inds, numTrans += 6; } - inds = newInds; + + indsOffset = newIndsOffset; + return true; } diff --git a/GPU/Common/SoftwareTransformCommon.h b/GPU/Common/SoftwareTransformCommon.h index 480bd18e5..da15ffad9 100644 --- a/GPU/Common/SoftwareTransformCommon.h +++ b/GPU/Common/SoftwareTransformCommon.h @@ -62,19 +62,18 @@ struct SoftwareTransformParams { class SoftwareTransform { public: - SoftwareTransform(SoftwareTransformParams &params) : params_(params) { - } + SoftwareTransform(SoftwareTransformParams &params) : params_(params) {} void SetProjMatrix(const float mtx[14], bool invertedX, bool invertedY, const Lin::Vec3 &trans, const Lin::Vec3 &scale); void Decode(int prim, u32 vertexType, const DecVtxFormat &decVtxFormat, int maxIndex, SoftwareTransformResult *result); void DetectOffsetTexture(int maxIndex); - void BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *&inds, int &maxIndex, SoftwareTransformResult *result); + void BuildDrawingParams(int prim, int vertexCount, u32 vertType, u16 *inds, int &indsOffset, int indexBufferSize, int &maxIndex, SoftwareTransformResult *result); protected: void CalcCullParams(float &minZValue, float &maxZValue); - void ExpandRectangles(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); - void ExpandLines(int vertexCount, int &maxIndex, u16 
*&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); - void ExpandPoints(int vertexCount, int &maxIndex, u16 *&inds, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); + bool ExpandRectangles(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); + bool ExpandLines(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); + bool ExpandPoints(int vertexCount, int &maxIndex, u16 *inds, int &indsOffset, int indexBufferSize, const TransformedVertex *transformed, TransformedVertex *transformedExpanded, int &numTrans, bool throughmode); const SoftwareTransformParams &params_; Lin::Matrix4x4 projMatrix_; diff --git a/GPU/D3D11/DrawEngineD3D11.cpp b/GPU/D3D11/DrawEngineD3D11.cpp index 9426074cd..bc812ce22 100644 --- a/GPU/D3D11/DrawEngineD3D11.cpp +++ b/GPU/D3D11/DrawEngineD3D11.cpp @@ -595,7 +595,7 @@ rotateVBO: prim = GE_PRIM_TRIANGLES; VERBOSE_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount()); - u16 *inds = decIndex; + u16 * const inds = decIndex; SoftwareTransformResult result{}; SoftwareTransformParams params{}; params.decoded = decoded; @@ -641,8 +641,9 @@ rotateVBO: // Need to ApplyDrawState after ApplyTexture because depal can launch a render pass and that wrecks the state. 
ApplyDrawState(prim); + int indsOffset = 0; if (result.action == SW_NOT_READY) - swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result); + swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result); if (result.setSafeSize) framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight); @@ -680,11 +681,11 @@ rotateVBO: UINT iOffset; int iSize = sizeof(uint16_t) * result.drawNumTrans; uint8_t *iptr = pushInds_->BeginPush(context_, &iOffset, iSize); - memcpy(iptr, inds, iSize); + memcpy(iptr, inds + indsOffset, iSize); pushInds_->EndPush(context_); context_->IASetIndexBuffer(pushInds_->Buf(), DXGI_FORMAT_R16_UINT, iOffset); context_->DrawIndexed(result.drawNumTrans, 0, 0); - } else { + } else if (result.drawNumTrans > 0) { context_->Draw(result.drawNumTrans, 0); } } else if (result.action == SW_CLEAR) { diff --git a/GPU/Directx9/DrawEngineDX9.cpp b/GPU/Directx9/DrawEngineDX9.cpp index 0043250fe..8c240e9ff 100644 --- a/GPU/Directx9/DrawEngineDX9.cpp +++ b/GPU/Directx9/DrawEngineDX9.cpp @@ -558,7 +558,7 @@ rotateVBO: prim = GE_PRIM_TRIANGLES; VERBOSE_LOG(G3D, "Flush prim %i SW! 
%i verts in one go", prim, indexGen.VertexCount()); - u16 *inds = decIndex; + u16 * const inds = decIndex; SoftwareTransformResult result{}; SoftwareTransformParams params{}; params.decoded = decoded; @@ -607,8 +607,9 @@ rotateVBO: ApplyDrawState(prim); + int indsOffset = 0; if (result.action == SW_NOT_READY) - swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result); + swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result); if (result.setSafeSize) framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight); @@ -628,8 +629,8 @@ rotateVBO: device_->SetVertexDeclaration(transformedVertexDecl_); if (result.drawIndexed) { - device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, maxIndex, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex)); - } else { + device_->DrawIndexedPrimitiveUP(d3d_prim[prim], 0, maxIndex, D3DPrimCount(d3d_prim[prim], result.drawNumTrans), inds + indsOffset, D3DFMT_INDEX16, result.drawBuffer, sizeof(TransformedVertex)); + } else if (result.drawNumTrans > 0) { device_->DrawPrimitiveUP(d3d_prim[prim], D3DPrimCount(d3d_prim[prim], result.drawNumTrans), result.drawBuffer, sizeof(TransformedVertex)); } } else if (result.action == SW_CLEAR) { diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index b19101421..869a34e4e 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -350,7 +350,7 @@ void DrawEngineGLES::DoFlush() { if (prim == GE_PRIM_TRIANGLE_STRIP) prim = GE_PRIM_TRIANGLES; - u16 *inds = decIndex; + u16 * const inds = decIndex; SoftwareTransformResult result{}; // TODO: Keep this static? Faster than repopulating? 
SoftwareTransformParams params{}; @@ -407,8 +407,9 @@ void DrawEngineGLES::DoFlush() { // Need to ApplyDrawState after ApplyTexture because depal can launch a render pass and that wrecks the state. ApplyDrawState(prim); + int indsOffset = 0; if (result.action == SW_NOT_READY) - swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, maxIndex, &result); + swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result); if (result.setSafeSize) framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight); @@ -423,11 +424,11 @@ void DrawEngineGLES::DoFlush() { if (result.action == SW_DRAW_PRIMITIVES) { if (result.drawIndexed) { vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(result.drawBuffer, maxIndex * sizeof(TransformedVertex), &vertexBuffer); - indexBufferOffset = (uint32_t)frameData.pushIndex->Push(inds, sizeof(uint16_t) * result.drawNumTrans, &indexBuffer); + indexBufferOffset = (uint32_t)frameData.pushIndex->Push(inds + indsOffset, sizeof(uint16_t) * result.drawNumTrans, &indexBuffer); render_->BindVertexBuffer(softwareInputLayout_, vertexBuffer, vertexBufferOffset); render_->BindIndexBuffer(indexBuffer); render_->DrawIndexed(glprim[prim], result.drawNumTrans, GL_UNSIGNED_SHORT, (void *)(intptr_t)indexBufferOffset); - } else { + } else if (result.drawNumTrans > 0) { vertexBufferOffset = (uint32_t)frameData.pushVertex->Push(result.drawBuffer, result.drawNumTrans * sizeof(TransformedVertex), &vertexBuffer); render_->BindVertexBuffer(softwareInputLayout_, vertexBuffer, vertexBufferOffset); render_->Draw(glprim[prim], 0, result.drawNumTrans); diff --git a/GPU/Vulkan/DrawEngineVulkan.cpp b/GPU/Vulkan/DrawEngineVulkan.cpp index 0e72dab08..14e7b0335 100644 --- a/GPU/Vulkan/DrawEngineVulkan.cpp +++ b/GPU/Vulkan/DrawEngineVulkan.cpp @@ -855,7 +855,7 @@ void DrawEngineVulkan::DoFlush() { if (prim == GE_PRIM_TRIANGLE_STRIP) prim = 
GE_PRIM_TRIANGLES; - u16 *inds = decIndex; + u16 * const inds = decIndex; SoftwareTransformResult result{}; SoftwareTransformParams params{}; params.decoded = decoded; @@ -895,9 +895,10 @@ void DrawEngineVulkan::DoFlush() { // Games sometimes expect exact matches (see #12626, for example) for equal comparisons. if (result.action == SW_CLEAR && everUsedEqualDepth_ && gstate.isClearModeDepthMask() && result.depth > 0.0f && result.depth < 1.0f) result.action = SW_NOT_READY; + int indsOffset = 0; if (result.action == SW_NOT_READY) { swTransform.DetectOffsetTexture(maxIndex); - swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, maxIndex, &result); + swTransform.BuildDrawingParams(prim, indexGen.VertexCount(), dec_->VertexType(), inds, indsOffset, DECODED_INDEX_BUFFER_SIZE / sizeof(uint16_t), maxIndex, &result); } if (result.setSafeSize) @@ -967,9 +968,9 @@ void DrawEngineVulkan::DoFlush() { if (result.drawIndexed) { VkBuffer vbuf, ibuf; vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, maxIndex * sizeof(TransformedVertex), 4, &vbuf); - ibOffset = (uint32_t)pushIndex_->Push(inds, sizeof(short) * result.drawNumTrans, 4, &ibuf); + ibOffset = (uint32_t)pushIndex_->Push(inds + indsOffset, sizeof(short) * result.drawNumTrans, 4, &ibuf); renderManager->DrawIndexed(ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, result.drawNumTrans, 1, VK_INDEX_TYPE_UINT16); - } else { + } else if (result.drawNumTrans > 0) { VkBuffer vbuf; vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, result.drawNumTrans * sizeof(TransformedVertex), 4, &vbuf); renderManager->Draw(ds, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, result.drawNumTrans);