Compare commits

...
Sign in to create a new pull request.

4 commits

Author SHA1 Message Date
Henrik Rydgård
1987169c81 OpenGL: When possible, avoid rebinding vertex arrays between glDrawArrays
Profitable optimization in DrawArrays-heavy games like GTA.
2023-05-17 18:42:23 +02:00
Henrik Rydgård
a9f2a7d7cd OpenGL: For contiguous DrawArrays, avoid re-binding the vertex buffer if possible. 2023-05-17 17:57:47 +02:00
Henrik Rydgård
78834a7424 Break out EnableDisableVertexArrays 2023-05-17 17:47:00 +02:00
Henrik Rydgård
1409d2dec4 Remove vestigial support for multiple vertex streams in OpenGL renderer
Unused, and made things more complex. Might do this later for all
backends.
2023-05-17 17:44:08 +02:00
7 changed files with 86 additions and 45 deletions

View file

@ -39,6 +39,7 @@ struct GLQueueProfileContext {
bool enabled; bool enabled;
double cpuStartTime; double cpuStartTime;
double cpuEndTime; double cpuEndTime;
int drawArraysRebindsAvoided;
}; };

View file

@ -651,7 +651,7 @@ retry_depth:
currentReadHandle_ = fbo->handle; currentReadHandle_ = fbo->handle;
} }
void GLQueueRunner::RunSteps(const std::vector<GLRStep *> &steps, bool skipGLCalls, bool keepSteps, bool useVR) { void GLQueueRunner::RunSteps(const std::vector<GLRStep *> &steps, bool skipGLCalls, bool keepSteps, bool useVR, GLQueueProfileContext &profile) {
if (skipGLCalls) { if (skipGLCalls) {
if (keepSteps) { if (keepSteps) {
return; return;
@ -713,9 +713,9 @@ void GLQueueRunner::RunSteps(const std::vector<GLRStep *> &steps, bool skipGLCal
if (IsVREnabled()) { if (IsVREnabled()) {
GLRStep vrStep = step; GLRStep vrStep = step;
PreprocessStepVR(&vrStep); PreprocessStepVR(&vrStep);
PerformRenderPass(vrStep, renderCount == 1, renderCount == totalRenderCount); PerformRenderPass(vrStep, renderCount == 1, renderCount == totalRenderCount, profile);
} else { } else {
PerformRenderPass(step, renderCount == 1, renderCount == totalRenderCount); PerformRenderPass(step, renderCount == 1, renderCount == totalRenderCount, profile);
} }
break; break;
case GLRStepType::COPY: case GLRStepType::COPY:
@ -778,7 +778,20 @@ void GLQueueRunner::PerformBlit(const GLRStep &step) {
} }
} }
void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last) { static void EnableDisableVertexArrays(uint32_t prevAttr, uint32_t newAttr) {
int enable = (~prevAttr) & newAttr;
int disable = prevAttr & (~newAttr);
for (int i = 0; i < 7; i++) { // SEM_MAX
if (enable & (1 << i)) {
glEnableVertexAttribArray(i);
}
if (disable & (1 << i)) {
glDisableVertexAttribArray(i);
}
}
}
void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last, GLQueueProfileContext &profile) {
CHECK_GL_ERROR_IF_DEBUG(); CHECK_GL_ERROR_IF_DEBUG();
PerformBindFramebufferAsRenderTarget(step); PerformBindFramebufferAsRenderTarget(step);
@ -821,6 +834,11 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
bool logicEnabled = false; bool logicEnabled = false;
#endif #endif
bool clipDistanceEnabled[8]{}; bool clipDistanceEnabled[8]{};
bool lastDrawIsArray = false;
int lastBindOffset = 0;
GLRInputLayout *lastDrawLayout = nullptr;
GLuint blendEqColor = (GLuint)-1; GLuint blendEqColor = (GLuint)-1;
GLuint blendEqAlpha = (GLuint)-1; GLuint blendEqAlpha = (GLuint)-1;
@ -1163,25 +1181,19 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
if (buf != curArrayBuffer) { if (buf != curArrayBuffer) {
glBindBuffer(GL_ARRAY_BUFFER, buf); glBindBuffer(GL_ARRAY_BUFFER, buf);
curArrayBuffer = buf; curArrayBuffer = buf;
// Invalidate any draw offset caching.
lastDrawLayout = nullptr;
} }
if (attrMask != layout->semanticsMask_) { if (attrMask != layout->semanticsMask_) {
int enable = layout->semanticsMask_ & ~attrMask; EnableDisableVertexArrays(attrMask, layout->semanticsMask_);
int disable = (~layout->semanticsMask_) & attrMask;
for (int i = 0; i < 7; i++) { // SEM_MAX
if (enable & (1 << i)) {
glEnableVertexAttribArray(i);
}
if (disable & (1 << i)) {
glDisableVertexAttribArray(i);
}
}
attrMask = layout->semanticsMask_; attrMask = layout->semanticsMask_;
} }
for (size_t i = 0; i < layout->entries.size(); i++) {
auto &entry = layout->entries[i];
glVertexAttribPointer(entry.location, entry.count, entry.type, entry.normalized, entry.stride, (const void *)(c.draw.offset + entry.offset));
}
if (c.draw.indexBuffer) { if (c.draw.indexBuffer) {
for (size_t i = 0; i < layout->entries.size(); i++) {
auto &entry = layout->entries[i];
glVertexAttribPointer(entry.location, entry.count, entry.type, entry.normalized, layout->stride, (const void *)(c.draw.offset + entry.offset));
}
GLuint buf = c.draw.indexBuffer->buffer_; GLuint buf = c.draw.indexBuffer->buffer_;
_dbg_assert_(!(c.draw.indexBuffer && c.draw.indexBuffer->Mapped())); _dbg_assert_(!(c.draw.indexBuffer && c.draw.indexBuffer->Mapped()));
if (buf != curElemArrayBuffer) { if (buf != curElemArrayBuffer) {
@ -1193,9 +1205,32 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
} else { } else {
glDrawElementsInstanced(c.draw.mode, c.draw.count, c.draw.indexType, c.draw.indices, c.draw.instances); glDrawElementsInstanced(c.draw.mode, c.draw.count, c.draw.indexType, c.draw.indices, c.draw.instances);
} }
lastDrawIsArray = false;
} else { } else {
glDrawArrays(c.draw.mode, c.draw.first, c.draw.count); // See if we can avoid calling glVertexAttribPointer.
int offset = 0;
bool rebind = true;
if (lastDrawIsArray && layout == lastDrawLayout) {
unsigned int diff = (unsigned int)c.draw.offset - (unsigned int)lastBindOffset;
if (diff % layout->stride == 0) {
// Compatible draws.
offset = diff / layout->stride;
rebind = false;
profile.drawArraysRebindsAvoided++;
}
}
if (rebind) {
// Rebind.
for (size_t i = 0; i < layout->entries.size(); i++) {
auto &entry = layout->entries[i];
glVertexAttribPointer(entry.location, entry.count, entry.type, entry.normalized, layout->stride, (const void *)(c.draw.offset + entry.offset));
}
lastBindOffset = (int)c.draw.offset;
}
glDrawArrays(c.draw.mode, c.draw.first + offset, c.draw.count);
lastDrawIsArray = true;
} }
lastDrawLayout = layout;
CHECK_GL_ERROR_IF_DEBUG(); CHECK_GL_ERROR_IF_DEBUG();
break; break;
} }

View file

@ -357,7 +357,7 @@ public:
void RunInitSteps(const std::vector<GLRInitStep> &steps, bool skipGLCalls); void RunInitSteps(const std::vector<GLRInitStep> &steps, bool skipGLCalls);
void RunSteps(const std::vector<GLRStep *> &steps, bool skipGLCalls, bool keepSteps, bool useVR); void RunSteps(const std::vector<GLRStep *> &steps, bool skipGLCalls, bool keepSteps, bool useVR, GLQueueProfileContext &profile);
void CreateDeviceObjects(); void CreateDeviceObjects();
void DestroyDeviceObjects(); void DestroyDeviceObjects();
@ -382,7 +382,7 @@ private:
void InitCreateFramebuffer(const GLRInitStep &step); void InitCreateFramebuffer(const GLRInitStep &step);
void PerformBindFramebufferAsRenderTarget(const GLRStep &pass); void PerformBindFramebufferAsRenderTarget(const GLRStep &pass);
void PerformRenderPass(const GLRStep &pass, bool first, bool last); void PerformRenderPass(const GLRStep &pass, bool first, bool last, GLQueueProfileContext &profile);
void PerformCopy(const GLRStep &pass); void PerformCopy(const GLRStep &pass);
void PerformBlit(const GLRStep &pass); void PerformBlit(const GLRStep &pass);
void PerformReadback(const GLRStep &pass); void PerformReadback(const GLRStep &pass);

View file

@ -193,7 +193,11 @@ std::string GLRenderManager::GetGpuProfileString() const {
const GLQueueProfileContext &profile = frameData_[curFrame].profile; const GLQueueProfileContext &profile = frameData_[curFrame].profile;
float cputime_ms = 1000.0f * (profile.cpuEndTime - profile.cpuStartTime); float cputime_ms = 1000.0f * (profile.cpuEndTime - profile.cpuStartTime);
return StringFromFormat("CPU time to run the list: %0.2f ms", cputime_ms); return StringFromFormat(
"CPU time to run the list: %0.2f ms\n"
"Avoided DrawArrays rebinds: %d",
cputime_ms,
profile.drawArraysRebindsAvoided);
} }
void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) { void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
@ -429,17 +433,18 @@ bool GLRenderManager::Run(GLRRenderThreadTask &task) {
if (frameData.profile.enabled) { if (frameData.profile.enabled) {
frameData.profile.cpuStartTime = time_now_d(); frameData.profile.cpuStartTime = time_now_d();
frameData.profile.drawArraysRebindsAvoided = 0;
} }
if (IsVREnabled()) { if (IsVREnabled()) {
int passes = GetVRPassesCount(); int passes = GetVRPassesCount();
for (int i = 0; i < passes; i++) { for (int i = 0; i < passes; i++) {
PreVRFrameRender(i); PreVRFrameRender(i);
queueRunner_.RunSteps(task.steps, skipGLCalls_, i < passes - 1, true); queueRunner_.RunSteps(task.steps, skipGLCalls_, i < passes - 1, true, frameData.profile);
PostVRFrameRender(); PostVRFrameRender();
} }
} else { } else {
queueRunner_.RunSteps(task.steps, skipGLCalls_, false, false); queueRunner_.RunSteps(task.steps, skipGLCalls_, false, false, frameData.profile);
} }
if (frameData.profile.enabled) { if (frameData.profile.enabled) {

View file

@ -359,10 +359,10 @@ public:
int count; int count;
GLenum type; GLenum type;
GLboolean normalized; GLboolean normalized;
int stride;
intptr_t offset; intptr_t offset;
}; };
std::vector<Entry> entries; std::vector<Entry> entries;
unsigned int stride;
int semanticsMask_ = 0; int semanticsMask_ = 0;
}; };
@ -487,10 +487,11 @@ public:
return step.create_program.program; return step.create_program.program;
} }
GLRInputLayout *CreateInputLayout(const std::vector<GLRInputLayout::Entry> &entries) { GLRInputLayout *CreateInputLayout(const std::vector<GLRInputLayout::Entry> &entries, int stride) {
GLRInitStep step{ GLRInitStepType::CREATE_INPUT_LAYOUT }; GLRInitStep step{ GLRInitStepType::CREATE_INPUT_LAYOUT };
step.create_input_layout.inputLayout = new GLRInputLayout(); step.create_input_layout.inputLayout = new GLRInputLayout();
step.create_input_layout.inputLayout->entries = entries; step.create_input_layout.inputLayout->entries = entries;
step.create_input_layout.inputLayout->stride = stride;
for (auto &iter : step.create_input_layout.inputLayout->entries) { for (auto &iter : step.create_input_layout.inputLayout->entries) {
step.create_input_layout.inputLayout->semanticsMask_ |= 1 << iter.location; step.create_input_layout.inputLayout->semanticsMask_ |= 1 << iter.location;
} }

View file

@ -1393,13 +1393,13 @@ OpenGLInputLayout::~OpenGLInputLayout() {
void OpenGLInputLayout::Compile(const InputLayoutDesc &desc) { void OpenGLInputLayout::Compile(const InputLayoutDesc &desc) {
// TODO: This is only accurate if there's only one stream. But whatever, for now we // TODO: This is only accurate if there's only one stream. But whatever, for now we
// never use multiple streams anyway. // never use multiple streams anyway.
stride = desc.bindings.empty() ? 0 : (GLsizei)desc.bindings[0].stride; _dbg_assert_(desc.bindings.size() == 1);
stride = (GLsizei)desc.bindings[0].stride;
std::vector<GLRInputLayout::Entry> entries; std::vector<GLRInputLayout::Entry> entries;
for (auto &attr : desc.attributes) { for (auto &attr : desc.attributes) {
GLRInputLayout::Entry entry; GLRInputLayout::Entry entry;
entry.location = attr.location; entry.location = attr.location;
entry.stride = (GLsizei)desc.bindings[attr.binding].stride;
entry.offset = attr.offset; entry.offset = attr.offset;
switch (attr.format) { switch (attr.format) {
case DataFormat::R32G32_FLOAT: case DataFormat::R32G32_FLOAT:
@ -1431,7 +1431,7 @@ void OpenGLInputLayout::Compile(const InputLayoutDesc &desc) {
entries.push_back(entry); entries.push_back(entry);
} }
if (!entries.empty()) { if (!entries.empty()) {
inputLayout_ = render_->CreateInputLayout(entries); inputLayout_ = render_->CreateInputLayout(entries, stride);
} else { } else {
inputLayout_ = nullptr; inputLayout_ = nullptr;
} }

View file

@ -103,12 +103,12 @@ void DrawEngineGLES::InitDeviceObjects() {
int vertexSize = sizeof(TransformedVertex); int vertexSize = sizeof(TransformedVertex);
std::vector<GLRInputLayout::Entry> entries; std::vector<GLRInputLayout::Entry> entries;
entries.push_back({ ATTR_POSITION, 4, GL_FLOAT, GL_FALSE, vertexSize, offsetof(TransformedVertex, x) }); entries.push_back({ ATTR_POSITION, 4, GL_FLOAT, GL_FALSE, offsetof(TransformedVertex, x) });
entries.push_back({ ATTR_TEXCOORD, 3, GL_FLOAT, GL_FALSE, vertexSize, offsetof(TransformedVertex, u) }); entries.push_back({ ATTR_TEXCOORD, 3, GL_FLOAT, GL_FALSE, offsetof(TransformedVertex, u) });
entries.push_back({ ATTR_COLOR0, 4, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, offsetof(TransformedVertex, color0) }); entries.push_back({ ATTR_COLOR0, 4, GL_UNSIGNED_BYTE, GL_TRUE, offsetof(TransformedVertex, color0) });
entries.push_back({ ATTR_COLOR1, 3, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, offsetof(TransformedVertex, color1) }); entries.push_back({ ATTR_COLOR1, 3, GL_UNSIGNED_BYTE, GL_TRUE, offsetof(TransformedVertex, color1) });
entries.push_back({ ATTR_NORMAL, 1, GL_FLOAT, GL_FALSE, vertexSize, offsetof(TransformedVertex, fog) }); entries.push_back({ ATTR_NORMAL, 1, GL_FLOAT, GL_FALSE, offsetof(TransformedVertex, fog) });
softwareInputLayout_ = render_->CreateInputLayout(entries); softwareInputLayout_ = render_->CreateInputLayout(entries, vertexSize);
draw_->SetInvalidationCallback(std::bind(&DrawEngineGLES::Invalidate, this, std::placeholders::_1)); draw_->SetInvalidationCallback(std::bind(&DrawEngineGLES::Invalidate, this, std::placeholders::_1));
} }
@ -189,7 +189,7 @@ static const GlTypeInfo GLComp[] = {
{GL_UNSIGNED_SHORT, 4, GL_TRUE},// DEC_U16_4, {GL_UNSIGNED_SHORT, 4, GL_TRUE},// DEC_U16_4,
}; };
static inline void VertexAttribSetup(int attrib, int fmt, int stride, int offset, std::vector<GLRInputLayout::Entry> &entries) { static inline void VertexAttribSetup(int attrib, int fmt, int offset, std::vector<GLRInputLayout::Entry> &entries) {
if (fmt) { if (fmt) {
const GlTypeInfo &type = GLComp[fmt]; const GlTypeInfo &type = GLComp[fmt];
GLRInputLayout::Entry entry; GLRInputLayout::Entry entry;
@ -197,7 +197,6 @@ static inline void VertexAttribSetup(int attrib, int fmt, int stride, int offset
entry.location = attrib; entry.location = attrib;
entry.normalized = type.normalized; entry.normalized = type.normalized;
entry.type = type.type; entry.type = type.type;
entry.stride = stride;
entry.count = type.count; entry.count = type.count;
entries.push_back(entry); entries.push_back(entry);
} }
@ -212,15 +211,15 @@ GLRInputLayout *DrawEngineGLES::SetupDecFmtForDraw(LinkedShader *program, const
} }
std::vector<GLRInputLayout::Entry> entries; std::vector<GLRInputLayout::Entry> entries;
VertexAttribSetup(ATTR_W1, decFmt.w0fmt, decFmt.stride, decFmt.w0off, entries); VertexAttribSetup(ATTR_W1, decFmt.w0fmt, decFmt.w0off, entries);
VertexAttribSetup(ATTR_W2, decFmt.w1fmt, decFmt.stride, decFmt.w1off, entries); VertexAttribSetup(ATTR_W2, decFmt.w1fmt, decFmt.w1off, entries);
VertexAttribSetup(ATTR_TEXCOORD, decFmt.uvfmt, decFmt.stride, decFmt.uvoff, entries); VertexAttribSetup(ATTR_TEXCOORD, decFmt.uvfmt, decFmt.uvoff, entries);
VertexAttribSetup(ATTR_COLOR0, decFmt.c0fmt, decFmt.stride, decFmt.c0off, entries); VertexAttribSetup(ATTR_COLOR0, decFmt.c0fmt, decFmt.c0off, entries);
VertexAttribSetup(ATTR_COLOR1, decFmt.c1fmt, decFmt.stride, decFmt.c1off, entries); VertexAttribSetup(ATTR_COLOR1, decFmt.c1fmt, decFmt.c1off, entries);
VertexAttribSetup(ATTR_NORMAL, decFmt.nrmfmt, decFmt.stride, decFmt.nrmoff, entries); VertexAttribSetup(ATTR_NORMAL, decFmt.nrmfmt, decFmt.nrmoff, entries);
VertexAttribSetup(ATTR_POSITION, decFmt.posfmt, decFmt.stride, decFmt.posoff, entries); VertexAttribSetup(ATTR_POSITION, decFmt.posfmt, decFmt.posoff, entries);
inputLayout = render_->CreateInputLayout(entries); inputLayout = render_->CreateInputLayout(entries, decFmt.stride);
inputLayoutMap_.Insert(key, inputLayout); inputLayoutMap_.Insert(key, inputLayout);
return inputLayout; return inputLayout;
} }