Compare commits

...
Sign in to create a new pull request.

4 commits

Author SHA1 Message Date
Henrik Rydgård
1987169c81 OpenGL: When possible, avoid rebinding vertex arrays between glDrawArrays
Profitable optimization in DrawArrays-heavy games like GTA.
2023-05-17 18:42:23 +02:00
Henrik Rydgård
a9f2a7d7cd OpenGL: For contiguous DrawArrays, avoid re-binding the vertex buffer if possible. 2023-05-17 17:57:47 +02:00
Henrik Rydgård
78834a7424 Break out EnableDisableVertexArrays 2023-05-17 17:47:00 +02:00
Henrik Rydgård
1409d2dec4 Remove vestigial support for multiple vertex streams in OpenGL renderer
Unused, and made things more complex. Might do this later for all
backends.
2023-05-17 17:44:08 +02:00
7 changed files with 86 additions and 45 deletions

View file

@ -39,6 +39,7 @@ struct GLQueueProfileContext {
bool enabled;
double cpuStartTime;
double cpuEndTime;
int drawArraysRebindsAvoided;
};

View file

@ -651,7 +651,7 @@ retry_depth:
currentReadHandle_ = fbo->handle;
}
void GLQueueRunner::RunSteps(const std::vector<GLRStep *> &steps, bool skipGLCalls, bool keepSteps, bool useVR) {
void GLQueueRunner::RunSteps(const std::vector<GLRStep *> &steps, bool skipGLCalls, bool keepSteps, bool useVR, GLQueueProfileContext &profile) {
if (skipGLCalls) {
if (keepSteps) {
return;
@ -713,9 +713,9 @@ void GLQueueRunner::RunSteps(const std::vector<GLRStep *> &steps, bool skipGLCal
if (IsVREnabled()) {
GLRStep vrStep = step;
PreprocessStepVR(&vrStep);
PerformRenderPass(vrStep, renderCount == 1, renderCount == totalRenderCount);
PerformRenderPass(vrStep, renderCount == 1, renderCount == totalRenderCount, profile);
} else {
PerformRenderPass(step, renderCount == 1, renderCount == totalRenderCount);
PerformRenderPass(step, renderCount == 1, renderCount == totalRenderCount, profile);
}
break;
case GLRStepType::COPY:
@ -778,7 +778,20 @@ void GLQueueRunner::PerformBlit(const GLRStep &step) {
}
}
void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last) {
// Brings the set of enabled vertex attribute arrays from prevAttr to newAttr.
// Both arguments are bitmasks where bit i corresponds to attribute index i.
// Only attributes whose bit differs between the two masks are touched:
// newly set bits get glEnableVertexAttribArray, cleared bits get
// glDisableVertexAttribArray. Bits at index 7 and above are ignored.
static void EnableDisableVertexArrays(uint32_t prevAttr, uint32_t newAttr) {
	// A bit set here means the attribute's enabled state has to flip.
	const uint32_t changed = prevAttr ^ newAttr;
	for (int slot = 0; slot < 7; slot++) {  // SEM_MAX
		const uint32_t bit = 1u << slot;
		if ((changed & bit) == 0) {
			continue;
		}
		// The state differs, so the new mask alone decides the direction.
		if (newAttr & bit) {
			glEnableVertexAttribArray(slot);
		} else {
			glDisableVertexAttribArray(slot);
		}
	}
}
void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last, GLQueueProfileContext &profile) {
CHECK_GL_ERROR_IF_DEBUG();
PerformBindFramebufferAsRenderTarget(step);
@ -821,6 +834,11 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
bool logicEnabled = false;
#endif
bool clipDistanceEnabled[8]{};
bool lastDrawIsArray = false;
int lastBindOffset = 0;
GLRInputLayout *lastDrawLayout = nullptr;
GLuint blendEqColor = (GLuint)-1;
GLuint blendEqAlpha = (GLuint)-1;
@ -1163,25 +1181,19 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
if (buf != curArrayBuffer) {
glBindBuffer(GL_ARRAY_BUFFER, buf);
curArrayBuffer = buf;
// Invalidate any draw offset caching.
lastDrawLayout = nullptr;
}
if (attrMask != layout->semanticsMask_) {
int enable = layout->semanticsMask_ & ~attrMask;
int disable = (~layout->semanticsMask_) & attrMask;
for (int i = 0; i < 7; i++) { // SEM_MAX
if (enable & (1 << i)) {
glEnableVertexAttribArray(i);
}
if (disable & (1 << i)) {
glDisableVertexAttribArray(i);
}
}
EnableDisableVertexArrays(attrMask, layout->semanticsMask_);
attrMask = layout->semanticsMask_;
}
if (c.draw.indexBuffer) {
for (size_t i = 0; i < layout->entries.size(); i++) {
auto &entry = layout->entries[i];
glVertexAttribPointer(entry.location, entry.count, entry.type, entry.normalized, entry.stride, (const void *)(c.draw.offset + entry.offset));
glVertexAttribPointer(entry.location, entry.count, entry.type, entry.normalized, layout->stride, (const void *)(c.draw.offset + entry.offset));
}
if (c.draw.indexBuffer) {
GLuint buf = c.draw.indexBuffer->buffer_;
_dbg_assert_(!(c.draw.indexBuffer && c.draw.indexBuffer->Mapped()));
if (buf != curElemArrayBuffer) {
@ -1193,9 +1205,32 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
} else {
glDrawElementsInstanced(c.draw.mode, c.draw.count, c.draw.indexType, c.draw.indices, c.draw.instances);
}
lastDrawIsArray = false;
} else {
glDrawArrays(c.draw.mode, c.draw.first, c.draw.count);
// See if we can avoid calling glVertexAttribPointer.
int offset = 0;
bool rebind = true;
if (lastDrawIsArray && layout == lastDrawLayout) {
unsigned int diff = (unsigned int)c.draw.offset - (unsigned int)lastBindOffset;
if (diff % layout->stride == 0) {
// Compatible draws.
offset = diff / layout->stride;
rebind = false;
profile.drawArraysRebindsAvoided++;
}
}
if (rebind) {
// Rebind.
for (size_t i = 0; i < layout->entries.size(); i++) {
auto &entry = layout->entries[i];
glVertexAttribPointer(entry.location, entry.count, entry.type, entry.normalized, layout->stride, (const void *)(c.draw.offset + entry.offset));
}
lastBindOffset = (int)c.draw.offset;
}
glDrawArrays(c.draw.mode, c.draw.first + offset, c.draw.count);
lastDrawIsArray = true;
}
lastDrawLayout = layout;
CHECK_GL_ERROR_IF_DEBUG();
break;
}

View file

@ -357,7 +357,7 @@ public:
void RunInitSteps(const std::vector<GLRInitStep> &steps, bool skipGLCalls);
void RunSteps(const std::vector<GLRStep *> &steps, bool skipGLCalls, bool keepSteps, bool useVR);
void RunSteps(const std::vector<GLRStep *> &steps, bool skipGLCalls, bool keepSteps, bool useVR, GLQueueProfileContext &profile);
void CreateDeviceObjects();
void DestroyDeviceObjects();
@ -382,7 +382,7 @@ private:
void InitCreateFramebuffer(const GLRInitStep &step);
void PerformBindFramebufferAsRenderTarget(const GLRStep &pass);
void PerformRenderPass(const GLRStep &pass, bool first, bool last);
void PerformRenderPass(const GLRStep &pass, bool first, bool last, GLQueueProfileContext &profile);
void PerformCopy(const GLRStep &pass);
void PerformBlit(const GLRStep &pass);
void PerformReadback(const GLRStep &pass);

View file

@ -193,7 +193,11 @@ std::string GLRenderManager::GetGpuProfileString() const {
const GLQueueProfileContext &profile = frameData_[curFrame].profile;
float cputime_ms = 1000.0f * (profile.cpuEndTime - profile.cpuStartTime);
return StringFromFormat("CPU time to run the list: %0.2f ms", cputime_ms);
return StringFromFormat(
"CPU time to run the list: %0.2f ms\n"
"Avoided DrawArrays rebinds: %d",
cputime_ms,
profile.drawArraysRebindsAvoided);
}
void GLRenderManager::BindFramebufferAsRenderTarget(GLRFramebuffer *fb, GLRRenderPassAction color, GLRRenderPassAction depth, GLRRenderPassAction stencil, uint32_t clearColor, float clearDepth, uint8_t clearStencil, const char *tag) {
@ -429,17 +433,18 @@ bool GLRenderManager::Run(GLRRenderThreadTask &task) {
if (frameData.profile.enabled) {
frameData.profile.cpuStartTime = time_now_d();
frameData.profile.drawArraysRebindsAvoided = 0;
}
if (IsVREnabled()) {
int passes = GetVRPassesCount();
for (int i = 0; i < passes; i++) {
PreVRFrameRender(i);
queueRunner_.RunSteps(task.steps, skipGLCalls_, i < passes - 1, true);
queueRunner_.RunSteps(task.steps, skipGLCalls_, i < passes - 1, true, frameData.profile);
PostVRFrameRender();
}
} else {
queueRunner_.RunSteps(task.steps, skipGLCalls_, false, false);
queueRunner_.RunSteps(task.steps, skipGLCalls_, false, false, frameData.profile);
}
if (frameData.profile.enabled) {

View file

@ -359,10 +359,10 @@ public:
int count;
GLenum type;
GLboolean normalized;
int stride;
intptr_t offset;
};
std::vector<Entry> entries;
unsigned int stride;
int semanticsMask_ = 0;
};
@ -487,10 +487,11 @@ public:
return step.create_program.program;
}
GLRInputLayout *CreateInputLayout(const std::vector<GLRInputLayout::Entry> &entries) {
GLRInputLayout *CreateInputLayout(const std::vector<GLRInputLayout::Entry> &entries, int stride) {
GLRInitStep step{ GLRInitStepType::CREATE_INPUT_LAYOUT };
step.create_input_layout.inputLayout = new GLRInputLayout();
step.create_input_layout.inputLayout->entries = entries;
step.create_input_layout.inputLayout->stride = stride;
for (auto &iter : step.create_input_layout.inputLayout->entries) {
step.create_input_layout.inputLayout->semanticsMask_ |= 1 << iter.location;
}

View file

@ -1393,13 +1393,13 @@ OpenGLInputLayout::~OpenGLInputLayout() {
void OpenGLInputLayout::Compile(const InputLayoutDesc &desc) {
// TODO: This is only accurate if there's only one stream. But whatever, for now we
// never use multiple streams anyway.
stride = desc.bindings.empty() ? 0 : (GLsizei)desc.bindings[0].stride;
_dbg_assert_(desc.bindings.size() == 1);
stride = (GLsizei)desc.bindings[0].stride;
std::vector<GLRInputLayout::Entry> entries;
for (auto &attr : desc.attributes) {
GLRInputLayout::Entry entry;
entry.location = attr.location;
entry.stride = (GLsizei)desc.bindings[attr.binding].stride;
entry.offset = attr.offset;
switch (attr.format) {
case DataFormat::R32G32_FLOAT:
@ -1431,7 +1431,7 @@ void OpenGLInputLayout::Compile(const InputLayoutDesc &desc) {
entries.push_back(entry);
}
if (!entries.empty()) {
inputLayout_ = render_->CreateInputLayout(entries);
inputLayout_ = render_->CreateInputLayout(entries, stride);
} else {
inputLayout_ = nullptr;
}

View file

@ -103,12 +103,12 @@ void DrawEngineGLES::InitDeviceObjects() {
int vertexSize = sizeof(TransformedVertex);
std::vector<GLRInputLayout::Entry> entries;
entries.push_back({ ATTR_POSITION, 4, GL_FLOAT, GL_FALSE, vertexSize, offsetof(TransformedVertex, x) });
entries.push_back({ ATTR_TEXCOORD, 3, GL_FLOAT, GL_FALSE, vertexSize, offsetof(TransformedVertex, u) });
entries.push_back({ ATTR_COLOR0, 4, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, offsetof(TransformedVertex, color0) });
entries.push_back({ ATTR_COLOR1, 3, GL_UNSIGNED_BYTE, GL_TRUE, vertexSize, offsetof(TransformedVertex, color1) });
entries.push_back({ ATTR_NORMAL, 1, GL_FLOAT, GL_FALSE, vertexSize, offsetof(TransformedVertex, fog) });
softwareInputLayout_ = render_->CreateInputLayout(entries);
entries.push_back({ ATTR_POSITION, 4, GL_FLOAT, GL_FALSE, offsetof(TransformedVertex, x) });
entries.push_back({ ATTR_TEXCOORD, 3, GL_FLOAT, GL_FALSE, offsetof(TransformedVertex, u) });
entries.push_back({ ATTR_COLOR0, 4, GL_UNSIGNED_BYTE, GL_TRUE, offsetof(TransformedVertex, color0) });
entries.push_back({ ATTR_COLOR1, 3, GL_UNSIGNED_BYTE, GL_TRUE, offsetof(TransformedVertex, color1) });
entries.push_back({ ATTR_NORMAL, 1, GL_FLOAT, GL_FALSE, offsetof(TransformedVertex, fog) });
softwareInputLayout_ = render_->CreateInputLayout(entries, vertexSize);
draw_->SetInvalidationCallback(std::bind(&DrawEngineGLES::Invalidate, this, std::placeholders::_1));
}
@ -189,7 +189,7 @@ static const GlTypeInfo GLComp[] = {
{GL_UNSIGNED_SHORT, 4, GL_TRUE},// DEC_U16_4,
};
static inline void VertexAttribSetup(int attrib, int fmt, int stride, int offset, std::vector<GLRInputLayout::Entry> &entries) {
static inline void VertexAttribSetup(int attrib, int fmt, int offset, std::vector<GLRInputLayout::Entry> &entries) {
if (fmt) {
const GlTypeInfo &type = GLComp[fmt];
GLRInputLayout::Entry entry;
@ -197,7 +197,6 @@ static inline void VertexAttribSetup(int attrib, int fmt, int stride, int offset
entry.location = attrib;
entry.normalized = type.normalized;
entry.type = type.type;
entry.stride = stride;
entry.count = type.count;
entries.push_back(entry);
}
@ -212,15 +211,15 @@ GLRInputLayout *DrawEngineGLES::SetupDecFmtForDraw(LinkedShader *program, const
}
std::vector<GLRInputLayout::Entry> entries;
VertexAttribSetup(ATTR_W1, decFmt.w0fmt, decFmt.stride, decFmt.w0off, entries);
VertexAttribSetup(ATTR_W2, decFmt.w1fmt, decFmt.stride, decFmt.w1off, entries);
VertexAttribSetup(ATTR_TEXCOORD, decFmt.uvfmt, decFmt.stride, decFmt.uvoff, entries);
VertexAttribSetup(ATTR_COLOR0, decFmt.c0fmt, decFmt.stride, decFmt.c0off, entries);
VertexAttribSetup(ATTR_COLOR1, decFmt.c1fmt, decFmt.stride, decFmt.c1off, entries);
VertexAttribSetup(ATTR_NORMAL, decFmt.nrmfmt, decFmt.stride, decFmt.nrmoff, entries);
VertexAttribSetup(ATTR_POSITION, decFmt.posfmt, decFmt.stride, decFmt.posoff, entries);
VertexAttribSetup(ATTR_W1, decFmt.w0fmt, decFmt.w0off, entries);
VertexAttribSetup(ATTR_W2, decFmt.w1fmt, decFmt.w1off, entries);
VertexAttribSetup(ATTR_TEXCOORD, decFmt.uvfmt, decFmt.uvoff, entries);
VertexAttribSetup(ATTR_COLOR0, decFmt.c0fmt, decFmt.c0off, entries);
VertexAttribSetup(ATTR_COLOR1, decFmt.c1fmt, decFmt.c1off, entries);
VertexAttribSetup(ATTR_NORMAL, decFmt.nrmfmt, decFmt.nrmoff, entries);
VertexAttribSetup(ATTR_POSITION, decFmt.posfmt, decFmt.posoff, entries);
inputLayout = render_->CreateInputLayout(entries);
inputLayout = render_->CreateInputLayout(entries, decFmt.stride);
inputLayoutMap_.Insert(key, inputLayout);
return inputLayout;
}