metal: SDL_RenderFillRects now issues one indexed draw call per batch of up to 16k rectangles (within a single FillRects call), instead of one draw call per rectangle. This reduces CPU usage when drawing many rectangles.
--HG-- extra : amend_source : 1b39afd4eaa31c151f607e4685f079a46fa6238c
This commit is contained in:
parent
15c44098d6
commit
52adb7e81e
1 changed file with 61 additions and 18 deletions
|
@ -117,6 +117,7 @@ typedef struct METAL_ShaderPipelines
|
|||
@property (nonatomic, retain) id<MTLSamplerState> mtlsamplernearest;
|
||||
@property (nonatomic, retain) id<MTLSamplerState> mtlsamplerlinear;
|
||||
@property (nonatomic, retain) id<MTLBuffer> mtlbufconstants;
|
||||
@property (nonatomic, retain) id<MTLBuffer> mtlbufquadindices;
|
||||
@property (nonatomic, retain) CAMetalLayer *mtllayer;
|
||||
@property (nonatomic, retain) MTLRenderPassDescriptor *mtlpassdesc;
|
||||
@property (nonatomic, assign) METAL_ShaderPipelines *activepipelines;
|
||||
|
@ -137,6 +138,7 @@ typedef struct METAL_ShaderPipelines
|
|||
[_mtlsamplernearest release];
|
||||
[_mtlsamplerlinear release];
|
||||
[_mtlbufconstants release];
|
||||
[_mtlbufquadindices release];
|
||||
[_mtllayer release];
|
||||
[_mtlpassdesc release];
|
||||
[super dealloc];
|
||||
|
@ -794,7 +796,6 @@ METAL_QueueDrawPoints(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL
|
|||
static int
|
||||
METAL_QueueFillRects(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_FRect * rects, int count)
|
||||
{
|
||||
// !!! FIXME: use an index buffer
|
||||
const size_t vertlen = (sizeof (float) * 8) * count;
|
||||
float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, 0, &cmd->data.draw.first);
|
||||
if (!verts) {
|
||||
|
@ -803,6 +804,11 @@ METAL_QueueFillRects(SDL_Renderer * renderer, SDL_RenderCommand *cmd, const SDL_
|
|||
|
||||
cmd->data.draw.count = count;
|
||||
|
||||
/* Quads in the following vertex order (matches the quad index buffer):
|
||||
* 1---3
|
||||
* | \ |
|
||||
* 0---2
|
||||
*/
|
||||
for (int i = 0; i < count; i++, rects++) {
|
||||
if ((rects->w <= 0.0f) || (rects->h <= 0.0f)) {
|
||||
cmd->data.draw.count--;
|
||||
|
@ -829,9 +835,8 @@ static int
|
|||
METAL_QueueCopy(SDL_Renderer * renderer, SDL_RenderCommand *cmd, SDL_Texture * texture,
|
||||
const SDL_Rect * srcrect, const SDL_FRect * dstrect)
|
||||
{
|
||||
METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
|
||||
const float texw = (float) texturedata.mtltexture.width;
|
||||
const float texh = (float) texturedata.mtltexture.height;
|
||||
const float texw = (float) texture->w;
|
||||
const float texh = (float) texture->h;
|
||||
// !!! FIXME: use an index buffer
|
||||
const size_t vertlen = (sizeof (float) * 16);
|
||||
float *verts = (float *) SDL_AllocateRenderVertices(renderer, vertlen, 0, &cmd->data.draw.first);
|
||||
|
@ -867,9 +872,8 @@ METAL_QueueCopyEx(SDL_Renderer * renderer, SDL_RenderCommand *cmd, SDL_Texture *
|
|||
const SDL_Rect * srcquad, const SDL_FRect * dstrect,
|
||||
const double angle, const SDL_FPoint *center, const SDL_RendererFlip flip)
|
||||
{
|
||||
METAL_TextureData *texturedata = (__bridge METAL_TextureData *)texture->driverdata;
|
||||
const float texw = (float) texturedata.mtltexture.width;
|
||||
const float texh = (float) texturedata.mtltexture.height;
|
||||
const float texw = (float) texture->w;
|
||||
const float texh = (float) texture->h;
|
||||
const float rads = (float)(M_PI * (float) angle / 180.0f);
|
||||
const float c = cosf(rads), s = sinf(rads);
|
||||
float minu, maxu, minv, maxv;
|
||||
|
@ -1159,10 +1163,19 @@ METAL_RunCommandQueue(SDL_Renderer * renderer, SDL_RenderCommand *cmd, void *ver
|
|||
|
||||
case SDL_RENDERCMD_FILL_RECTS: {
|
||||
const size_t count = cmd->data.draw.count;
|
||||
size_t start = 0;
|
||||
const size_t maxcount = UINT16_MAX / 6;
|
||||
SetDrawState(renderer, cmd, SDL_METAL_FRAGMENT_SOLID, CONSTANTS_OFFSET_IDENTITY, mtlbufvertex, &statecache);
|
||||
for (size_t i = 0; i < count; i++, start += 4) { // !!! FIXME: can we do all of these this with a single draw call, using MTLPrimitiveTypeTriangle and an index buffer?
|
||||
[data.mtlcmdencoder drawPrimitives:MTLPrimitiveTypeTriangleStrip vertexStart:start vertexCount:4];
|
||||
/* Our index buffer has 16 bit indices, so we can only draw 65k
|
||||
* vertices (16k rects) at a time. */
|
||||
for (size_t i = 0; i < count; i += maxcount) {
|
||||
/* Set the vertex buffer offset for our current positions.
|
||||
* The vertex buffer itself was bound in SetDrawState. */
|
||||
[data.mtlcmdencoder setVertexBufferOffset:cmd->data.draw.first + i*sizeof(float)*8 atIndex:0];
|
||||
[data.mtlcmdencoder drawIndexedPrimitives:MTLPrimitiveTypeTriangle
|
||||
indexCount:SDL_min(maxcount, count - i) * 6
|
||||
indexType:MTLIndexTypeUInt16
|
||||
indexBuffer:data.mtlbufquadindices
|
||||
indexBufferOffset:0];
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -1424,11 +1437,6 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
|
|||
#if !__has_feature(objc_arc)
|
||||
[mtlbufconstantstaging autorelease];
|
||||
#endif
|
||||
mtlbufconstantstaging.label = @"SDL constant staging data";
|
||||
|
||||
id<MTLBuffer> mtlbufconstants = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModePrivate];
|
||||
data.mtlbufconstants = mtlbufconstants;
|
||||
data.mtlbufconstants.label = @"SDL constant data";
|
||||
|
||||
char *constantdata = [mtlbufconstantstaging contents];
|
||||
SDL_memcpy(constantdata + CONSTANTS_OFFSET_IDENTITY, identitytransform, sizeof(identitytransform));
|
||||
|
@ -1437,10 +1445,42 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
|
|||
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT601, decodetransformBT601, sizeof(decodetransformBT601));
|
||||
SDL_memcpy(constantdata + CONSTANTS_OFFSET_DECODE_BT709, decodetransformBT709, sizeof(decodetransformBT709));
|
||||
|
||||
int quadcount = UINT16_MAX / 4;
|
||||
size_t indicessize = sizeof(UInt16) * quadcount * 6;
|
||||
id<MTLBuffer> mtlbufquadindicesstaging = [data.mtldevice newBufferWithLength:indicessize options:MTLResourceStorageModeShared];
|
||||
#if !__has_feature(objc_arc)
|
||||
[mtlbufquadindicesstaging autorelease];
|
||||
#endif
|
||||
|
||||
/* Quads in the following vertex order (matches the FillRects vertices):
|
||||
* 1---3
|
||||
* | \ |
|
||||
* 0---2
|
||||
*/
|
||||
UInt16 *indexdata = [mtlbufquadindicesstaging contents];
|
||||
for (int i = 0; i < quadcount; i++) {
|
||||
indexdata[i * 6 + 0] = i * 4 + 0;
|
||||
indexdata[i * 6 + 1] = i * 4 + 1;
|
||||
indexdata[i * 6 + 2] = i * 4 + 2;
|
||||
|
||||
indexdata[i * 6 + 3] = i * 4 + 2;
|
||||
indexdata[i * 6 + 4] = i * 4 + 1;
|
||||
indexdata[i * 6 + 5] = i * 4 + 3;
|
||||
}
|
||||
|
||||
id<MTLBuffer> mtlbufconstants = [data.mtldevice newBufferWithLength:CONSTANTS_LENGTH options:MTLResourceStorageModePrivate];
|
||||
data.mtlbufconstants = mtlbufconstants;
|
||||
data.mtlbufconstants.label = @"SDL constant data";
|
||||
|
||||
id<MTLBuffer> mtlbufquadindices = [data.mtldevice newBufferWithLength:indicessize options:MTLResourceStorageModePrivate];
|
||||
data.mtlbufquadindices = mtlbufquadindices;
|
||||
data.mtlbufquadindices.label = @"SDL quad index buffer";
|
||||
|
||||
id<MTLCommandBuffer> cmdbuffer = [data.mtlcmdqueue commandBuffer];
|
||||
id<MTLBlitCommandEncoder> blitcmd = [cmdbuffer blitCommandEncoder];
|
||||
|
||||
[blitcmd copyFromBuffer:mtlbufconstantstaging sourceOffset:0 toBuffer:data.mtlbufconstants destinationOffset:0 size:CONSTANTS_LENGTH];
|
||||
[blitcmd copyFromBuffer:mtlbufconstantstaging sourceOffset:0 toBuffer:mtlbufconstants destinationOffset:0 size:CONSTANTS_LENGTH];
|
||||
[blitcmd copyFromBuffer:mtlbufquadindicesstaging sourceOffset:0 toBuffer:mtlbufquadindices destinationOffset:0 size:indicessize];
|
||||
|
||||
[blitcmd endEncoding];
|
||||
[cmdbuffer commit];
|
||||
|
@ -1503,8 +1543,10 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
|
|||
#endif
|
||||
#else
|
||||
#ifdef __IPHONE_11_0
|
||||
if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
|
||||
maxtexsize = 16384;
|
||||
if (@available(iOS 11.0, *)) {
|
||||
if ([mtldevice supportsFeatureSet:MTLFeatureSet_iOS_GPUFamily4_v1]) {
|
||||
maxtexsize = 16384;
|
||||
}
|
||||
} else
|
||||
#endif
|
||||
#ifdef __IPHONE_10_0
|
||||
|
@ -1529,6 +1571,7 @@ METAL_CreateRenderer(SDL_Window * window, Uint32 flags)
|
|||
[mtlsamplernearest release];
|
||||
[mtlsamplerlinear release];
|
||||
[mtlbufconstants release];
|
||||
[mtlbufquadindices release];
|
||||
[view release];
|
||||
[data release];
|
||||
[mtldevice release];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue