Merge pull request #15902 from hrydgard/block-refactor

Block transfer code refactor
This commit is contained in:
Henrik Rydgård 2022-08-25 17:03:26 +02:00 committed by GitHub
commit 60d66c20cd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 89 additions and 120 deletions

View file

@ -113,12 +113,11 @@ void FramebufferManagerCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, G
VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) const {
addr &= 0x3FFFFFFF;
VirtualFramebuffer *match = nullptr;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *v = vfbs_[i];
if (v->fb_address == addr) {
for (auto vfb : vfbs_) {
if (vfb->fb_address == addr) {
// Could check w too but whatever (actually, might very well make sense to do so, depending on context).
if (!match || v->last_frame_render > match->last_frame_render) {
match = v;
if (!match || vfb->last_frame_render > match->last_frame_render) {
match = vfb;
}
}
}
@ -182,8 +181,8 @@ void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, int fb_stride
if (viewport_width != region_width) {
// The majority of the time, these are equal. If not, let's check what we know.
u32 nearest_address = 0xFFFFFFFF;
for (size_t i = 0; i < vfbs_.size(); ++i) {
const u32 other_address = vfbs_[i]->fb_address & 0x3FFFFFFF;
for (auto vfb : vfbs_) {
const u32 other_address = vfb->fb_address & 0x3FFFFFFF;
if (other_address > fb_address && other_address < nearest_address) {
nearest_address = other_address;
}
@ -319,9 +318,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
// Find a matching framebuffer
VirtualFramebuffer *vfb = nullptr;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *v = vfbs_[i];
for (auto v : vfbs_) {
const u32 bpp = BufferFormatBytesPerPixel(v->fb_format);
if (params.fb_address == v->fb_address && params.fb_format == v->fb_format && params.fb_stride == v->fb_stride) {
@ -460,7 +457,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
// TODO: Is it worth trying to upload the depth buffer (only if it wasn't copied above..?)
}
// Let's check for depth buffer overlap. Might be interesting.
// Let's check for depth buffer overlap. Might be interesting (not that interesting anymore..)
bool sharingReported = false;
for (size_t i = 0, end = vfbs_.size(); i < end; ++i) {
if (vfbs_[i]->z_stride != 0 && params.fb_address == vfbs_[i]->z_address) {
@ -1255,8 +1252,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
// "framebuffers" sitting in RAM (created from block transfer or similar) so we only take off the kernel
// and uncached bits of the address when comparing.
const u32 addr = fbaddr & 0x3FFFFFFF;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *v = vfbs_[i];
for (auto v : vfbs_) {
const u32 v_addr = v->fb_address & 0x3FFFFFFF;
const u32 v_size = ColorBufferByteSize(v);
if (addr >= v_addr && addr < v_addr + v_size) {
@ -1531,14 +1527,15 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
dst &= 0x3FFFFFFF;
src &= 0x3FFFFFFF;
// TODO: Merge the below into FindTransferFramebuffer
VirtualFramebuffer *dstBuffer = 0;
VirtualFramebuffer *srcBuffer = 0;
u32 dstY = (u32)-1;
u32 dstH = 0;
u32 srcY = (u32)-1;
u32 srcH = 0;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
for (auto vfb : vfbs_) {
if (vfb->fb_stride == 0) {
continue;
}
@ -1640,119 +1637,68 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
}
}
// Can't be const, in case it has to create a vfb unfortunately.
void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) {
u32 dstYOffset = -1;
u32 dstXOffset = -1;
u32 srcYOffset = -1;
u32 srcXOffset = -1;
int width = srcWidth;
int height = srcHeight;
void FramebufferManagerCommon::FindTransferFramebuffer(VirtualFramebuffer *&buffer, u32 basePtr, int stride, int &x, int &y, int &width, int &height, int bpp, bool destination) {
u32 xOffset = -1;
u32 yOffset = -1;
int transferWidth = width;
int transferHeight = height;
dstBasePtr &= 0x3FFFFFFF;
srcBasePtr &= 0x3FFFFFFF;
basePtr &= 0x3FFFFFFF;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
for (auto vfb : vfbs_) {
const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF;
const u32 vfb_size = ColorBufferByteSize(vfb);
const u32 vfb_bpp = BufferFormatBytesPerPixel(vfb->fb_format);
const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
const u32 vfb_byteWidth = vfb->width * vfb_bpp;
// These heuristics are a bit annoying.
// The goal is to avoid using GPU block transfers for things that ought to be memory.
// Maybe we should even check for textures at these places instead?
if (vfb_address <= basePtr && basePtr < vfb_address + vfb_size) {
const u32 byteOffset = basePtr - vfb_address;
const u32 byteStride = stride * bpp;
const u32 memYOffset = byteOffset / byteStride;
if (vfb_address <= dstBasePtr && dstBasePtr < vfb_address + vfb_size) {
const u32 byteOffset = dstBasePtr - vfb_address;
const u32 byteStride = dstStride * bpp;
const u32 yOffset = byteOffset / byteStride;
// Some games use mismatching bitdepths. But make sure the stride matches.
// Some games use mismatching bitdepths. But make sure the stride matches.
// If it doesn't, generally this means we detected the framebuffer with too large a height.
// Use bufferHeight in case of buffers that resize up and down often per frame (Valkyrie Profile.)
bool match = yOffset < dstYOffset && (int)yOffset <= (int)vfb->bufferHeight - dstHeight;
// TODO: Surely this first comparison should be <= ?
// Or does the exact match (byteOffset == 0) case get handled elsewhere?
bool match = memYOffset < yOffset && (int)memYOffset <= (int)vfb->bufferHeight - height;
if (match && vfb_byteStride != byteStride) {
// Grand Knights History copies with a mismatching stride but a full line at a time.
// That's why we multiply by height, not width - this copy is a rectangle with the wrong stride but a line with the correct one.
// Makes it hard to detect the wrong transfers in e.g. God of War.
if (width != dstStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) {
// However, some other games write cluts to framebuffers.
// Let's catch this and upload. Otherwise reject the match.
match = (vfb->usageFlags & FB_USAGE_CLUT) != 0;
if (match) {
dstWidth = byteStride * height / vfb_bpp;
dstHeight = 1;
if (transferWidth != stride || (byteStride * transferHeight != vfb_byteStride && byteStride * transferHeight != vfb_byteWidth)) {
if (destination) {
// However, some other games write cluts to framebuffers.
// Let's catch this and upload. Otherwise reject the match.
match = (vfb->usageFlags & FB_USAGE_CLUT) != 0;
if (match) {
width = byteStride * transferHeight / vfb_bpp;
height = 1;
}
} else {
match = false;
}
} else {
dstWidth = byteStride * height / vfb_bpp;
dstHeight = 1;
width = byteStride * transferHeight / vfb_bpp;
height = 1;
}
} else if (match) {
dstWidth = width;
dstHeight = height;
width = transferWidth;
height = transferHeight;
}
if (match) {
dstYOffset = yOffset;
dstXOffset = dstStride == 0 ? 0 : (byteOffset / bpp) % dstStride;
dstBuffer = vfb;
}
}
if (vfb_address <= srcBasePtr && srcBasePtr < vfb_address + vfb_size) {
const u32 byteOffset = srcBasePtr - vfb_address;
const u32 byteStride = srcStride * bpp;
const u32 yOffset = byteOffset / byteStride;
bool match = yOffset < srcYOffset && (int)yOffset <= (int)vfb->bufferHeight - srcHeight;
if (match && vfb_byteStride != byteStride) {
if (width != srcStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) {
match = false;
} else {
srcWidth = byteStride * height / vfb_bpp;
srcHeight = 1;
}
} else if (match) {
srcWidth = width;
srcHeight = height;
}
if (match) {
srcYOffset = yOffset;
srcXOffset = srcStride == 0 ? 0 : (byteOffset / bpp) % srcStride;
srcBuffer = vfb;
xOffset = stride == 0 ? 0 : (byteOffset / bpp) % stride;
yOffset = memYOffset;
buffer = vfb;
}
}
}
if (srcBuffer && !dstBuffer) {
if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB ||
(PSP_CoreParameter().compat.flags().IntraVRAMBlockTransferAllowCreateFB &&
Memory::IsVRAMAddress(srcBuffer->fb_address) && Memory::IsVRAMAddress(dstBasePtr))) {
GEBufferFormat ramFormat;
// Try to guess the appropriate format. We only know the bpp from the block transfer command (16 or 32 bit).
if (bpp == 4) {
// Only one possibility unless it's doing split pixel tricks (which we could detect through stride maybe).
ramFormat = GE_FORMAT_8888;
} else if (srcBuffer->fb_format != GE_FORMAT_8888) {
// We guess that the game will interpret the data the same as it was in the source of the copy.
// Seems like a likely good guess, and works in Test Drive Unlimited.
ramFormat = srcBuffer->fb_format;
} else {
// No info left - just fall back to something. But this is definitely split pixel tricks.
ramFormat = GE_FORMAT_5551;
}
dstBuffer = CreateRAMFramebuffer(dstBasePtr, dstWidth, dstHeight, dstStride, ramFormat);
}
}
if (dstBuffer)
dstBuffer->last_frame_used = gpuStats.numFlips;
if (dstYOffset != (u32)-1) {
dstY += dstYOffset;
dstX += dstXOffset;
}
if (srcYOffset != (u32)-1) {
srcY += srcYOffset;
srcX += srcXOffset;
if (yOffset != (u32)-1) {
x += xOffset;
y += yOffset;
}
}
@ -1956,8 +1902,33 @@ bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dst
int dstWidth = width;
int dstHeight = height;
// This looks at the compat flags BlockTransferAllowCreateFB*.
FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp);
// These modify the X/Y/W/H parameters depending on the memory offset of the base pointers from the actual buffers.
FindTransferFramebuffer(srcBuffer, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, bpp, false);
FindTransferFramebuffer(dstBuffer, dstBasePtr, dstStride, dstX, dstY, dstWidth, dstHeight, bpp, true);
if (srcBuffer && !dstBuffer) {
if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB ||
(PSP_CoreParameter().compat.flags().IntraVRAMBlockTransferAllowCreateFB &&
Memory::IsVRAMAddress(srcBuffer->fb_address) && Memory::IsVRAMAddress(dstBasePtr))) {
GEBufferFormat ramFormat;
// Try to guess the appropriate format. We only know the bpp from the block transfer command (16 or 32 bit).
if (bpp == 4) {
// Only one possibility unless it's doing split pixel tricks (which we could detect through stride maybe).
ramFormat = GE_FORMAT_8888;
} else if (srcBuffer->fb_format != GE_FORMAT_8888) {
// We guess that the game will interpret the data the same as it was in the source of the copy.
// Seems like a likely good guess, and works in Test Drive Unlimited.
ramFormat = srcBuffer->fb_format;
} else {
// No info left - just fall back to something. But this is definitely split pixel tricks.
ramFormat = GE_FORMAT_5551;
}
dstBuffer = CreateRAMFramebuffer(dstBasePtr, dstWidth, dstHeight, dstStride, ramFormat);
}
}
if (dstBuffer)
dstBuffer->last_frame_used = gpuStats.numFlips;
if (dstBuffer && srcBuffer) {
if (srcBuffer == dstBuffer) {
@ -2032,13 +2003,15 @@ void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstS
}
if (MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr)) {
// TODO: Figure out how we can avoid repeating the search here.
VirtualFramebuffer *dstBuffer = 0;
VirtualFramebuffer *srcBuffer = 0;
int srcWidth = width;
int srcHeight = height;
int dstWidth = width;
int dstHeight = height;
FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp);
FindTransferFramebuffer(srcBuffer, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, bpp, false);
FindTransferFramebuffer(dstBuffer, dstBasePtr, dstStride, dstX, dstY, dstWidth, dstHeight, bpp, true);
// A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to
// the backbuffer. Detect this and have the framebuffermanager draw the pixels.
@ -2499,9 +2472,7 @@ void FramebufferManagerCommon::RebindFramebuffer(const char *tag) {
std::vector<FramebufferInfo> FramebufferManagerCommon::GetFramebufferList() const {
std::vector<FramebufferInfo> list;
for (size_t i = 0; i < vfbs_.size(); ++i) {
VirtualFramebuffer *vfb = vfbs_[i];
for (auto vfb : vfbs_) {
FramebufferInfo info;
info.fb_address = vfb->fb_address;
info.z_address = vfb->z_address;

View file

@ -275,10 +275,15 @@ public:
void UpdateFromMemory(u32 addr, int size, bool safe);
void ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor);
bool PerformStencilUpload(u32 addr, int size, StencilUpload flags);
// Returns true if it's sure this is a direct FBO->FBO transfer and it has already handled it.
// In that case we hardly need to actually copy the bytes in VRAM, they will be wrong anyway (unless
// read framebuffers is on, in which case this should always return false).
// If this returns false, a memory copy will happen and NotifyBlockTransferAfter will be called.
bool NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp, u32 skipDrawReason);
// This gets called after the memory copy, in case NotifyBlockTransferBefore returned false.
// Otherwise it doesn't get called.
void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp, u32 skipDrawReason);
bool BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags);
@ -412,7 +417,9 @@ protected:
bool ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const;
void DownloadFramebufferOnSwitch(VirtualFramebuffer *vfb);
void FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp);
void FindTransferFramebuffer(VirtualFramebuffer *&srcBuffer, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int bpp, bool destination);
VirtualFramebuffer *FindDownloadTempBuffer(VirtualFramebuffer *vfb);
virtual void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) {}

View file

@ -2773,15 +2773,6 @@ void GPUCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat f
}
void GPUCommon::DoBlockTransfer(u32 skipDrawReason) {
// TODO: This is used a lot to copy data around between render targets and textures,
// and also to quickly load textures from RAM to VRAM. So we should do checks like the following:
// * Does dstBasePtr point to an existing texture? If so maybe reload it immediately.
//
// * Does srcBasePtr point to a render target, and dstBasePtr to a texture? If so
// either copy between rt and texture or reassign the texture to point to the render target
//
// etc....
u32 srcBasePtr = gstate.getTransferSrcAddress();
u32 srcStride = gstate.getTransferSrcStride();