Merge pull request #15902 from hrydgard/block-refactor
Block transfer code refactor
This commit is contained in:
commit
60d66c20cd
3 changed files with 89 additions and 120 deletions
|
@ -113,12 +113,11 @@ void FramebufferManagerCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, G
|
|||
VirtualFramebuffer *FramebufferManagerCommon::GetVFBAt(u32 addr) const {
|
||||
addr &= 0x3FFFFFFF;
|
||||
VirtualFramebuffer *match = nullptr;
|
||||
for (size_t i = 0; i < vfbs_.size(); ++i) {
|
||||
VirtualFramebuffer *v = vfbs_[i];
|
||||
if (v->fb_address == addr) {
|
||||
for (auto vfb : vfbs_) {
|
||||
if (vfb->fb_address == addr) {
|
||||
// Could check w too but whatever (actually, might very well make sense to do so, depending on context).
|
||||
if (!match || v->last_frame_render > match->last_frame_render) {
|
||||
match = v;
|
||||
if (!match || vfb->last_frame_render > match->last_frame_render) {
|
||||
match = vfb;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -182,8 +181,8 @@ void FramebufferManagerCommon::EstimateDrawingSize(u32 fb_address, int fb_stride
|
|||
if (viewport_width != region_width) {
|
||||
// The majority of the time, these are equal. If not, let's check what we know.
|
||||
u32 nearest_address = 0xFFFFFFFF;
|
||||
for (size_t i = 0; i < vfbs_.size(); ++i) {
|
||||
const u32 other_address = vfbs_[i]->fb_address & 0x3FFFFFFF;
|
||||
for (auto vfb : vfbs_) {
|
||||
const u32 other_address = vfb->fb_address & 0x3FFFFFFF;
|
||||
if (other_address > fb_address && other_address < nearest_address) {
|
||||
nearest_address = other_address;
|
||||
}
|
||||
|
@ -319,9 +318,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
|
|||
|
||||
// Find a matching framebuffer
|
||||
VirtualFramebuffer *vfb = nullptr;
|
||||
for (size_t i = 0; i < vfbs_.size(); ++i) {
|
||||
VirtualFramebuffer *v = vfbs_[i];
|
||||
|
||||
for (auto v : vfbs_) {
|
||||
const u32 bpp = BufferFormatBytesPerPixel(v->fb_format);
|
||||
|
||||
if (params.fb_address == v->fb_address && params.fb_format == v->fb_format && params.fb_stride == v->fb_stride) {
|
||||
|
@ -460,7 +457,7 @@ VirtualFramebuffer *FramebufferManagerCommon::DoSetRenderFrameBuffer(const Frame
|
|||
// TODO: Is it worth trying to upload the depth buffer (only if it wasn't copied above..?)
|
||||
}
|
||||
|
||||
// Let's check for depth buffer overlap. Might be interesting.
|
||||
// Let's check for depth buffer overlap. Might be interesting (not that interesting anymore..)
|
||||
bool sharingReported = false;
|
||||
for (size_t i = 0, end = vfbs_.size(); i < end; ++i) {
|
||||
if (vfbs_[i]->z_stride != 0 && params.fb_address == vfbs_[i]->z_address) {
|
||||
|
@ -1255,8 +1252,7 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) {
|
|||
// "framebuffers" sitting in RAM (created from block transfer or similar) so we only take off the kernel
|
||||
// and uncached bits of the address when comparing.
|
||||
const u32 addr = fbaddr & 0x3FFFFFFF;
|
||||
for (size_t i = 0; i < vfbs_.size(); ++i) {
|
||||
VirtualFramebuffer *v = vfbs_[i];
|
||||
for (auto v : vfbs_) {
|
||||
const u32 v_addr = v->fb_address & 0x3FFFFFFF;
|
||||
const u32 v_size = ColorBufferByteSize(v);
|
||||
if (addr >= v_addr && addr < v_addr + v_size) {
|
||||
|
@ -1531,14 +1527,15 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
|
|||
dst &= 0x3FFFFFFF;
|
||||
src &= 0x3FFFFFFF;
|
||||
|
||||
// TODO: Merge the below into FindTransferFramebuffer
|
||||
|
||||
VirtualFramebuffer *dstBuffer = 0;
|
||||
VirtualFramebuffer *srcBuffer = 0;
|
||||
u32 dstY = (u32)-1;
|
||||
u32 dstH = 0;
|
||||
u32 srcY = (u32)-1;
|
||||
u32 srcH = 0;
|
||||
for (size_t i = 0; i < vfbs_.size(); ++i) {
|
||||
VirtualFramebuffer *vfb = vfbs_[i];
|
||||
for (auto vfb : vfbs_) {
|
||||
if (vfb->fb_stride == 0) {
|
||||
continue;
|
||||
}
|
||||
|
@ -1640,119 +1637,68 @@ bool FramebufferManagerCommon::NotifyFramebufferCopy(u32 src, u32 dst, int size,
|
|||
}
|
||||
}
|
||||
|
||||
// Can't be const, in case it has to create a vfb unfortunately.
|
||||
void FramebufferManagerCommon::FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp) {
|
||||
u32 dstYOffset = -1;
|
||||
u32 dstXOffset = -1;
|
||||
u32 srcYOffset = -1;
|
||||
u32 srcXOffset = -1;
|
||||
int width = srcWidth;
|
||||
int height = srcHeight;
|
||||
void FramebufferManagerCommon::FindTransferFramebuffer(VirtualFramebuffer *&buffer, u32 basePtr, int stride, int &x, int &y, int &width, int &height, int bpp, bool destination) {
|
||||
u32 xOffset = -1;
|
||||
u32 yOffset = -1;
|
||||
int transferWidth = width;
|
||||
int transferHeight = height;
|
||||
|
||||
dstBasePtr &= 0x3FFFFFFF;
|
||||
srcBasePtr &= 0x3FFFFFFF;
|
||||
basePtr &= 0x3FFFFFFF;
|
||||
|
||||
for (size_t i = 0; i < vfbs_.size(); ++i) {
|
||||
VirtualFramebuffer *vfb = vfbs_[i];
|
||||
for (auto vfb : vfbs_) {
|
||||
const u32 vfb_address = vfb->fb_address & 0x3FFFFFFF;
|
||||
const u32 vfb_size = ColorBufferByteSize(vfb);
|
||||
const u32 vfb_bpp = BufferFormatBytesPerPixel(vfb->fb_format);
|
||||
const u32 vfb_byteStride = vfb->fb_stride * vfb_bpp;
|
||||
const u32 vfb_byteWidth = vfb->width * vfb_bpp;
|
||||
|
||||
// These heuristics are a bit annoying.
|
||||
// The goal is to avoid using GPU block transfers for things that ought to be memory.
|
||||
// Maybe we should even check for textures at these places instead?
|
||||
if (vfb_address <= basePtr && basePtr < vfb_address + vfb_size) {
|
||||
const u32 byteOffset = basePtr - vfb_address;
|
||||
const u32 byteStride = stride * bpp;
|
||||
const u32 memYOffset = byteOffset / byteStride;
|
||||
|
||||
if (vfb_address <= dstBasePtr && dstBasePtr < vfb_address + vfb_size) {
|
||||
const u32 byteOffset = dstBasePtr - vfb_address;
|
||||
const u32 byteStride = dstStride * bpp;
|
||||
const u32 yOffset = byteOffset / byteStride;
|
||||
|
||||
// Some games use mismatching bitdepths. But make sure the stride matches.
|
||||
// Some games use mismatching bitdepths. But make sure the stride matches.
|
||||
// If it doesn't, generally this means we detected the framebuffer with too large a height.
|
||||
// Use bufferHeight in case of buffers that resize up and down often per frame (Valkyrie Profile.)
|
||||
bool match = yOffset < dstYOffset && (int)yOffset <= (int)vfb->bufferHeight - dstHeight;
|
||||
|
||||
// TODO: Surely this first comparison should be <= ?
|
||||
// Or does the exact match (byteOffset == 0) case get handled elsewhere?
|
||||
bool match = memYOffset < yOffset && (int)memYOffset <= (int)vfb->bufferHeight - height;
|
||||
if (match && vfb_byteStride != byteStride) {
|
||||
// Grand Knights History copies with a mismatching stride but a full line at a time.
|
||||
// That's why we multiply by height, not width - this copy is a rectangle with the wrong stride but a line with the correct one.
|
||||
// Makes it hard to detect the wrong transfers in e.g. God of War.
|
||||
if (width != dstStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) {
|
||||
// However, some other games write cluts to framebuffers.
|
||||
// Let's catch this and upload. Otherwise reject the match.
|
||||
match = (vfb->usageFlags & FB_USAGE_CLUT) != 0;
|
||||
if (match) {
|
||||
dstWidth = byteStride * height / vfb_bpp;
|
||||
dstHeight = 1;
|
||||
if (transferWidth != stride || (byteStride * transferHeight != vfb_byteStride && byteStride * transferHeight != vfb_byteWidth)) {
|
||||
if (destination) {
|
||||
// However, some other games write cluts to framebuffers.
|
||||
// Let's catch this and upload. Otherwise reject the match.
|
||||
match = (vfb->usageFlags & FB_USAGE_CLUT) != 0;
|
||||
if (match) {
|
||||
width = byteStride * transferHeight / vfb_bpp;
|
||||
height = 1;
|
||||
}
|
||||
} else {
|
||||
match = false;
|
||||
}
|
||||
} else {
|
||||
dstWidth = byteStride * height / vfb_bpp;
|
||||
dstHeight = 1;
|
||||
width = byteStride * transferHeight / vfb_bpp;
|
||||
height = 1;
|
||||
}
|
||||
} else if (match) {
|
||||
dstWidth = width;
|
||||
dstHeight = height;
|
||||
width = transferWidth;
|
||||
height = transferHeight;
|
||||
}
|
||||
if (match) {
|
||||
dstYOffset = yOffset;
|
||||
dstXOffset = dstStride == 0 ? 0 : (byteOffset / bpp) % dstStride;
|
||||
dstBuffer = vfb;
|
||||
}
|
||||
}
|
||||
if (vfb_address <= srcBasePtr && srcBasePtr < vfb_address + vfb_size) {
|
||||
const u32 byteOffset = srcBasePtr - vfb_address;
|
||||
const u32 byteStride = srcStride * bpp;
|
||||
const u32 yOffset = byteOffset / byteStride;
|
||||
bool match = yOffset < srcYOffset && (int)yOffset <= (int)vfb->bufferHeight - srcHeight;
|
||||
if (match && vfb_byteStride != byteStride) {
|
||||
if (width != srcStride || (byteStride * height != vfb_byteStride && byteStride * height != vfb_byteWidth)) {
|
||||
match = false;
|
||||
} else {
|
||||
srcWidth = byteStride * height / vfb_bpp;
|
||||
srcHeight = 1;
|
||||
}
|
||||
} else if (match) {
|
||||
srcWidth = width;
|
||||
srcHeight = height;
|
||||
}
|
||||
if (match) {
|
||||
srcYOffset = yOffset;
|
||||
srcXOffset = srcStride == 0 ? 0 : (byteOffset / bpp) % srcStride;
|
||||
srcBuffer = vfb;
|
||||
xOffset = stride == 0 ? 0 : (byteOffset / bpp) % stride;
|
||||
yOffset = memYOffset;
|
||||
buffer = vfb;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (srcBuffer && !dstBuffer) {
|
||||
if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB ||
|
||||
(PSP_CoreParameter().compat.flags().IntraVRAMBlockTransferAllowCreateFB &&
|
||||
Memory::IsVRAMAddress(srcBuffer->fb_address) && Memory::IsVRAMAddress(dstBasePtr))) {
|
||||
GEBufferFormat ramFormat;
|
||||
// Try to guess the appropriate format. We only know the bpp from the block transfer command (16 or 32 bit).
|
||||
if (bpp == 4) {
|
||||
// Only one possibility unless it's doing split pixel tricks (which we could detect through stride maybe).
|
||||
ramFormat = GE_FORMAT_8888;
|
||||
} else if (srcBuffer->fb_format != GE_FORMAT_8888) {
|
||||
// We guess that the game will interpret the data the same as it was in the source of the copy.
|
||||
// Seems like a likely good guess, and works in Test Drive Unlimited.
|
||||
ramFormat = srcBuffer->fb_format;
|
||||
} else {
|
||||
// No info left - just fall back to something. But this is definitely split pixel tricks.
|
||||
ramFormat = GE_FORMAT_5551;
|
||||
}
|
||||
dstBuffer = CreateRAMFramebuffer(dstBasePtr, dstWidth, dstHeight, dstStride, ramFormat);
|
||||
}
|
||||
}
|
||||
|
||||
if (dstBuffer)
|
||||
dstBuffer->last_frame_used = gpuStats.numFlips;
|
||||
|
||||
if (dstYOffset != (u32)-1) {
|
||||
dstY += dstYOffset;
|
||||
dstX += dstXOffset;
|
||||
}
|
||||
if (srcYOffset != (u32)-1) {
|
||||
srcY += srcYOffset;
|
||||
srcX += srcXOffset;
|
||||
if (yOffset != (u32)-1) {
|
||||
x += xOffset;
|
||||
y += yOffset;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1956,8 +1902,33 @@ bool FramebufferManagerCommon::NotifyBlockTransferBefore(u32 dstBasePtr, int dst
|
|||
int dstWidth = width;
|
||||
int dstHeight = height;
|
||||
|
||||
// This looks at the compat flags BlockTransferAllowCreateFB*.
|
||||
FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp);
|
||||
// These modify the X/Y/W/H parameters depending on the memory offset of the base pointers from the actual buffers.
|
||||
FindTransferFramebuffer(srcBuffer, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, bpp, false);
|
||||
FindTransferFramebuffer(dstBuffer, dstBasePtr, dstStride, dstX, dstY, dstWidth, dstHeight, bpp, true);
|
||||
|
||||
if (srcBuffer && !dstBuffer) {
|
||||
if (PSP_CoreParameter().compat.flags().BlockTransferAllowCreateFB ||
|
||||
(PSP_CoreParameter().compat.flags().IntraVRAMBlockTransferAllowCreateFB &&
|
||||
Memory::IsVRAMAddress(srcBuffer->fb_address) && Memory::IsVRAMAddress(dstBasePtr))) {
|
||||
GEBufferFormat ramFormat;
|
||||
// Try to guess the appropriate format. We only know the bpp from the block transfer command (16 or 32 bit).
|
||||
if (bpp == 4) {
|
||||
// Only one possibility unless it's doing split pixel tricks (which we could detect through stride maybe).
|
||||
ramFormat = GE_FORMAT_8888;
|
||||
} else if (srcBuffer->fb_format != GE_FORMAT_8888) {
|
||||
// We guess that the game will interpret the data the same as it was in the source of the copy.
|
||||
// Seems like a likely good guess, and works in Test Drive Unlimited.
|
||||
ramFormat = srcBuffer->fb_format;
|
||||
} else {
|
||||
// No info left - just fall back to something. But this is definitely split pixel tricks.
|
||||
ramFormat = GE_FORMAT_5551;
|
||||
}
|
||||
dstBuffer = CreateRAMFramebuffer(dstBasePtr, dstWidth, dstHeight, dstStride, ramFormat);
|
||||
}
|
||||
}
|
||||
|
||||
if (dstBuffer)
|
||||
dstBuffer->last_frame_used = gpuStats.numFlips;
|
||||
|
||||
if (dstBuffer && srcBuffer) {
|
||||
if (srcBuffer == dstBuffer) {
|
||||
|
@ -2032,13 +2003,15 @@ void FramebufferManagerCommon::NotifyBlockTransferAfter(u32 dstBasePtr, int dstS
|
|||
}
|
||||
|
||||
if (MayIntersectFramebuffer(srcBasePtr) || MayIntersectFramebuffer(dstBasePtr)) {
|
||||
// TODO: Figure out how we can avoid repeating the search here.
|
||||
VirtualFramebuffer *dstBuffer = 0;
|
||||
VirtualFramebuffer *srcBuffer = 0;
|
||||
int srcWidth = width;
|
||||
int srcHeight = height;
|
||||
int dstWidth = width;
|
||||
int dstHeight = height;
|
||||
FindTransferFramebuffers(dstBuffer, srcBuffer, dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, dstWidth, dstHeight, bpp);
|
||||
FindTransferFramebuffer(srcBuffer, srcBasePtr, srcStride, srcX, srcY, srcWidth, srcHeight, bpp, false);
|
||||
FindTransferFramebuffer(dstBuffer, dstBasePtr, dstStride, dstX, dstY, dstWidth, dstHeight, bpp, true);
|
||||
|
||||
// A few games use this INSTEAD of actually drawing the video image to the screen, they just blast it to
|
||||
// the backbuffer. Detect this and have the framebuffermanager draw the pixels.
|
||||
|
@ -2499,9 +2472,7 @@ void FramebufferManagerCommon::RebindFramebuffer(const char *tag) {
|
|||
std::vector<FramebufferInfo> FramebufferManagerCommon::GetFramebufferList() const {
|
||||
std::vector<FramebufferInfo> list;
|
||||
|
||||
for (size_t i = 0; i < vfbs_.size(); ++i) {
|
||||
VirtualFramebuffer *vfb = vfbs_[i];
|
||||
|
||||
for (auto vfb : vfbs_) {
|
||||
FramebufferInfo info;
|
||||
info.fb_address = vfb->fb_address;
|
||||
info.z_address = vfb->z_address;
|
||||
|
|
|
@ -275,10 +275,15 @@ public:
|
|||
void UpdateFromMemory(u32 addr, int size, bool safe);
|
||||
void ApplyClearToMemory(int x1, int y1, int x2, int y2, u32 clearColor);
|
||||
bool PerformStencilUpload(u32 addr, int size, StencilUpload flags);
|
||||
|
||||
// Returns true if it's sure this is a direct FBO->FBO transfer and it has already handled it.
|
||||
// In that case we hardly need to actually copy the bytes in VRAM, they will be wrong anyway (unless
|
||||
// read framebuffers is on, in which case this should always return false).
|
||||
// If this returns false, a memory copy will happen and NotifyBlockTransferAfter will be called.
|
||||
bool NotifyBlockTransferBefore(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp, u32 skipDrawReason);
|
||||
|
||||
// This gets called after the memory copy, in case NotifyBlockTransferBefore returned false.
|
||||
// Otherwise it doesn't get called.
|
||||
void NotifyBlockTransferAfter(u32 dstBasePtr, int dstStride, int dstX, int dstY, u32 srcBasePtr, int srcStride, int srcX, int srcY, int w, int h, int bpp, u32 skipDrawReason);
|
||||
|
||||
bool BindFramebufferAsColorTexture(int stage, VirtualFramebuffer *framebuffer, int flags);
|
||||
|
@ -412,7 +417,9 @@ protected:
|
|||
|
||||
bool ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const;
|
||||
void DownloadFramebufferOnSwitch(VirtualFramebuffer *vfb);
|
||||
void FindTransferFramebuffers(VirtualFramebuffer *&dstBuffer, VirtualFramebuffer *&srcBuffer, u32 dstBasePtr, int dstStride, int &dstX, int &dstY, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int &dstWidth, int &dstHeight, int bpp);
|
||||
|
||||
void FindTransferFramebuffer(VirtualFramebuffer *&srcBuffer, u32 srcBasePtr, int srcStride, int &srcX, int &srcY, int &srcWidth, int &srcHeight, int bpp, bool destination);
|
||||
|
||||
VirtualFramebuffer *FindDownloadTempBuffer(VirtualFramebuffer *vfb);
|
||||
virtual void UpdateDownloadTempBuffer(VirtualFramebuffer *nvfb) {}
|
||||
|
||||
|
|
|
@ -2773,15 +2773,6 @@ void GPUCommon::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat f
|
|||
}
|
||||
|
||||
void GPUCommon::DoBlockTransfer(u32 skipDrawReason) {
|
||||
// TODO: This is used a lot to copy data around between render targets and textures,
|
||||
// and also to quickly load textures from RAM to VRAM. So we should do checks like the following:
|
||||
// * Does dstBasePtr point to an existing texture? If so maybe reload it immediately.
|
||||
//
|
||||
// * Does srcBasePtr point to a render target, and dstBasePtr to a texture? If so
|
||||
// either copy between rt and texture or reassign the texture to point to the render target
|
||||
//
|
||||
// etc....
|
||||
|
||||
u32 srcBasePtr = gstate.getTransferSrcAddress();
|
||||
u32 srcStride = gstate.getTransferSrcStride();
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue