Compare commits

...
Sign in to create a new pull request.

3 commits

Author SHA1 Message Date
Henrik Rydgård
a989b08648 Plumb through a parameter for creating framebuffers of different color formats 2023-02-03 13:39:51 +01:00
Henrik Rydgård
6c9963bd26 Remove a step of confusing indentation in TextureCacheCommon::LoadClut 2023-02-03 13:26:42 +01:00
Henrik Rydgård
49f830d88c Small refactor 2023-02-03 13:21:20 +01:00
14 changed files with 244 additions and 180 deletions

View file

@ -1325,7 +1325,17 @@ Framebuffer *D3D11DrawContext::CreateFramebuffer(const FramebufferDesc &desc) {
// Texture arrays are supported but we don't have any other use cases yet.
_dbg_assert_(desc.numLayers == 1);
fb->colorFormat = DXGI_FORMAT_R8G8B8A8_UNORM;
DXGI_FORMAT colorFormat;
switch (desc.colorFormat) {
case DataFormat::R8G8B8A8_UNORM:
colorFormat = DXGI_FORMAT_R8G8B8A8_UNORM;
break;
default:
_assert_msg_(false, "Framebuffer format not supported");
return nullptr;
}
fb->colorFormat = colorFormat;
D3D11_TEXTURE2D_DESC descColor{};
descColor.Width = desc.width;
descColor.Height = desc.height;

View file

@ -1264,7 +1264,18 @@ Framebuffer *D3D9Context::CreateFramebuffer(const FramebufferDesc &desc) {
D3D9Framebuffer *fbo = new D3D9Framebuffer(desc.width, desc.height);
fbo->depthstenciltex = nullptr;
HRESULT rtResult = device_->CreateTexture(desc.width, desc.height, 1, D3DUSAGE_RENDERTARGET, D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &fbo->tex, nullptr);
D3DFORMAT colorFormat;
switch (desc.colorFormat) {
case DataFormat::R8G8B8A8_UNORM:
// We pretend to support this, although in reality we use the reverse format.
colorFormat = D3DFMT_A8R8G8B8;
break;
default:
_assert_msg_(false, "Framebuffer format not supported");
return nullptr;
}
HRESULT rtResult = device_->CreateTexture(desc.width, desc.height, 1, D3DUSAGE_RENDERTARGET, colorFormat, D3DPOOL_DEFAULT, &fbo->tex, nullptr);
if (FAILED(rtResult)) {
ERROR_LOG(G3D, "Failed to create render target");
fbo->Release();

View file

@ -517,7 +517,22 @@ void GLQueueRunner::InitCreateFramebuffer(const GLRInitStep &step) {
// Color texture is same everywhere
glGenFramebuffers(1, &fbo->handle);
initFBOTexture(fbo->color_texture, GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, true);
GLint colorInternalFormat;
GLint colorFormat;
GLint colorElementType;
switch (fbo->colorFormat) {
case Draw::DataFormat::R8G8B8A8_UNORM:
colorInternalFormat = GL_RGBA;
colorFormat = GL_RGBA;
colorElementType = GL_UNSIGNED_BYTE;
break;
default:
_assert_msg_(false, "Data format not supported");
return;
}
initFBOTexture(fbo->color_texture, colorInternalFormat, colorFormat, colorElementType, true);
retry_depth:
if (!fbo->z_stencil_) {

View file

@ -106,6 +106,7 @@ void GLDeleter::Perform(GLRenderManager *renderManager, bool skipGLCalls) {
framebuffer->z_stencil_texture.texture = 0;
framebuffer->z_buffer = 0;
framebuffer->stencil_buffer = 0;
framebuffer->colorFormat = Draw::DataFormat::UNDEFINED;
}
delete framebuffer;
}

View file

@ -51,10 +51,9 @@ public:
class GLRFramebuffer {
public:
GLRFramebuffer(const Draw::DeviceCaps &caps, int _width, int _height, bool z_stencil)
GLRFramebuffer(const Draw::DeviceCaps &caps, Draw::DataFormat _colorFormat, int _width, int _height, bool z_stencil)
: color_texture(caps, _width, _height, 1, 1), z_stencil_texture(caps, _width, _height, 1, 1),
width(_width), height(_height), z_stencil_(z_stencil) {
}
colorFormat(_colorFormat), width(_width), height(_height), z_stencil_(z_stencil) {}
~GLRFramebuffer();
@ -65,6 +64,7 @@ public:
GLRTexture z_stencil_texture;
GLuint z_buffer = 0;
GLuint stencil_buffer = 0;
Draw::DataFormat colorFormat;
int width;
int height;
@ -464,9 +464,9 @@ public:
return step.create_shader.shader;
}
GLRFramebuffer *CreateFramebuffer(int width, int height, bool z_stencil) {
GLRFramebuffer *CreateFramebuffer(Draw::DataFormat colorFormat, int width, int height, bool z_stencil) {
GLRInitStep step{ GLRInitStepType::CREATE_FRAMEBUFFER };
step.create_framebuffer.framebuffer = new GLRFramebuffer(caps_, width, height, z_stencil);
step.create_framebuffer.framebuffer = new GLRFramebuffer(caps_, colorFormat, width, height, z_stencil);
initSteps_.push_back(step);
return step.create_framebuffer.framebuffer;
}

View file

@ -1437,7 +1437,7 @@ Framebuffer *OpenGLContext::CreateFramebuffer(const FramebufferDesc &desc) {
// TODO: Support multiview later. (It's our only use case for multi layers).
_dbg_assert_(desc.numLayers == 1);
GLRFramebuffer *framebuffer = renderManager_.CreateFramebuffer(desc.width, desc.height, desc.z_stencil);
GLRFramebuffer *framebuffer = renderManager_.CreateFramebuffer(desc.colorFormat, desc.width, desc.height, desc.z_stencil);
OpenGLFramebuffer *fbo = new OpenGLFramebuffer(&renderManager_, framebuffer);
return fbo;
}

View file

@ -35,12 +35,12 @@ void VKRImage::Delete(VulkanContext *vulkan) {
}
}
VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag)
VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, VkFormat colorFormat, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag)
: vulkan_(vk), tag_(tag), width(_width), height(_height), numLayers(_numLayers) {
_dbg_assert_(tag);
CreateImage(vulkan_, initCmd, color, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
CreateImage(vulkan_, initCmd, color, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, colorFormat, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
if (createDepthStencilBuffer) {
CreateImage(vulkan_, initCmd, depth, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
}

View file

@ -58,7 +58,7 @@ struct VKRImage {
class VKRFramebuffer {
public:
VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag);
VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, VkFormat colorFormat, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag);
~VKRFramebuffer();
VkFramebuffer Get(VKRRenderPass *compatibleRenderPass, RenderPassType rpType);

View file

@ -415,7 +415,8 @@ public:
void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override;
bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override;
bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, const char *tag) override;
DataFormat PreferredFramebufferReadbackFormat(Framebuffer *src) override;
DataFormat PreferredColorReadbackFormat(Framebuffer *src) override;
DataFormat PreferredDepthReadbackFormat(Framebuffer *src) override;
// These functions should be self explanatory.
void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override;
@ -1602,8 +1603,12 @@ Framebuffer *VKContext::CreateFramebuffer(const FramebufferDesc &desc) {
_assert_(desc.width > 0);
_assert_(desc.height > 0);
_assert_(desc.colorFormat == DataFormat::R8G8B8A8_UNORM || desc.colorFormat == DataFormat::R16_UNORM);
VkFormat colorFormat = DataFormatToVulkan(desc.colorFormat);
VkCommandBuffer cmd = renderManager_.GetInitCmd();
VKRFramebuffer *vkrfb = new VKRFramebuffer(vulkan_, cmd, renderManager_.GetQueueRunner()->GetCompatibleRenderPass(), desc.width, desc.height, desc.numLayers, desc.multiSampleLevel, desc.z_stencil, desc.tag);
VKRFramebuffer *vkrfb = new VKRFramebuffer(vulkan_, cmd, renderManager_.GetQueueRunner()->GetCompatibleRenderPass(), colorFormat, desc.width, desc.height, desc.numLayers, desc.multiSampleLevel, desc.z_stencil, desc.tag);
return new VKFramebuffer(vkrfb, desc.multiSampleLevel);
}
@ -1643,15 +1648,19 @@ bool VKContext::CopyFramebufferToMemorySync(Framebuffer *srcfb, int channelBits,
return renderManager_.CopyFramebufferToMemorySync(src ? src->GetFB() : nullptr, aspectMask, x, y, w, h, format, (uint8_t *)pixels, pixelStride, tag);
}
DataFormat VKContext::PreferredFramebufferReadbackFormat(Framebuffer *src) {
DataFormat VKContext::PreferredColorReadbackFormat(Framebuffer *src) {
if (src) {
return DrawContext::PreferredFramebufferReadbackFormat(src);
return DrawContext::PreferredColorReadbackFormat(src);
}
if (vulkan_->GetSwapchainFormat() == VK_FORMAT_B8G8R8A8_UNORM) {
return Draw::DataFormat::B8G8R8A8_UNORM;
}
return DrawContext::PreferredFramebufferReadbackFormat(src);
return DrawContext::PreferredColorReadbackFormat(src);
}
DataFormat VKContext::PreferredDepthReadbackFormat(Framebuffer *src) {
return Draw::DataFormat::R16_UNORM;
}
void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) {

View file

@ -295,6 +295,7 @@ enum class Event {
constexpr uint32_t MAX_TEXTURE_SLOTS = 3;
struct FramebufferDesc {
DataFormat colorFormat;
int width;
int height;
int depth;
@ -696,9 +697,13 @@ public:
virtual bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, const char *tag) {
return false;
}
virtual DataFormat PreferredFramebufferReadbackFormat(Framebuffer *src) {
virtual DataFormat PreferredColorReadbackFormat(Framebuffer *src) {
return DataFormat::R8G8B8A8_UNORM;
}
virtual DataFormat PreferredDepthReadbackFormat(Framebuffer *src) {
// We use a shader to read depth and write color, while scaling.
return DataFormat::R16_UNORM;
}
// These functions should be self explanatory.
// Binding a zero render target means binding the backbuffer.

View file

@ -1689,7 +1689,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w,
char tag[128];
size_t len = FormatFramebufferName(vfb, tag, sizeof(tag));
vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), msaaLevel_, true, tag });
vfb->fbo = draw_->CreateFramebuffer({ colorFormat_, vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), msaaLevel_, true, tag });
if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) {
NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, ColorBufferByteSize(vfb), tag, len);
}
@ -2060,7 +2060,7 @@ VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAd
char name[64];
snprintf(name, sizeof(name), "%08x_color_RAM", vfb->fb_address);
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);
vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), 0, true, name });
vfb->fbo = draw_->CreateFramebuffer({ colorFormat_, vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), 0, true, name });
vfbs_.push_back(vfb);
u32 byteSize = ColorBufferByteSize(vfb);
@ -2112,8 +2112,7 @@ VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFram
char name[64];
snprintf(name, sizeof(name), "download_temp");
// TODO: We don't have a way to create a depth-only framebuffer yet.
// Also, at least on Vulkan we always create both depth and color, need to rework how we handle renderpasses.
nvfb->fbo = draw_->CreateFramebuffer({ nvfb->bufferWidth, nvfb->bufferHeight, 1, 1, 0, channel == RASTER_DEPTH ? true : false, name });
nvfb->fbo = draw_->CreateFramebuffer({ colorFormat_, nvfb->bufferWidth, nvfb->bufferHeight, 1, 1, 0, channel == RASTER_DEPTH ? true : false, name });
if (!nvfb->fbo) {
ERROR_LOG(FRAMEBUF, "Error creating FBO! %d x %d", nvfb->renderWidth, nvfb->renderHeight);
delete nvfb;
@ -2496,7 +2495,7 @@ static const char *TempFBOReasonToString(TempFBO reason) {
}
Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u16 h) {
u64 key = ((u64)reason << 48) | ((u32)w << 16) | h;
u64 key = ((u64)reason << 48) | ((u64)w << 16) | h;
auto it = tempFBOs_.find(key);
if (it != tempFBOs_.end()) {
it->second.last_frame_used = gpuStats.numFlips;
@ -2507,7 +2506,7 @@ Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u
char name[128];
snprintf(name, sizeof(name), "tempfbo_%s_%dx%d", TempFBOReasonToString(reason), w / renderScaleFactor_, h / renderScaleFactor_);
Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, GetFramebufferLayers(), 0, z_stencil, name });
Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ colorFormat_, w, h, 1, GetFramebufferLayers(), 0, z_stencil, name });
if (!fbo) {
return nullptr;
}
@ -2699,7 +2698,7 @@ bool FramebufferManagerCommon::GetStencilbuffer(u32 fb_address, int fb_stride, G
bool FramebufferManagerCommon::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
int w, h;
draw_->GetFramebufferDimensions(nullptr, &w, &h);
Draw::DataFormat fmt = draw_->PreferredFramebufferReadbackFormat(nullptr);
Draw::DataFormat fmt = draw_->PreferredColorReadbackFormat(nullptr);
// Ignore preferred formats other than BGRA.
if (fmt != Draw::DataFormat::B8G8R8A8_UNORM)
fmt = Draw::DataFormat::R8G8B8A8_UNORM;
@ -2717,19 +2716,14 @@ bool FramebufferManagerCommon::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
// (Except using the GPU might cause problems because of various implementations'
// dithering behavior and games that expect exact colors like Danganronpa, so we
// can't entirely be rid of the CPU path.) -- unknown
void FramebufferManagerCommon::ReadbackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel) {
void FramebufferManagerCommon::ReadbackFramebufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, RasterChannel channel, Draw::DataFormat destFormat, u32 fb_address, u32 stride) {
if (w <= 0 || h <= 0) {
ERROR_LOG(G3D, "Bad inputs to ReadbackFramebufferSync: %d %d %d %d", x, y, w, h);
return;
}
const u32 fb_address = channel == RASTER_COLOR ? vfb->fb_address : vfb->z_address;
Draw::DataFormat destFormat = channel == RASTER_COLOR ? GEFormatToThin3D(vfb->fb_format) : GEFormatToThin3D(GE_FORMAT_DEPTH16);
const int dstBpp = (int)DataFormatSizeInBytes(destFormat);
int stride = channel == RASTER_COLOR ? vfb->fb_stride : vfb->z_stride;
const int dstByteOffset = (y * stride + x) * dstBpp;
// Leave the gap between the end of the last line and the full stride.
// This is only used for the NotifyMemInfo range.
@ -2747,14 +2741,14 @@ void FramebufferManagerCommon::ReadbackFramebufferSync(VirtualFramebuffer *vfb,
DEBUG_LOG(G3D, "Reading framebuffer to mem, fb_address = %08x, ptr=%p", fb_address, destPtr);
if (channel == RASTER_DEPTH) {
_assert_msg_(vfb && vfb->z_address != 0 && vfb->z_stride != 0, "Depth buffer invalid");
ReadbackDepthbufferSync(vfb->fbo, x, y, w, h, (uint16_t *)destPtr, stride);
// _assert_msg_(vfb && vfb->z_address != 0 && vfb->z_stride != 0, "Depth buffer invalid");
ReadbackDepthbufferSync(fbo, x, y, w, h, (uint16_t *)destPtr, stride);
} else {
draw_->CopyFramebufferToMemorySync(vfb->fbo, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, destPtr, stride, "ReadbackFramebufferSync");
draw_->CopyFramebufferToMemorySync(fbo, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, destPtr, stride, "ReadbackFramebufferSync");
}
char tag[128];
size_t len = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->fb_format));
size_t len = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%dx%d", fb_address, w, h);
NotifyMemInfo(MemBlockFlags::WRITE, fb_address + dstByteOffset, dstSize, tag, len);
gpuStats.numReadbacks++;
@ -2807,14 +2801,19 @@ void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb,
}
}
u32 address = vfb->Address(channel);
int stride = vfb->Stride(channel);
Draw::DataFormat destFormat = channel == RASTER_COLOR ? GEFormatToThin3D(vfb->fb_format) : GEFormatToThin3D(GE_FORMAT_DEPTH16);
if (vfb->renderWidth == vfb->width && vfb->renderHeight == vfb->height) {
// No need to stretch-blit
ReadbackFramebufferSync(vfb, x, y, w, h, channel);
ReadbackFramebufferSync(vfb->fbo, x, y, w, h, channel, destFormat, address, stride);
} else {
VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb, channel);
if (nvfb) {
BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, channel, "Blit_ReadFramebufferToMemory");
ReadbackFramebufferSync(nvfb, x, y, w, h, channel);
ReadbackFramebufferSync(nvfb->fbo, x, y, w, h, channel, destFormat, address, stride);
}
}
@ -2837,7 +2836,7 @@ void FramebufferManagerCommon::FlushBeforeCopy() {
}
}
// TODO: Replace with with depal, reading the palette from the texture on the GPU directly.
// In practice, this has been replaced with depal, reading the palette from the texture on the GPU directly.
void FramebufferManagerCommon::DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) {
VirtualFramebuffer *vfb = GetVFBAt(fb_address);
if (vfb && vfb->fb_stride != 0) {
@ -2871,7 +2870,7 @@ void FramebufferManagerCommon::DownloadFramebufferForClut(u32 fb_address, u32 lo
VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb, RASTER_COLOR);
if (nvfb) {
BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, RASTER_COLOR, "Blit_DownloadFramebufferForClut");
ReadbackFramebufferSync(nvfb, x, y, w, h, RASTER_COLOR);
ReadbackFramebufferSync(nvfb->fbo, x, y, w, h, RASTER_COLOR, GEFormatToThin3D(nvfb->fb_format), nvfb->fb_address, nvfb->fb_stride);
}
textureCache_->ForgetLastTexture();
@ -3202,7 +3201,7 @@ VirtualFramebuffer *FramebufferManagerCommon::ResolveFramebufferColorToFormat(Vi
char tag[128];
FormatFramebufferName(vfb, tag, sizeof(tag));
vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), 0, true, tag });
vfb->fbo = draw_->CreateFramebuffer({ colorFormat_, vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), 0, true, tag });
vfbs_.push_back(vfb);
}

View file

@ -152,6 +152,9 @@ struct VirtualFramebuffer {
inline int BufferWidthInBytes() const { return bufferWidth * BufferFormatBytesPerPixel(fb_format); }
inline int FbStrideInBytes() const { return fb_stride * BufferFormatBytesPerPixel(fb_format); }
inline int ZStrideInBytes() const { return z_stride * 2; }
inline u32 Address(RasterChannel channel) const { return channel == RASTER_COLOR ? fb_address : z_address; }
inline int Stride(RasterChannel channel) const { return channel == RASTER_COLOR ? fb_stride : z_stride; }
};
struct FramebufferHeuristicParams {
@ -451,7 +454,8 @@ public:
}
protected:
virtual void ReadbackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel);
void ReadbackFramebufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, RasterChannel channel, Draw::DataFormat destFormat, u32 fb_address, u32 stride);
// Used for when a shader is required, such as GLES.
virtual bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride);
virtual bool ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride);
@ -601,4 +605,6 @@ protected:
Draw2D draw2D_;
// The fragment shaders are "owned" by the pipelines since they're 1:1.
const Draw::DataFormat colorFormat_ = Draw::DataFormat::R8G8B8A8_UNORM;
};

View file

@ -278,7 +278,7 @@ bool PresentationCommon::UpdatePostShader() {
previousIndex_ = 0;
for (int i = 0; i < FRAMES; ++i) {
previousFramebuffers_[i] = draw_->CreateFramebuffer({ w, h, 1, 1, 0, false, "inter_presentation" });
previousFramebuffers_[i] = draw_->CreateFramebuffer({ Draw::DataFormat::R8G8B8A8_UNORM, w, h, 1, 1, 0, false, "inter_presentation" });
if (!previousFramebuffers_[i]) {
DestroyPostShader();
return false;
@ -394,7 +394,7 @@ bool PresentationCommon::AllocateFramebuffer(int w, int h) {
}
// No depth/stencil for post processing
Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, 1, 0, false, "presentation" });
Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ Draw::DataFormat::R8G8B8A8_UNORM, w, h, 1, 1, 0, false, "presentation" });
if (!fbo) {
return false;
}

View file

@ -1237,158 +1237,166 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
clutTotalBytes_ = loadBytes;
clutRenderAddress_ = 0xFFFFFFFF;
if (Memory::IsValidAddress(clutAddr)) {
if (Memory::IsVRAMAddress(clutAddr)) {
// Clear the uncached and mirror bits, etc. to match framebuffers.
const u32 clutLoadAddr = clutAddr & 0x041FFFFF;
const u32 clutLoadEnd = clutLoadAddr + loadBytes;
static const u32 MAX_CLUT_OFFSET = 4096;
if (!Memory::IsValidAddress(clutAddr)) {
memset(clutBufRaw_, 0x00, loadBytes);
// Reload the clut next time.
clutLastFormat_ = 0xFFFFFFFF;
clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes);
return;
}
clutRenderOffset_ = MAX_CLUT_OFFSET;
const std::vector<VirtualFramebuffer *> &framebuffers = framebufferManager_->Framebuffers();
// Check if it's trying to upload pixels from a framebuffer as a CLUT.
if (Memory::IsVRAMAddress(clutAddr)) {
// Clear the uncached and mirror bits, etc. to match framebuffers.
const u32 clutLoadAddr = clutAddr & 0x041FFFFF;
const u32 clutLoadEnd = clutLoadAddr + loadBytes;
static const u32 MAX_CLUT_OFFSET = 4096;
u32 bestClutAddress = 0xFFFFFFFF;
clutRenderOffset_ = MAX_CLUT_OFFSET;
const std::vector<VirtualFramebuffer *> &framebuffers = framebufferManager_->Framebuffers();
VirtualFramebuffer *chosenFramebuffer = nullptr;
for (VirtualFramebuffer *framebuffer : framebuffers) {
// Let's not deal with divide by zero.
if (framebuffer->fb_stride == 0)
continue;
u32 bestClutAddress = 0xFFFFFFFF;
const u32 fb_address = framebuffer->fb_address;
const u32 fb_bpp = BufferFormatBytesPerPixel(framebuffer->fb_format);
int offset = clutLoadAddr - fb_address;
VirtualFramebuffer *chosenFramebuffer = nullptr;
for (VirtualFramebuffer *framebuffer : framebuffers) {
// Let's not deal with divide by zero.
if (framebuffer->fb_stride == 0)
continue;
// Is this inside the framebuffer at all? Note that we only check the first line here, this should
// be changed.
bool matchRange = offset >= 0 && offset < (int)(framebuffer->fb_stride * fb_bpp);
if (matchRange) {
// And is it inside the rendered area? Sometimes games pack data in the margin between width and stride.
// If the framebuffer width was detected as 512, we're gonna assume it's really 480.
int fbMatchWidth = framebuffer->width;
if (fbMatchWidth == 512) {
fbMatchWidth = 480;
}
bool inMargin = ((offset / fb_bpp) % framebuffer->fb_stride) == fbMatchWidth;
const u32 fb_address = framebuffer->fb_address;
const u32 fb_bpp = BufferFormatBytesPerPixel(framebuffer->fb_format);
int offset = clutLoadAddr - fb_address;
// The offset check here means, in the context of the loop, that we'll pick
// the framebuffer with the smallest offset. This is yet another framebuffer matching
// loop with its own rules, eventually we'll probably want to do something
// more systematic.
if (matchRange && !inMargin && offset < (int)clutRenderOffset_) {
WARN_LOG_N_TIMES(clutfb, 5, G3D, "Detected LoadCLUT(%d bytes) from framebuffer %08x (%s), byte offset %d", loadBytes, fb_address, GeBufferFormatToString(framebuffer->fb_format), offset);
framebuffer->last_frame_clut = gpuStats.numFlips;
// Also mark used so it's not decimated.
framebuffer->last_frame_used = gpuStats.numFlips;
framebuffer->usageFlags |= FB_USAGE_CLUT;
bestClutAddress = framebuffer->fb_address;
clutRenderOffset_ = (u32)offset;
chosenFramebuffer = framebuffer;
if (offset == 0) {
// Not gonna find a better match according to the smallest-offset rule, so we'll go with this one.
break;
}
// Is this inside the framebuffer at all? Note that we only check the first line here, this should
// be changed.
bool matchRange = offset >= 0 && offset < (int)(framebuffer->fb_stride * fb_bpp);
if (matchRange) {
// And is it inside the rendered area? Sometimes games pack data in the margin between width and stride.
// If the framebuffer width was detected as 512, we're gonna assume it's really 480.
int fbMatchWidth = framebuffer->width;
if (fbMatchWidth == 512) {
fbMatchWidth = 480;
}
bool inMargin = ((offset / fb_bpp) % framebuffer->fb_stride) == fbMatchWidth;
// The offset check here means, in the context of the loop, that we'll pick
// the framebuffer with the smallest offset. This is yet another framebuffer matching
// loop with its own rules, eventually we'll probably want to do something
// more systematic.
if (matchRange && !inMargin && offset < (int)clutRenderOffset_) {
WARN_LOG_N_TIMES(clutfb, 5, G3D, "Detected LoadCLUT(%d bytes) from framebuffer %08x (%s), byte offset %d", loadBytes, fb_address, GeBufferFormatToString(framebuffer->fb_format), offset);
framebuffer->last_frame_clut = gpuStats.numFlips;
// Also mark used so it's not decimated.
framebuffer->last_frame_used = gpuStats.numFlips;
framebuffer->usageFlags |= FB_USAGE_CLUT;
bestClutAddress = framebuffer->fb_address;
clutRenderOffset_ = (u32)offset;
chosenFramebuffer = framebuffer;
if (offset == 0) {
// Not gonna find a better match according to the smallest-offset rule, so we'll go with this one.
break;
}
}
}
// To turn off dynamic CLUT (for demonstration or testing purposes), add "false &&" to this check.
if (chosenFramebuffer && chosenFramebuffer->fbo) {
clutRenderAddress_ = bestClutAddress;
if (!dynamicClutTemp_) {
Draw::FramebufferDesc desc{};
desc.width = 512;
desc.height = 1;
desc.depth = 1;
desc.z_stencil = false;
desc.numLayers = 1;
desc.multiSampleLevel = 0;
desc.tag = "dynamic_clut";
dynamicClutFbo_ = draw_->CreateFramebuffer(desc);
desc.tag = "dynamic_clut_temp";
dynamicClutTemp_ = draw_->CreateFramebuffer(desc);
}
// We'll need to copy from the offset.
const u32 fb_bpp = BufferFormatBytesPerPixel(chosenFramebuffer->fb_format);
const int totalPixelsOffset = clutRenderOffset_ / fb_bpp;
const int clutYOffset = totalPixelsOffset / chosenFramebuffer->fb_stride;
const int clutXOffset = totalPixelsOffset % chosenFramebuffer->fb_stride;
const int scale = chosenFramebuffer->renderScaleFactor;
// Copy the pixels to our temp clut, scaling down if needed and wrapping.
framebufferManager_->BlitUsingRaster(
chosenFramebuffer->fbo, clutXOffset * scale, clutYOffset * scale, (clutXOffset + 512.0f) * scale, (clutYOffset + 1.0f) * scale,
dynamicClutTemp_, 0.0f, 0.0f, 512.0f, 1.0f,
false, scale, framebufferManager_->Get2DPipeline(DRAW2D_COPY_COLOR_RECT2LIN), "copy_clut_to_temp");
framebufferManager_->RebindFramebuffer("after_copy_clut_to_temp");
clutRenderFormat_ = chosenFramebuffer->fb_format;
}
NotifyMemInfo(MemBlockFlags::ALLOC, clutAddr, loadBytes, "CLUT");
}
// It's possible for a game to load CLUT outside valid memory without crashing, should result in zeroes.
u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
_assert_(bytes <= 2048);
bool performDownload = PSP_CoreParameter().compat.flags().AllowDownloadCLUT;
if (GPURecord::IsActive())
performDownload = true;
if (clutRenderAddress_ != 0xFFFFFFFF && performDownload) {
framebufferManager_->DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes);
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
// To turn off dynamic CLUT (for demonstration or testing purposes), add "false &&" to this check.
if (chosenFramebuffer && chosenFramebuffer->fbo) {
clutRenderAddress_ = bestClutAddress;
if (!dynamicClutTemp_) {
Draw::FramebufferDesc desc{};
desc.width = 512;
desc.height = 1;
desc.depth = 1;
desc.z_stencil = false;
desc.numLayers = 1;
desc.multiSampleLevel = 0;
desc.tag = "dynamic_clut";
dynamicClutFbo_ = draw_->CreateFramebuffer(desc);
desc.tag = "dynamic_clut_temp";
dynamicClutTemp_ = draw_->CreateFramebuffer(desc);
}
} else {
// Here we could check for clutRenderAddress_ != 0xFFFFFFFF and zero the CLUT or something,
// but choosing not to for now. Though the results of loading the CLUT from RAM here is
// almost certainly going to be bogus.
#ifdef _M_SSE
if (bytes == loadBytes) {
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
__m128i *dest = (__m128i *)clutBufRaw_;
int numBlocks = bytes / 32;
for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
__m128i data1 = _mm_loadu_si128(source);
__m128i data2 = _mm_loadu_si128(source + 1);
_mm_store_si128(dest, data1);
_mm_store_si128(dest + 1, data2);
}
} else {
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
}
#elif PPSSPP_ARCH(ARM_NEON)
if (bytes == loadBytes) {
const uint32_t *source = (const uint32_t *)Memory::GetPointerUnchecked(clutAddr);
uint32_t *dest = (uint32_t *)clutBufRaw_;
int numBlocks = bytes / 32;
for (int i = 0; i < numBlocks; i++, source += 8, dest += 8) {
uint32x4_t data1 = vld1q_u32(source);
uint32x4_t data2 = vld1q_u32(source + 4);
vst1q_u32(dest, data1);
vst1q_u32(dest + 4, data2);
}
} else {
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
}
#else
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
#endif
// We'll need to copy from the offset.
const u32 fb_bpp = BufferFormatBytesPerPixel(chosenFramebuffer->fb_format);
const int totalPixelsOffset = clutRenderOffset_ / fb_bpp;
const int clutYOffset = totalPixelsOffset / chosenFramebuffer->fb_stride;
const int clutXOffset = totalPixelsOffset % chosenFramebuffer->fb_stride;
const int scale = chosenFramebuffer->renderScaleFactor;
// Copy the pixels to our temp clut, scaling down if needed and wrapping.
framebufferManager_->BlitUsingRaster(
chosenFramebuffer->fbo, clutXOffset * scale, clutYOffset * scale, (clutXOffset + 512.0f) * scale, (clutYOffset + 1.0f) * scale,
dynamicClutTemp_, 0.0f, 0.0f, 512.0f, 1.0f,
false, scale, framebufferManager_->Get2DPipeline(DRAW2D_COPY_COLOR_RECT2LIN), "copy_clut_to_temp");
framebufferManager_->RebindFramebuffer("after_copy_clut_to_temp");
clutRenderFormat_ = chosenFramebuffer->fb_format;
}
NotifyMemInfo(MemBlockFlags::ALLOC, clutAddr, loadBytes, "CLUT");
}
// It's possible for a game to load CLUT outside valid memory without crashing, should result in zeroes.
u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
_assert_(bytes <= 2048);
bool performDownload = PSP_CoreParameter().compat.flags().AllowDownloadCLUT;
if (GPURecord::IsActive())
performDownload = true;
if (clutRenderAddress_ != 0xFFFFFFFF && performDownload) {
framebufferManager_->DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes);
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
} else {
memset(clutBufRaw_, 0x00, loadBytes);
// The common case.
// Here we could check for clutRenderAddress_ != 0xFFFFFFFF and zero the CLUT or something,
// but choosing not to for now. Though the results of loading the CLUT from RAM here is
// almost certainly going to be bogus.
#ifdef _M_SSE
if (bytes == loadBytes) {
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
__m128i *dest = (__m128i *)clutBufRaw_;
int numBlocks = bytes / 32;
for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
__m128i data1 = _mm_loadu_si128(source);
__m128i data2 = _mm_loadu_si128(source + 1);
_mm_store_si128(dest, data1);
_mm_store_si128(dest + 1, data2);
}
} else {
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
}
#elif PPSSPP_ARCH(ARM_NEON)
if (bytes == loadBytes) {
const uint32_t *source = (const uint32_t *)Memory::GetPointerUnchecked(clutAddr);
uint32_t *dest = (uint32_t *)clutBufRaw_;
int numBlocks = bytes / 32;
for (int i = 0; i < numBlocks; i++, source += 8, dest += 8) {
uint32x4_t data1 = vld1q_u32(source);
uint32x4_t data2 = vld1q_u32(source + 4);
vst1q_u32(dest, data1);
vst1q_u32(dest + 4, data2);
}
} else {
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
}
#else
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
#endif
}
// Reload the clut next time.
clutLastFormat_ = 0xFFFFFFFF;
clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes);