Compare commits
3 commits
master
...
minor-read
Author | SHA1 | Date | |
---|---|---|---|
|
a989b08648 | ||
|
6c9963bd26 | ||
|
49f830d88c |
14 changed files with 244 additions and 180 deletions
|
@ -1325,7 +1325,17 @@ Framebuffer *D3D11DrawContext::CreateFramebuffer(const FramebufferDesc &desc) {
|
||||||
// Texture arrays are supported but we don't have any other use cases yet.
|
// Texture arrays are supported but we don't have any other use cases yet.
|
||||||
_dbg_assert_(desc.numLayers == 1);
|
_dbg_assert_(desc.numLayers == 1);
|
||||||
|
|
||||||
fb->colorFormat = DXGI_FORMAT_R8G8B8A8_UNORM;
|
DXGI_FORMAT colorFormat;
|
||||||
|
switch (desc.colorFormat) {
|
||||||
|
case DataFormat::R8G8B8A8_UNORM:
|
||||||
|
colorFormat = DXGI_FORMAT_R8G8B8A8_UNORM;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
_assert_msg_(false, "Framebuffer format not supported");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
fb->colorFormat = colorFormat;
|
||||||
D3D11_TEXTURE2D_DESC descColor{};
|
D3D11_TEXTURE2D_DESC descColor{};
|
||||||
descColor.Width = desc.width;
|
descColor.Width = desc.width;
|
||||||
descColor.Height = desc.height;
|
descColor.Height = desc.height;
|
||||||
|
|
|
@ -1264,7 +1264,18 @@ Framebuffer *D3D9Context::CreateFramebuffer(const FramebufferDesc &desc) {
|
||||||
D3D9Framebuffer *fbo = new D3D9Framebuffer(desc.width, desc.height);
|
D3D9Framebuffer *fbo = new D3D9Framebuffer(desc.width, desc.height);
|
||||||
fbo->depthstenciltex = nullptr;
|
fbo->depthstenciltex = nullptr;
|
||||||
|
|
||||||
HRESULT rtResult = device_->CreateTexture(desc.width, desc.height, 1, D3DUSAGE_RENDERTARGET, D3DFMT_A8R8G8B8, D3DPOOL_DEFAULT, &fbo->tex, nullptr);
|
D3DFORMAT colorFormat;
|
||||||
|
switch (desc.colorFormat) {
|
||||||
|
case DataFormat::R8G8B8A8_UNORM:
|
||||||
|
// We pretend to support this, although in reality we use the reverse format.
|
||||||
|
colorFormat = D3DFMT_A8R8G8B8;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
_assert_msg_(false, "Framebuffer format not supported");
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
HRESULT rtResult = device_->CreateTexture(desc.width, desc.height, 1, D3DUSAGE_RENDERTARGET, colorFormat, D3DPOOL_DEFAULT, &fbo->tex, nullptr);
|
||||||
if (FAILED(rtResult)) {
|
if (FAILED(rtResult)) {
|
||||||
ERROR_LOG(G3D, "Failed to create render target");
|
ERROR_LOG(G3D, "Failed to create render target");
|
||||||
fbo->Release();
|
fbo->Release();
|
||||||
|
|
|
@ -517,7 +517,22 @@ void GLQueueRunner::InitCreateFramebuffer(const GLRInitStep &step) {
|
||||||
|
|
||||||
// Color texture is same everywhere
|
// Color texture is same everywhere
|
||||||
glGenFramebuffers(1, &fbo->handle);
|
glGenFramebuffers(1, &fbo->handle);
|
||||||
initFBOTexture(fbo->color_texture, GL_RGBA, GL_RGBA, GL_UNSIGNED_BYTE, true);
|
|
||||||
|
GLint colorInternalFormat;
|
||||||
|
GLint colorFormat;
|
||||||
|
GLint colorElementType;
|
||||||
|
switch (fbo->colorFormat) {
|
||||||
|
case Draw::DataFormat::R8G8B8A8_UNORM:
|
||||||
|
colorInternalFormat = GL_RGBA;
|
||||||
|
colorFormat = GL_RGBA;
|
||||||
|
colorElementType = GL_UNSIGNED_BYTE;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
_assert_msg_(false, "Data format not supported");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
initFBOTexture(fbo->color_texture, colorInternalFormat, colorFormat, colorElementType, true);
|
||||||
|
|
||||||
retry_depth:
|
retry_depth:
|
||||||
if (!fbo->z_stencil_) {
|
if (!fbo->z_stencil_) {
|
||||||
|
|
|
@ -106,6 +106,7 @@ void GLDeleter::Perform(GLRenderManager *renderManager, bool skipGLCalls) {
|
||||||
framebuffer->z_stencil_texture.texture = 0;
|
framebuffer->z_stencil_texture.texture = 0;
|
||||||
framebuffer->z_buffer = 0;
|
framebuffer->z_buffer = 0;
|
||||||
framebuffer->stencil_buffer = 0;
|
framebuffer->stencil_buffer = 0;
|
||||||
|
framebuffer->colorFormat = Draw::DataFormat::UNDEFINED;
|
||||||
}
|
}
|
||||||
delete framebuffer;
|
delete framebuffer;
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,10 +51,9 @@ public:
|
||||||
|
|
||||||
class GLRFramebuffer {
|
class GLRFramebuffer {
|
||||||
public:
|
public:
|
||||||
GLRFramebuffer(const Draw::DeviceCaps &caps, int _width, int _height, bool z_stencil)
|
GLRFramebuffer(const Draw::DeviceCaps &caps, Draw::DataFormat _colorFormat, int _width, int _height, bool z_stencil)
|
||||||
: color_texture(caps, _width, _height, 1, 1), z_stencil_texture(caps, _width, _height, 1, 1),
|
: color_texture(caps, _width, _height, 1, 1), z_stencil_texture(caps, _width, _height, 1, 1),
|
||||||
width(_width), height(_height), z_stencil_(z_stencil) {
|
colorFormat(_colorFormat), width(_width), height(_height), z_stencil_(z_stencil) {}
|
||||||
}
|
|
||||||
|
|
||||||
~GLRFramebuffer();
|
~GLRFramebuffer();
|
||||||
|
|
||||||
|
@ -65,6 +64,7 @@ public:
|
||||||
GLRTexture z_stencil_texture;
|
GLRTexture z_stencil_texture;
|
||||||
GLuint z_buffer = 0;
|
GLuint z_buffer = 0;
|
||||||
GLuint stencil_buffer = 0;
|
GLuint stencil_buffer = 0;
|
||||||
|
Draw::DataFormat colorFormat;
|
||||||
|
|
||||||
int width;
|
int width;
|
||||||
int height;
|
int height;
|
||||||
|
@ -464,9 +464,9 @@ public:
|
||||||
return step.create_shader.shader;
|
return step.create_shader.shader;
|
||||||
}
|
}
|
||||||
|
|
||||||
GLRFramebuffer *CreateFramebuffer(int width, int height, bool z_stencil) {
|
GLRFramebuffer *CreateFramebuffer(Draw::DataFormat colorFormat, int width, int height, bool z_stencil) {
|
||||||
GLRInitStep step{ GLRInitStepType::CREATE_FRAMEBUFFER };
|
GLRInitStep step{ GLRInitStepType::CREATE_FRAMEBUFFER };
|
||||||
step.create_framebuffer.framebuffer = new GLRFramebuffer(caps_, width, height, z_stencil);
|
step.create_framebuffer.framebuffer = new GLRFramebuffer(caps_, colorFormat, width, height, z_stencil);
|
||||||
initSteps_.push_back(step);
|
initSteps_.push_back(step);
|
||||||
return step.create_framebuffer.framebuffer;
|
return step.create_framebuffer.framebuffer;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1437,7 +1437,7 @@ Framebuffer *OpenGLContext::CreateFramebuffer(const FramebufferDesc &desc) {
|
||||||
// TODO: Support multiview later. (It's our only use case for multi layers).
|
// TODO: Support multiview later. (It's our only use case for multi layers).
|
||||||
_dbg_assert_(desc.numLayers == 1);
|
_dbg_assert_(desc.numLayers == 1);
|
||||||
|
|
||||||
GLRFramebuffer *framebuffer = renderManager_.CreateFramebuffer(desc.width, desc.height, desc.z_stencil);
|
GLRFramebuffer *framebuffer = renderManager_.CreateFramebuffer(desc.colorFormat, desc.width, desc.height, desc.z_stencil);
|
||||||
OpenGLFramebuffer *fbo = new OpenGLFramebuffer(&renderManager_, framebuffer);
|
OpenGLFramebuffer *fbo = new OpenGLFramebuffer(&renderManager_, framebuffer);
|
||||||
return fbo;
|
return fbo;
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,12 +35,12 @@ void VKRImage::Delete(VulkanContext *vulkan) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag)
|
VKRFramebuffer::VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, VkFormat colorFormat, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag)
|
||||||
: vulkan_(vk), tag_(tag), width(_width), height(_height), numLayers(_numLayers) {
|
: vulkan_(vk), tag_(tag), width(_width), height(_height), numLayers(_numLayers) {
|
||||||
|
|
||||||
_dbg_assert_(tag);
|
_dbg_assert_(tag);
|
||||||
|
|
||||||
CreateImage(vulkan_, initCmd, color, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, VK_FORMAT_R8G8B8A8_UNORM, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
|
CreateImage(vulkan_, initCmd, color, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, colorFormat, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, true, tag);
|
||||||
if (createDepthStencilBuffer) {
|
if (createDepthStencilBuffer) {
|
||||||
CreateImage(vulkan_, initCmd, depth, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
|
CreateImage(vulkan_, initCmd, depth, width, height, numLayers, VK_SAMPLE_COUNT_1_BIT, vulkan_->GetDeviceInfo().preferredDepthStencilFormat, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, false, tag);
|
||||||
}
|
}
|
||||||
|
|
|
@ -58,7 +58,7 @@ struct VKRImage {
|
||||||
|
|
||||||
class VKRFramebuffer {
|
class VKRFramebuffer {
|
||||||
public:
|
public:
|
||||||
VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag);
|
VKRFramebuffer(VulkanContext *vk, VkCommandBuffer initCmd, VKRRenderPass *compatibleRenderPass, VkFormat colorFormat, int _width, int _height, int _numLayers, int _multiSampleLevel, bool createDepthStencilBuffer, const char *tag);
|
||||||
~VKRFramebuffer();
|
~VKRFramebuffer();
|
||||||
|
|
||||||
VkFramebuffer Get(VKRRenderPass *compatibleRenderPass, RenderPassType rpType);
|
VkFramebuffer Get(VKRRenderPass *compatibleRenderPass, RenderPassType rpType);
|
||||||
|
|
|
@ -415,7 +415,8 @@ public:
|
||||||
void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override;
|
void CopyFramebufferImage(Framebuffer *src, int level, int x, int y, int z, Framebuffer *dst, int dstLevel, int dstX, int dstY, int dstZ, int width, int height, int depth, int channelBits, const char *tag) override;
|
||||||
bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override;
|
bool BlitFramebuffer(Framebuffer *src, int srcX1, int srcY1, int srcX2, int srcY2, Framebuffer *dst, int dstX1, int dstY1, int dstX2, int dstY2, int channelBits, FBBlitFilter filter, const char *tag) override;
|
||||||
bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, const char *tag) override;
|
bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, const char *tag) override;
|
||||||
DataFormat PreferredFramebufferReadbackFormat(Framebuffer *src) override;
|
DataFormat PreferredColorReadbackFormat(Framebuffer *src) override;
|
||||||
|
DataFormat PreferredDepthReadbackFormat(Framebuffer *src) override;
|
||||||
|
|
||||||
// These functions should be self explanatory.
|
// These functions should be self explanatory.
|
||||||
void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override;
|
void BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) override;
|
||||||
|
@ -1602,8 +1603,12 @@ Framebuffer *VKContext::CreateFramebuffer(const FramebufferDesc &desc) {
|
||||||
_assert_(desc.width > 0);
|
_assert_(desc.width > 0);
|
||||||
_assert_(desc.height > 0);
|
_assert_(desc.height > 0);
|
||||||
|
|
||||||
|
_assert_(desc.colorFormat == DataFormat::R8G8B8A8_UNORM || desc.colorFormat == DataFormat::R16_UNORM);
|
||||||
|
|
||||||
|
VkFormat colorFormat = DataFormatToVulkan(desc.colorFormat);
|
||||||
|
|
||||||
VkCommandBuffer cmd = renderManager_.GetInitCmd();
|
VkCommandBuffer cmd = renderManager_.GetInitCmd();
|
||||||
VKRFramebuffer *vkrfb = new VKRFramebuffer(vulkan_, cmd, renderManager_.GetQueueRunner()->GetCompatibleRenderPass(), desc.width, desc.height, desc.numLayers, desc.multiSampleLevel, desc.z_stencil, desc.tag);
|
VKRFramebuffer *vkrfb = new VKRFramebuffer(vulkan_, cmd, renderManager_.GetQueueRunner()->GetCompatibleRenderPass(), colorFormat, desc.width, desc.height, desc.numLayers, desc.multiSampleLevel, desc.z_stencil, desc.tag);
|
||||||
return new VKFramebuffer(vkrfb, desc.multiSampleLevel);
|
return new VKFramebuffer(vkrfb, desc.multiSampleLevel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1643,15 +1648,19 @@ bool VKContext::CopyFramebufferToMemorySync(Framebuffer *srcfb, int channelBits,
|
||||||
return renderManager_.CopyFramebufferToMemorySync(src ? src->GetFB() : nullptr, aspectMask, x, y, w, h, format, (uint8_t *)pixels, pixelStride, tag);
|
return renderManager_.CopyFramebufferToMemorySync(src ? src->GetFB() : nullptr, aspectMask, x, y, w, h, format, (uint8_t *)pixels, pixelStride, tag);
|
||||||
}
|
}
|
||||||
|
|
||||||
DataFormat VKContext::PreferredFramebufferReadbackFormat(Framebuffer *src) {
|
DataFormat VKContext::PreferredColorReadbackFormat(Framebuffer *src) {
|
||||||
if (src) {
|
if (src) {
|
||||||
return DrawContext::PreferredFramebufferReadbackFormat(src);
|
return DrawContext::PreferredColorReadbackFormat(src);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vulkan_->GetSwapchainFormat() == VK_FORMAT_B8G8R8A8_UNORM) {
|
if (vulkan_->GetSwapchainFormat() == VK_FORMAT_B8G8R8A8_UNORM) {
|
||||||
return Draw::DataFormat::B8G8R8A8_UNORM;
|
return Draw::DataFormat::B8G8R8A8_UNORM;
|
||||||
}
|
}
|
||||||
return DrawContext::PreferredFramebufferReadbackFormat(src);
|
return DrawContext::PreferredColorReadbackFormat(src);
|
||||||
|
}
|
||||||
|
|
||||||
|
DataFormat VKContext::PreferredDepthReadbackFormat(Framebuffer *src) {
|
||||||
|
return Draw::DataFormat::R16_UNORM;
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) {
|
void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPassInfo &rp, const char *tag) {
|
||||||
|
|
|
@ -295,6 +295,7 @@ enum class Event {
|
||||||
constexpr uint32_t MAX_TEXTURE_SLOTS = 3;
|
constexpr uint32_t MAX_TEXTURE_SLOTS = 3;
|
||||||
|
|
||||||
struct FramebufferDesc {
|
struct FramebufferDesc {
|
||||||
|
DataFormat colorFormat;
|
||||||
int width;
|
int width;
|
||||||
int height;
|
int height;
|
||||||
int depth;
|
int depth;
|
||||||
|
@ -696,9 +697,13 @@ public:
|
||||||
virtual bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, const char *tag) {
|
virtual bool CopyFramebufferToMemorySync(Framebuffer *src, int channelBits, int x, int y, int w, int h, Draw::DataFormat format, void *pixels, int pixelStride, const char *tag) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
virtual DataFormat PreferredFramebufferReadbackFormat(Framebuffer *src) {
|
virtual DataFormat PreferredColorReadbackFormat(Framebuffer *src) {
|
||||||
return DataFormat::R8G8B8A8_UNORM;
|
return DataFormat::R8G8B8A8_UNORM;
|
||||||
}
|
}
|
||||||
|
virtual DataFormat PreferredDepthReadbackFormat(Framebuffer *src) {
|
||||||
|
// We use a shader to read depth and write color, while scaling.
|
||||||
|
return DataFormat::R16_UNORM;
|
||||||
|
}
|
||||||
|
|
||||||
// These functions should be self explanatory.
|
// These functions should be self explanatory.
|
||||||
// Binding a zero render target means binding the backbuffer.
|
// Binding a zero render target means binding the backbuffer.
|
||||||
|
|
|
@ -1689,7 +1689,7 @@ void FramebufferManagerCommon::ResizeFramebufFBO(VirtualFramebuffer *vfb, int w,
|
||||||
char tag[128];
|
char tag[128];
|
||||||
size_t len = FormatFramebufferName(vfb, tag, sizeof(tag));
|
size_t len = FormatFramebufferName(vfb, tag, sizeof(tag));
|
||||||
|
|
||||||
vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), msaaLevel_, true, tag });
|
vfb->fbo = draw_->CreateFramebuffer({ colorFormat_, vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), msaaLevel_, true, tag });
|
||||||
if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) {
|
if (Memory::IsVRAMAddress(vfb->fb_address) && vfb->fb_stride != 0) {
|
||||||
NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, ColorBufferByteSize(vfb), tag, len);
|
NotifyMemInfo(MemBlockFlags::ALLOC, vfb->fb_address, ColorBufferByteSize(vfb), tag, len);
|
||||||
}
|
}
|
||||||
|
@ -2060,7 +2060,7 @@ VirtualFramebuffer *FramebufferManagerCommon::CreateRAMFramebuffer(uint32_t fbAd
|
||||||
char name[64];
|
char name[64];
|
||||||
snprintf(name, sizeof(name), "%08x_color_RAM", vfb->fb_address);
|
snprintf(name, sizeof(name), "%08x_color_RAM", vfb->fb_address);
|
||||||
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);
|
textureCache_->NotifyFramebuffer(vfb, NOTIFY_FB_CREATED);
|
||||||
vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), 0, true, name });
|
vfb->fbo = draw_->CreateFramebuffer({ colorFormat_, vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), 0, true, name });
|
||||||
vfbs_.push_back(vfb);
|
vfbs_.push_back(vfb);
|
||||||
|
|
||||||
u32 byteSize = ColorBufferByteSize(vfb);
|
u32 byteSize = ColorBufferByteSize(vfb);
|
||||||
|
@ -2112,8 +2112,7 @@ VirtualFramebuffer *FramebufferManagerCommon::FindDownloadTempBuffer(VirtualFram
|
||||||
char name[64];
|
char name[64];
|
||||||
snprintf(name, sizeof(name), "download_temp");
|
snprintf(name, sizeof(name), "download_temp");
|
||||||
// TODO: We don't have a way to create a depth-only framebuffer yet.
|
// TODO: We don't have a way to create a depth-only framebuffer yet.
|
||||||
// Also, at least on Vulkan we always create both depth and color, need to rework how we handle renderpasses.
|
nvfb->fbo = draw_->CreateFramebuffer({ colorFormat_, nvfb->bufferWidth, nvfb->bufferHeight, 1, 1, 0, channel == RASTER_DEPTH ? true : false, name });
|
||||||
nvfb->fbo = draw_->CreateFramebuffer({ nvfb->bufferWidth, nvfb->bufferHeight, 1, 1, 0, channel == RASTER_DEPTH ? true : false, name });
|
|
||||||
if (!nvfb->fbo) {
|
if (!nvfb->fbo) {
|
||||||
ERROR_LOG(FRAMEBUF, "Error creating FBO! %d x %d", nvfb->renderWidth, nvfb->renderHeight);
|
ERROR_LOG(FRAMEBUF, "Error creating FBO! %d x %d", nvfb->renderWidth, nvfb->renderHeight);
|
||||||
delete nvfb;
|
delete nvfb;
|
||||||
|
@ -2496,7 +2495,7 @@ static const char *TempFBOReasonToString(TempFBO reason) {
|
||||||
}
|
}
|
||||||
|
|
||||||
Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u16 h) {
|
Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u16 h) {
|
||||||
u64 key = ((u64)reason << 48) | ((u32)w << 16) | h;
|
u64 key = ((u64)reason << 48) | ((u64)w << 16) | h;
|
||||||
auto it = tempFBOs_.find(key);
|
auto it = tempFBOs_.find(key);
|
||||||
if (it != tempFBOs_.end()) {
|
if (it != tempFBOs_.end()) {
|
||||||
it->second.last_frame_used = gpuStats.numFlips;
|
it->second.last_frame_used = gpuStats.numFlips;
|
||||||
|
@ -2507,7 +2506,7 @@ Draw::Framebuffer *FramebufferManagerCommon::GetTempFBO(TempFBO reason, u16 w, u
|
||||||
char name[128];
|
char name[128];
|
||||||
snprintf(name, sizeof(name), "tempfbo_%s_%dx%d", TempFBOReasonToString(reason), w / renderScaleFactor_, h / renderScaleFactor_);
|
snprintf(name, sizeof(name), "tempfbo_%s_%dx%d", TempFBOReasonToString(reason), w / renderScaleFactor_, h / renderScaleFactor_);
|
||||||
|
|
||||||
Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, GetFramebufferLayers(), 0, z_stencil, name });
|
Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ colorFormat_, w, h, 1, GetFramebufferLayers(), 0, z_stencil, name });
|
||||||
if (!fbo) {
|
if (!fbo) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
@ -2699,7 +2698,7 @@ bool FramebufferManagerCommon::GetStencilbuffer(u32 fb_address, int fb_stride, G
|
||||||
bool FramebufferManagerCommon::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
|
bool FramebufferManagerCommon::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
|
||||||
int w, h;
|
int w, h;
|
||||||
draw_->GetFramebufferDimensions(nullptr, &w, &h);
|
draw_->GetFramebufferDimensions(nullptr, &w, &h);
|
||||||
Draw::DataFormat fmt = draw_->PreferredFramebufferReadbackFormat(nullptr);
|
Draw::DataFormat fmt = draw_->PreferredColorReadbackFormat(nullptr);
|
||||||
// Ignore preferred formats other than BGRA.
|
// Ignore preferred formats other than BGRA.
|
||||||
if (fmt != Draw::DataFormat::B8G8R8A8_UNORM)
|
if (fmt != Draw::DataFormat::B8G8R8A8_UNORM)
|
||||||
fmt = Draw::DataFormat::R8G8B8A8_UNORM;
|
fmt = Draw::DataFormat::R8G8B8A8_UNORM;
|
||||||
|
@ -2717,19 +2716,14 @@ bool FramebufferManagerCommon::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
|
||||||
// (Except using the GPU might cause problems because of various implementations'
|
// (Except using the GPU might cause problems because of various implementations'
|
||||||
// dithering behavior and games that expect exact colors like Danganronpa, so we
|
// dithering behavior and games that expect exact colors like Danganronpa, so we
|
||||||
// can't entirely be rid of the CPU path.) -- unknown
|
// can't entirely be rid of the CPU path.) -- unknown
|
||||||
void FramebufferManagerCommon::ReadbackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel) {
|
void FramebufferManagerCommon::ReadbackFramebufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, RasterChannel channel, Draw::DataFormat destFormat, u32 fb_address, u32 stride) {
|
||||||
if (w <= 0 || h <= 0) {
|
if (w <= 0 || h <= 0) {
|
||||||
ERROR_LOG(G3D, "Bad inputs to ReadbackFramebufferSync: %d %d %d %d", x, y, w, h);
|
ERROR_LOG(G3D, "Bad inputs to ReadbackFramebufferSync: %d %d %d %d", x, y, w, h);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const u32 fb_address = channel == RASTER_COLOR ? vfb->fb_address : vfb->z_address;
|
|
||||||
|
|
||||||
Draw::DataFormat destFormat = channel == RASTER_COLOR ? GEFormatToThin3D(vfb->fb_format) : GEFormatToThin3D(GE_FORMAT_DEPTH16);
|
|
||||||
const int dstBpp = (int)DataFormatSizeInBytes(destFormat);
|
const int dstBpp = (int)DataFormatSizeInBytes(destFormat);
|
||||||
|
|
||||||
int stride = channel == RASTER_COLOR ? vfb->fb_stride : vfb->z_stride;
|
|
||||||
|
|
||||||
const int dstByteOffset = (y * stride + x) * dstBpp;
|
const int dstByteOffset = (y * stride + x) * dstBpp;
|
||||||
// Leave the gap between the end of the last line and the full stride.
|
// Leave the gap between the end of the last line and the full stride.
|
||||||
// This is only used for the NotifyMemInfo range.
|
// This is only used for the NotifyMemInfo range.
|
||||||
|
@ -2747,14 +2741,14 @@ void FramebufferManagerCommon::ReadbackFramebufferSync(VirtualFramebuffer *vfb,
|
||||||
DEBUG_LOG(G3D, "Reading framebuffer to mem, fb_address = %08x, ptr=%p", fb_address, destPtr);
|
DEBUG_LOG(G3D, "Reading framebuffer to mem, fb_address = %08x, ptr=%p", fb_address, destPtr);
|
||||||
|
|
||||||
if (channel == RASTER_DEPTH) {
|
if (channel == RASTER_DEPTH) {
|
||||||
_assert_msg_(vfb && vfb->z_address != 0 && vfb->z_stride != 0, "Depth buffer invalid");
|
// _assert_msg_(vfb && vfb->z_address != 0 && vfb->z_stride != 0, "Depth buffer invalid");
|
||||||
ReadbackDepthbufferSync(vfb->fbo, x, y, w, h, (uint16_t *)destPtr, stride);
|
ReadbackDepthbufferSync(fbo, x, y, w, h, (uint16_t *)destPtr, stride);
|
||||||
} else {
|
} else {
|
||||||
draw_->CopyFramebufferToMemorySync(vfb->fbo, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, destPtr, stride, "ReadbackFramebufferSync");
|
draw_->CopyFramebufferToMemorySync(fbo, channel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, x, y, w, h, destFormat, destPtr, stride, "ReadbackFramebufferSync");
|
||||||
}
|
}
|
||||||
|
|
||||||
char tag[128];
|
char tag[128];
|
||||||
size_t len = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%08x_%dx%d_%s", vfb->fb_address, vfb->z_address, w, h, GeBufferFormatToString(vfb->fb_format));
|
size_t len = snprintf(tag, sizeof(tag), "FramebufferPack/%08x_%dx%d", fb_address, w, h);
|
||||||
NotifyMemInfo(MemBlockFlags::WRITE, fb_address + dstByteOffset, dstSize, tag, len);
|
NotifyMemInfo(MemBlockFlags::WRITE, fb_address + dstByteOffset, dstSize, tag, len);
|
||||||
|
|
||||||
gpuStats.numReadbacks++;
|
gpuStats.numReadbacks++;
|
||||||
|
@ -2807,14 +2801,19 @@ void FramebufferManagerCommon::ReadFramebufferToMemory(VirtualFramebuffer *vfb,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
u32 address = vfb->Address(channel);
|
||||||
|
int stride = vfb->Stride(channel);
|
||||||
|
|
||||||
|
Draw::DataFormat destFormat = channel == RASTER_COLOR ? GEFormatToThin3D(vfb->fb_format) : GEFormatToThin3D(GE_FORMAT_DEPTH16);
|
||||||
|
|
||||||
if (vfb->renderWidth == vfb->width && vfb->renderHeight == vfb->height) {
|
if (vfb->renderWidth == vfb->width && vfb->renderHeight == vfb->height) {
|
||||||
// No need to stretch-blit
|
// No need to stretch-blit
|
||||||
ReadbackFramebufferSync(vfb, x, y, w, h, channel);
|
ReadbackFramebufferSync(vfb->fbo, x, y, w, h, channel, destFormat, address, stride);
|
||||||
} else {
|
} else {
|
||||||
VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb, channel);
|
VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb, channel);
|
||||||
if (nvfb) {
|
if (nvfb) {
|
||||||
BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, channel, "Blit_ReadFramebufferToMemory");
|
BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, channel, "Blit_ReadFramebufferToMemory");
|
||||||
ReadbackFramebufferSync(nvfb, x, y, w, h, channel);
|
ReadbackFramebufferSync(nvfb->fbo, x, y, w, h, channel, destFormat, address, stride);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2837,7 +2836,7 @@ void FramebufferManagerCommon::FlushBeforeCopy() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: Replace with with depal, reading the palette from the texture on the GPU directly.
|
// In practice, this has been replaced with depal, reading the palette from the texture on the GPU directly.
|
||||||
void FramebufferManagerCommon::DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) {
|
void FramebufferManagerCommon::DownloadFramebufferForClut(u32 fb_address, u32 loadBytes) {
|
||||||
VirtualFramebuffer *vfb = GetVFBAt(fb_address);
|
VirtualFramebuffer *vfb = GetVFBAt(fb_address);
|
||||||
if (vfb && vfb->fb_stride != 0) {
|
if (vfb && vfb->fb_stride != 0) {
|
||||||
|
@ -2871,7 +2870,7 @@ void FramebufferManagerCommon::DownloadFramebufferForClut(u32 fb_address, u32 lo
|
||||||
VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb, RASTER_COLOR);
|
VirtualFramebuffer *nvfb = FindDownloadTempBuffer(vfb, RASTER_COLOR);
|
||||||
if (nvfb) {
|
if (nvfb) {
|
||||||
BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, RASTER_COLOR, "Blit_DownloadFramebufferForClut");
|
BlitFramebuffer(nvfb, x, y, vfb, x, y, w, h, 0, RASTER_COLOR, "Blit_DownloadFramebufferForClut");
|
||||||
ReadbackFramebufferSync(nvfb, x, y, w, h, RASTER_COLOR);
|
ReadbackFramebufferSync(nvfb->fbo, x, y, w, h, RASTER_COLOR, GEFormatToThin3D(nvfb->fb_format), nvfb->fb_address, nvfb->fb_stride);
|
||||||
}
|
}
|
||||||
|
|
||||||
textureCache_->ForgetLastTexture();
|
textureCache_->ForgetLastTexture();
|
||||||
|
@ -3202,7 +3201,7 @@ VirtualFramebuffer *FramebufferManagerCommon::ResolveFramebufferColorToFormat(Vi
|
||||||
|
|
||||||
char tag[128];
|
char tag[128];
|
||||||
FormatFramebufferName(vfb, tag, sizeof(tag));
|
FormatFramebufferName(vfb, tag, sizeof(tag));
|
||||||
vfb->fbo = draw_->CreateFramebuffer({ vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), 0, true, tag });
|
vfb->fbo = draw_->CreateFramebuffer({ colorFormat_, vfb->renderWidth, vfb->renderHeight, 1, GetFramebufferLayers(), 0, true, tag });
|
||||||
vfbs_.push_back(vfb);
|
vfbs_.push_back(vfb);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -152,6 +152,9 @@ struct VirtualFramebuffer {
|
||||||
inline int BufferWidthInBytes() const { return bufferWidth * BufferFormatBytesPerPixel(fb_format); }
|
inline int BufferWidthInBytes() const { return bufferWidth * BufferFormatBytesPerPixel(fb_format); }
|
||||||
inline int FbStrideInBytes() const { return fb_stride * BufferFormatBytesPerPixel(fb_format); }
|
inline int FbStrideInBytes() const { return fb_stride * BufferFormatBytesPerPixel(fb_format); }
|
||||||
inline int ZStrideInBytes() const { return z_stride * 2; }
|
inline int ZStrideInBytes() const { return z_stride * 2; }
|
||||||
|
|
||||||
|
inline u32 Address(RasterChannel channel) const { return channel == RASTER_COLOR ? fb_address : z_address; }
|
||||||
|
inline int Stride(RasterChannel channel) const { return channel == RASTER_COLOR ? fb_stride : z_stride; }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct FramebufferHeuristicParams {
|
struct FramebufferHeuristicParams {
|
||||||
|
@ -451,7 +454,8 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
virtual void ReadbackFramebufferSync(VirtualFramebuffer *vfb, int x, int y, int w, int h, RasterChannel channel);
|
void ReadbackFramebufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, RasterChannel channel, Draw::DataFormat destFormat, u32 fb_address, u32 stride);
|
||||||
|
|
||||||
// Used for when a shader is required, such as GLES.
|
// Used for when a shader is required, such as GLES.
|
||||||
virtual bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride);
|
virtual bool ReadbackDepthbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride);
|
||||||
virtual bool ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride);
|
virtual bool ReadbackStencilbufferSync(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint8_t *pixels, int pixelsStride);
|
||||||
|
@ -601,4 +605,6 @@ protected:
|
||||||
|
|
||||||
Draw2D draw2D_;
|
Draw2D draw2D_;
|
||||||
// The fragment shaders are "owned" by the pipelines since they're 1:1.
|
// The fragment shaders are "owned" by the pipelines since they're 1:1.
|
||||||
|
|
||||||
|
const Draw::DataFormat colorFormat_ = Draw::DataFormat::R8G8B8A8_UNORM;
|
||||||
};
|
};
|
||||||
|
|
|
@ -278,7 +278,7 @@ bool PresentationCommon::UpdatePostShader() {
|
||||||
previousIndex_ = 0;
|
previousIndex_ = 0;
|
||||||
|
|
||||||
for (int i = 0; i < FRAMES; ++i) {
|
for (int i = 0; i < FRAMES; ++i) {
|
||||||
previousFramebuffers_[i] = draw_->CreateFramebuffer({ w, h, 1, 1, 0, false, "inter_presentation" });
|
previousFramebuffers_[i] = draw_->CreateFramebuffer({ Draw::DataFormat::R8G8B8A8_UNORM, w, h, 1, 1, 0, false, "inter_presentation" });
|
||||||
if (!previousFramebuffers_[i]) {
|
if (!previousFramebuffers_[i]) {
|
||||||
DestroyPostShader();
|
DestroyPostShader();
|
||||||
return false;
|
return false;
|
||||||
|
@ -394,7 +394,7 @@ bool PresentationCommon::AllocateFramebuffer(int w, int h) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// No depth/stencil for post processing
|
// No depth/stencil for post processing
|
||||||
Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ w, h, 1, 1, 0, false, "presentation" });
|
Draw::Framebuffer *fbo = draw_->CreateFramebuffer({ Draw::DataFormat::R8G8B8A8_UNORM, w, h, 1, 1, 0, false, "presentation" });
|
||||||
if (!fbo) {
|
if (!fbo) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1237,158 +1237,166 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
|
||||||
clutTotalBytes_ = loadBytes;
|
clutTotalBytes_ = loadBytes;
|
||||||
clutRenderAddress_ = 0xFFFFFFFF;
|
clutRenderAddress_ = 0xFFFFFFFF;
|
||||||
|
|
||||||
if (Memory::IsValidAddress(clutAddr)) {
|
if (!Memory::IsValidAddress(clutAddr)) {
|
||||||
if (Memory::IsVRAMAddress(clutAddr)) {
|
memset(clutBufRaw_, 0x00, loadBytes);
|
||||||
// Clear the uncached and mirror bits, etc. to match framebuffers.
|
// Reload the clut next time.
|
||||||
const u32 clutLoadAddr = clutAddr & 0x041FFFFF;
|
clutLastFormat_ = 0xFFFFFFFF;
|
||||||
const u32 clutLoadEnd = clutLoadAddr + loadBytes;
|
clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes);
|
||||||
static const u32 MAX_CLUT_OFFSET = 4096;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
clutRenderOffset_ = MAX_CLUT_OFFSET;
|
// Check if it's trying to upload pixels from a framebuffer as a CLUT.
|
||||||
const std::vector<VirtualFramebuffer *> &framebuffers = framebufferManager_->Framebuffers();
|
if (Memory::IsVRAMAddress(clutAddr)) {
|
||||||
|
// Clear the uncached and mirror bits, etc. to match framebuffers.
|
||||||
|
const u32 clutLoadAddr = clutAddr & 0x041FFFFF;
|
||||||
|
const u32 clutLoadEnd = clutLoadAddr + loadBytes;
|
||||||
|
static const u32 MAX_CLUT_OFFSET = 4096;
|
||||||
|
|
||||||
u32 bestClutAddress = 0xFFFFFFFF;
|
clutRenderOffset_ = MAX_CLUT_OFFSET;
|
||||||
|
const std::vector<VirtualFramebuffer *> &framebuffers = framebufferManager_->Framebuffers();
|
||||||
|
|
||||||
VirtualFramebuffer *chosenFramebuffer = nullptr;
|
u32 bestClutAddress = 0xFFFFFFFF;
|
||||||
for (VirtualFramebuffer *framebuffer : framebuffers) {
|
|
||||||
// Let's not deal with divide by zero.
|
|
||||||
if (framebuffer->fb_stride == 0)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
const u32 fb_address = framebuffer->fb_address;
|
VirtualFramebuffer *chosenFramebuffer = nullptr;
|
||||||
const u32 fb_bpp = BufferFormatBytesPerPixel(framebuffer->fb_format);
|
for (VirtualFramebuffer *framebuffer : framebuffers) {
|
||||||
int offset = clutLoadAddr - fb_address;
|
// Let's not deal with divide by zero.
|
||||||
|
if (framebuffer->fb_stride == 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
// Is this inside the framebuffer at all? Note that we only check the first line here, this should
|
const u32 fb_address = framebuffer->fb_address;
|
||||||
// be changed.
|
const u32 fb_bpp = BufferFormatBytesPerPixel(framebuffer->fb_format);
|
||||||
bool matchRange = offset >= 0 && offset < (int)(framebuffer->fb_stride * fb_bpp);
|
int offset = clutLoadAddr - fb_address;
|
||||||
if (matchRange) {
|
|
||||||
// And is it inside the rendered area? Sometimes games pack data in the margin between width and stride.
|
|
||||||
// If the framebuffer width was detected as 512, we're gonna assume it's really 480.
|
|
||||||
int fbMatchWidth = framebuffer->width;
|
|
||||||
if (fbMatchWidth == 512) {
|
|
||||||
fbMatchWidth = 480;
|
|
||||||
}
|
|
||||||
bool inMargin = ((offset / fb_bpp) % framebuffer->fb_stride) == fbMatchWidth;
|
|
||||||
|
|
||||||
// The offset check here means, in the context of the loop, that we'll pick
|
// Is this inside the framebuffer at all? Note that we only check the first line here, this should
|
||||||
// the framebuffer with the smallest offset. This is yet another framebuffer matching
|
// be changed.
|
||||||
// loop with its own rules, eventually we'll probably want to do something
|
bool matchRange = offset >= 0 && offset < (int)(framebuffer->fb_stride * fb_bpp);
|
||||||
// more systematic.
|
if (matchRange) {
|
||||||
if (matchRange && !inMargin && offset < (int)clutRenderOffset_) {
|
// And is it inside the rendered area? Sometimes games pack data in the margin between width and stride.
|
||||||
WARN_LOG_N_TIMES(clutfb, 5, G3D, "Detected LoadCLUT(%d bytes) from framebuffer %08x (%s), byte offset %d", loadBytes, fb_address, GeBufferFormatToString(framebuffer->fb_format), offset);
|
// If the framebuffer width was detected as 512, we're gonna assume it's really 480.
|
||||||
framebuffer->last_frame_clut = gpuStats.numFlips;
|
int fbMatchWidth = framebuffer->width;
|
||||||
// Also mark used so it's not decimated.
|
if (fbMatchWidth == 512) {
|
||||||
framebuffer->last_frame_used = gpuStats.numFlips;
|
fbMatchWidth = 480;
|
||||||
framebuffer->usageFlags |= FB_USAGE_CLUT;
|
}
|
||||||
bestClutAddress = framebuffer->fb_address;
|
bool inMargin = ((offset / fb_bpp) % framebuffer->fb_stride) == fbMatchWidth;
|
||||||
clutRenderOffset_ = (u32)offset;
|
|
||||||
chosenFramebuffer = framebuffer;
|
// The offset check here means, in the context of the loop, that we'll pick
|
||||||
if (offset == 0) {
|
// the framebuffer with the smallest offset. This is yet another framebuffer matching
|
||||||
// Not gonna find a better match according to the smallest-offset rule, so we'll go with this one.
|
// loop with its own rules, eventually we'll probably want to do something
|
||||||
break;
|
// more systematic.
|
||||||
}
|
if (matchRange && !inMargin && offset < (int)clutRenderOffset_) {
|
||||||
|
WARN_LOG_N_TIMES(clutfb, 5, G3D, "Detected LoadCLUT(%d bytes) from framebuffer %08x (%s), byte offset %d", loadBytes, fb_address, GeBufferFormatToString(framebuffer->fb_format), offset);
|
||||||
|
framebuffer->last_frame_clut = gpuStats.numFlips;
|
||||||
|
// Also mark used so it's not decimated.
|
||||||
|
framebuffer->last_frame_used = gpuStats.numFlips;
|
||||||
|
framebuffer->usageFlags |= FB_USAGE_CLUT;
|
||||||
|
bestClutAddress = framebuffer->fb_address;
|
||||||
|
clutRenderOffset_ = (u32)offset;
|
||||||
|
chosenFramebuffer = framebuffer;
|
||||||
|
if (offset == 0) {
|
||||||
|
// Not gonna find a better match according to the smallest-offset rule, so we'll go with this one.
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// To turn off dynamic CLUT (for demonstration or testing purposes), add "false &&" to this check.
|
|
||||||
if (chosenFramebuffer && chosenFramebuffer->fbo) {
|
|
||||||
clutRenderAddress_ = bestClutAddress;
|
|
||||||
|
|
||||||
if (!dynamicClutTemp_) {
|
|
||||||
Draw::FramebufferDesc desc{};
|
|
||||||
desc.width = 512;
|
|
||||||
desc.height = 1;
|
|
||||||
desc.depth = 1;
|
|
||||||
desc.z_stencil = false;
|
|
||||||
desc.numLayers = 1;
|
|
||||||
desc.multiSampleLevel = 0;
|
|
||||||
desc.tag = "dynamic_clut";
|
|
||||||
dynamicClutFbo_ = draw_->CreateFramebuffer(desc);
|
|
||||||
desc.tag = "dynamic_clut_temp";
|
|
||||||
dynamicClutTemp_ = draw_->CreateFramebuffer(desc);
|
|
||||||
}
|
|
||||||
|
|
||||||
// We'll need to copy from the offset.
|
|
||||||
const u32 fb_bpp = BufferFormatBytesPerPixel(chosenFramebuffer->fb_format);
|
|
||||||
const int totalPixelsOffset = clutRenderOffset_ / fb_bpp;
|
|
||||||
const int clutYOffset = totalPixelsOffset / chosenFramebuffer->fb_stride;
|
|
||||||
const int clutXOffset = totalPixelsOffset % chosenFramebuffer->fb_stride;
|
|
||||||
const int scale = chosenFramebuffer->renderScaleFactor;
|
|
||||||
|
|
||||||
// Copy the pixels to our temp clut, scaling down if needed and wrapping.
|
|
||||||
framebufferManager_->BlitUsingRaster(
|
|
||||||
chosenFramebuffer->fbo, clutXOffset * scale, clutYOffset * scale, (clutXOffset + 512.0f) * scale, (clutYOffset + 1.0f) * scale,
|
|
||||||
dynamicClutTemp_, 0.0f, 0.0f, 512.0f, 1.0f,
|
|
||||||
false, scale, framebufferManager_->Get2DPipeline(DRAW2D_COPY_COLOR_RECT2LIN), "copy_clut_to_temp");
|
|
||||||
|
|
||||||
framebufferManager_->RebindFramebuffer("after_copy_clut_to_temp");
|
|
||||||
clutRenderFormat_ = chosenFramebuffer->fb_format;
|
|
||||||
}
|
|
||||||
NotifyMemInfo(MemBlockFlags::ALLOC, clutAddr, loadBytes, "CLUT");
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// It's possible for a game to load CLUT outside valid memory without crashing, should result in zeroes.
|
// To turn off dynamic CLUT (for demonstration or testing purposes), add "false &&" to this check.
|
||||||
u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
|
if (chosenFramebuffer && chosenFramebuffer->fbo) {
|
||||||
_assert_(bytes <= 2048);
|
clutRenderAddress_ = bestClutAddress;
|
||||||
bool performDownload = PSP_CoreParameter().compat.flags().AllowDownloadCLUT;
|
|
||||||
if (GPURecord::IsActive())
|
if (!dynamicClutTemp_) {
|
||||||
performDownload = true;
|
Draw::FramebufferDesc desc{};
|
||||||
if (clutRenderAddress_ != 0xFFFFFFFF && performDownload) {
|
desc.width = 512;
|
||||||
framebufferManager_->DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes);
|
desc.height = 1;
|
||||||
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
|
desc.depth = 1;
|
||||||
if (bytes < loadBytes) {
|
desc.z_stencil = false;
|
||||||
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
|
desc.numLayers = 1;
|
||||||
|
desc.multiSampleLevel = 0;
|
||||||
|
desc.tag = "dynamic_clut";
|
||||||
|
dynamicClutFbo_ = draw_->CreateFramebuffer(desc);
|
||||||
|
desc.tag = "dynamic_clut_temp";
|
||||||
|
dynamicClutTemp_ = draw_->CreateFramebuffer(desc);
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
// Here we could check for clutRenderAddress_ != 0xFFFFFFFF and zero the CLUT or something,
|
// We'll need to copy from the offset.
|
||||||
// but choosing not to for now. Though the results of loading the CLUT from RAM here is
|
const u32 fb_bpp = BufferFormatBytesPerPixel(chosenFramebuffer->fb_format);
|
||||||
// almost certainly going to be bogus.
|
const int totalPixelsOffset = clutRenderOffset_ / fb_bpp;
|
||||||
#ifdef _M_SSE
|
const int clutYOffset = totalPixelsOffset / chosenFramebuffer->fb_stride;
|
||||||
if (bytes == loadBytes) {
|
const int clutXOffset = totalPixelsOffset % chosenFramebuffer->fb_stride;
|
||||||
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
|
const int scale = chosenFramebuffer->renderScaleFactor;
|
||||||
__m128i *dest = (__m128i *)clutBufRaw_;
|
|
||||||
int numBlocks = bytes / 32;
|
// Copy the pixels to our temp clut, scaling down if needed and wrapping.
|
||||||
for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
|
framebufferManager_->BlitUsingRaster(
|
||||||
__m128i data1 = _mm_loadu_si128(source);
|
chosenFramebuffer->fbo, clutXOffset * scale, clutYOffset * scale, (clutXOffset + 512.0f) * scale, (clutYOffset + 1.0f) * scale,
|
||||||
__m128i data2 = _mm_loadu_si128(source + 1);
|
dynamicClutTemp_, 0.0f, 0.0f, 512.0f, 1.0f,
|
||||||
_mm_store_si128(dest, data1);
|
false, scale, framebufferManager_->Get2DPipeline(DRAW2D_COPY_COLOR_RECT2LIN), "copy_clut_to_temp");
|
||||||
_mm_store_si128(dest + 1, data2);
|
|
||||||
}
|
framebufferManager_->RebindFramebuffer("after_copy_clut_to_temp");
|
||||||
} else {
|
clutRenderFormat_ = chosenFramebuffer->fb_format;
|
||||||
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
|
}
|
||||||
if (bytes < loadBytes) {
|
NotifyMemInfo(MemBlockFlags::ALLOC, clutAddr, loadBytes, "CLUT");
|
||||||
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
|
}
|
||||||
}
|
|
||||||
}
|
// It's possible for a game to load CLUT outside valid memory without crashing, should result in zeroes.
|
||||||
#elif PPSSPP_ARCH(ARM_NEON)
|
u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
|
||||||
if (bytes == loadBytes) {
|
_assert_(bytes <= 2048);
|
||||||
const uint32_t *source = (const uint32_t *)Memory::GetPointerUnchecked(clutAddr);
|
bool performDownload = PSP_CoreParameter().compat.flags().AllowDownloadCLUT;
|
||||||
uint32_t *dest = (uint32_t *)clutBufRaw_;
|
if (GPURecord::IsActive())
|
||||||
int numBlocks = bytes / 32;
|
performDownload = true;
|
||||||
for (int i = 0; i < numBlocks; i++, source += 8, dest += 8) {
|
if (clutRenderAddress_ != 0xFFFFFFFF && performDownload) {
|
||||||
uint32x4_t data1 = vld1q_u32(source);
|
framebufferManager_->DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes);
|
||||||
uint32x4_t data2 = vld1q_u32(source + 4);
|
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
|
||||||
vst1q_u32(dest, data1);
|
if (bytes < loadBytes) {
|
||||||
vst1q_u32(dest + 4, data2);
|
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
|
||||||
}
|
|
||||||
} else {
|
|
||||||
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
|
|
||||||
if (bytes < loadBytes) {
|
|
||||||
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
|
|
||||||
if (bytes < loadBytes) {
|
|
||||||
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
memset(clutBufRaw_, 0x00, loadBytes);
|
// The common case.
|
||||||
|
|
||||||
|
// Here we could check for clutRenderAddress_ != 0xFFFFFFFF and zero the CLUT or something,
|
||||||
|
// but choosing not to for now. Though the results of loading the CLUT from RAM here is
|
||||||
|
// almost certainly going to be bogus.
|
||||||
|
#ifdef _M_SSE
|
||||||
|
if (bytes == loadBytes) {
|
||||||
|
const __m128i *source = (const __m128i *)Memory::GetPointerUnchecked(clutAddr);
|
||||||
|
__m128i *dest = (__m128i *)clutBufRaw_;
|
||||||
|
int numBlocks = bytes / 32;
|
||||||
|
for (int i = 0; i < numBlocks; i++, source += 2, dest += 2) {
|
||||||
|
__m128i data1 = _mm_loadu_si128(source);
|
||||||
|
__m128i data2 = _mm_loadu_si128(source + 1);
|
||||||
|
_mm_store_si128(dest, data1);
|
||||||
|
_mm_store_si128(dest + 1, data2);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
|
||||||
|
if (bytes < loadBytes) {
|
||||||
|
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#elif PPSSPP_ARCH(ARM_NEON)
|
||||||
|
if (bytes == loadBytes) {
|
||||||
|
const uint32_t *source = (const uint32_t *)Memory::GetPointerUnchecked(clutAddr);
|
||||||
|
uint32_t *dest = (uint32_t *)clutBufRaw_;
|
||||||
|
int numBlocks = bytes / 32;
|
||||||
|
for (int i = 0; i < numBlocks; i++, source += 8, dest += 8) {
|
||||||
|
uint32x4_t data1 = vld1q_u32(source);
|
||||||
|
uint32x4_t data2 = vld1q_u32(source + 4);
|
||||||
|
vst1q_u32(dest, data1);
|
||||||
|
vst1q_u32(dest + 4, data2);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
|
||||||
|
if (bytes < loadBytes) {
|
||||||
|
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
|
||||||
|
if (bytes < loadBytes) {
|
||||||
|
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reload the clut next time.
|
// Reload the clut next time.
|
||||||
clutLastFormat_ = 0xFFFFFFFF;
|
clutLastFormat_ = 0xFFFFFFFF;
|
||||||
clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes);
|
clutMaxBytes_ = std::max(clutMaxBytes_, loadBytes);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue