Revert "Merge pull request #7361 from hrydgard/color-conv-centralize"
This reverts commit f1b57dabf5, reversing changes made to 41001637ce.
This commit is contained in:
parent
07933cad42
commit
cae58cafee
33 changed files with 1195 additions and 476 deletions
|
@ -326,8 +326,6 @@ add_library(Common STATIC
|
|||
${CommonExtra}
|
||||
Common/ChunkFile.cpp
|
||||
Common/ChunkFile.h
|
||||
Common/ColorConv.cpp
|
||||
Common/ColorConv.h
|
||||
Common/ConsoleListener.cpp
|
||||
Common/ConsoleListener.h
|
||||
Common/Crypto/md5.cpp
|
||||
|
@ -1380,8 +1378,6 @@ add_library(GPU OBJECT
|
|||
GPU/Common/IndexGenerator.h
|
||||
GPU/Common/TextureDecoder.cpp
|
||||
GPU/Common/TextureDecoder.h
|
||||
GPU/Common/TextureScaler.cpp
|
||||
GPU/Common/TextureScaler.h
|
||||
GPU/Common/TextureCacheCommon.cpp
|
||||
GPU/Common/TextureCacheCommon.h
|
||||
${GPU_NEON}
|
||||
|
@ -1410,6 +1406,8 @@ add_library(GPU OBJECT
|
|||
GPU/GLES/StencilBuffer.cpp
|
||||
GPU/GLES/TextureCache.cpp
|
||||
GPU/GLES/TextureCache.h
|
||||
GPU/GLES/TextureScaler.cpp
|
||||
GPU/GLES/TextureScaler.h
|
||||
GPU/GLES/TransformPipeline.cpp
|
||||
GPU/GLES/TransformPipeline.h
|
||||
GPU/GLES/VertexShaderGenerator.cpp
|
||||
|
|
|
@ -1512,7 +1512,7 @@ void ARM64XEmitter::ABI_PushRegisters(BitSet32 registers)
|
|||
{
|
||||
if (first)
|
||||
{
|
||||
STR(INDEX_PRE, (ARM64Reg)(X0 + it), SP, (u32)-(s32)stack_size);
|
||||
STR(INDEX_PRE, (ARM64Reg)(X0 + it), SP, -stack_size);
|
||||
first = false;
|
||||
current_offset += 16;
|
||||
}
|
||||
|
|
|
@ -21,12 +21,11 @@
|
|||
#endif
|
||||
|
||||
#include <memory.h>
|
||||
|
||||
#include "base/logging.h"
|
||||
#include "base/basictypes.h"
|
||||
|
||||
#include "CPUDetect.h"
|
||||
#include "Common.h"
|
||||
#include "CPUDetect.h"
|
||||
#include "StringUtils.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
|
|
|
@ -1,271 +0,0 @@
|
|||
// Copyright (C) 2015 PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
|
||||
// TODO: Make SSE2 and NEON versions of many of these.
|
||||
|
||||
#include "Common.h"
|
||||
#include "CPUDetect.h"
|
||||
#include "ColorConv.h"
|
||||
#include "CommonTypes.h"
|
||||
|
||||
#ifdef _M_SSE
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
// Packs a 32-bit pixel into 16 bits: px bits 3-7 land in bits 0-4,
// bits 10-15 in bits 5-10, and bits 19-23 in bits 11-15.
inline u16 RGBA8888toRGB565(u32 px) {
	const u32 lowField = (px >> 3) & 0x001F;
	const u32 midField = (px >> 5) & 0x07E0;
	const u32 highField = (px >> 8) & 0xF800;
	return lowField | midField | highField;
}
|
||||
|
||||
// Keeps the high nibble of each of the four bytes of px and packs them,
// in the same byte order, into a 16-bit value (byte 0 -> bits 0-3, ...).
inline u16 RGBA8888toRGBA4444(u32 px) {
	const u32 nib0 = (px >> 4) & 0x000F;
	const u32 nib1 = (px >> 8) & 0x00F0;
	const u32 nib2 = (px >> 12) & 0x0F00;
	const u32 nib3 = (px >> 16) & 0xF000;
	return nib0 | nib1 | nib2 | nib3;
}
|
||||
|
||||
// Packs px into 5/5/5/1 fields: the top five bits of bytes 0, 1 and 2 go to
// bits 0-4, 5-9 and 10-14, and bit 31 of px becomes bit 15.
inline u16 RGBA8888toRGBA5551(u32 px) {
	const u32 field0 = (px >> 3) & 0x001F;
	const u32 field1 = (px >> 6) & 0x03E0;
	const u32 field2 = (px >> 9) & 0x7C00;
	const u32 topBit = (px >> 16) & 0x8000;
	return field0 | field1 | field2 | topBit;
}
|
||||
|
||||
// Packs px into 5/6/5 fields while swapping the outer channels: px bits 19-23
// land in bits 0-4, bits 10-15 in bits 5-10, and bits 3-7 in bits 11-15.
inline u16 BGRA8888toRGB565(u32 px) {
	const u32 lowField = (px >> 19) & 0x001F;
	const u32 midField = (px >> 5) & 0x07E0;
	const u32 highField = (px << 8) & 0xF800;
	return lowField | midField | highField;
}
|
||||
|
||||
// Keeps the high nibble of each byte of px, swapping bytes 0 and 2 on the way:
// byte 2 -> bits 0-3, byte 1 -> bits 4-7, byte 0 -> bits 8-11, byte 3 -> bits 12-15.
inline u16 BGRA8888toRGBA4444(u32 px) {
	const u32 nib0 = (px >> 20) & 0x000F;
	const u32 nib1 = (px >> 8) & 0x00F0;
	const u32 nib2 = (px << 4) & 0x0F00;
	const u32 nib3 = (px >> 16) & 0xF000;
	return nib0 | nib1 | nib2 | nib3;
}
|
||||
|
||||
// Swaps byte 0 and byte 2 of src; bytes 1 and 3 are returned unchanged.
inline u32 RGBA2BGRA(u32 src) {
	const u32 untouched = src & 0xFF00FF00;
	const u32 byte0ToByte2 = (src & 0x000000FF) << 16;
	const u32 byte2ToByte0 = (src & 0x00FF0000) >> 16;
	return byte0ToByte2 | untouched | byte2ToByte0;
}
|
||||
|
||||
// Used heavily in Test Drive Unlimited (for no good reason...)
|
||||
void ConvertBGRA8888ToRGB565(u16 *dst, const u32 *src, int numPixels) {
|
||||
#if _M_SSE >= 0x401
|
||||
const __m128i maskG = _mm_set1_epi32(0x0000FC00);
|
||||
const __m128i maskRB = _mm_set1_epi32(0x00F800F8);
|
||||
const __m128i mask = _mm_set1_epi32(0x0000FFFF);
|
||||
|
||||
const __m128i *srcp = (const __m128i *)src;
|
||||
__m128i *dstp = (__m128i *)dst;
|
||||
int sseChunks = (numPixels / 4) & ~1;
|
||||
|
||||
// SSE 4.1 required for _mm_packus_epi32.
|
||||
if (((intptr_t)src & 0xF) || ((intptr_t)dst & 0xF) || !cpu_info.bSSE4_1) {
|
||||
sseChunks = 0;
|
||||
}
|
||||
for (int i = 0; i < sseChunks; i += 2) {
|
||||
__m128i c1 = _mm_load_si128(&srcp[i + 0]);
|
||||
__m128i c2 = _mm_load_si128(&srcp[i + 1]);
|
||||
__m128i g, rb;
|
||||
|
||||
g = _mm_and_si128(c1, maskG);
|
||||
g = _mm_srli_epi32(g, 5);
|
||||
rb = _mm_and_si128(c1, maskRB);
|
||||
rb = _mm_or_si128(_mm_srli_epi32(rb, 3), _mm_srli_epi32(rb, 8));
|
||||
c1 = _mm_and_si128(_mm_or_si128(g, rb), mask);
|
||||
|
||||
g = _mm_and_si128(c2, maskG);
|
||||
g = _mm_srli_epi32(g, 5);
|
||||
rb = _mm_and_si128(c2, maskRB);
|
||||
rb = _mm_or_si128(_mm_srli_epi32(rb, 3), _mm_srli_epi32(rb, 8));
|
||||
c2 = _mm_and_si128(_mm_or_si128(g, rb), mask);
|
||||
|
||||
_mm_store_si128(&dstp[i / 2], _mm_packus_epi32(c1, c2));
|
||||
}
|
||||
// The remainder starts right after those done via SSE.
|
||||
u32 i = sseChunks * 4;
|
||||
#else
|
||||
u32 i = 0;
|
||||
#endif
|
||||
for (int x = 0; x < numPixels; ++x) {
|
||||
dst[x] = BGRA8888toRGB565(src[x]);
|
||||
}
|
||||
}
|
||||
|
||||
// Runs RGBA8888toRGB565() over the first numPixels entries of src, writing
// each result to the same index in dst.
void ConvertRGBA8888ToRGB565(u16 *dst, const u32 *src, int numPixels) {
	int idx = 0;
	while (idx < numPixels) {
		dst[idx] = RGBA8888toRGB565(src[idx]);
		++idx;
	}
}
|
||||
|
||||
// Runs BGRA8888toRGBA4444() over the first numPixels entries of src, writing
// each result to the same index in dst.
void ConvertBGRA8888ToRGBA4444(u16 *dst, const u32 *src, int numPixels) {
	int idx = 0;
	while (idx < numPixels) {
		dst[idx] = BGRA8888toRGBA4444(src[idx]);
		++idx;
	}
}
|
||||
|
||||
// Runs RGBA8888toRGBA4444() over the first numPixels entries of src, writing
// each result to the same index in dst.
void ConvertRGBA8888ToRGBA4444(u16 *dst, const u32 *src, int numPixels) {
	int idx = 0;
	while (idx < numPixels) {
		dst[idx] = RGBA8888toRGBA4444(src[idx]);
		++idx;
	}
}
|
||||
|
||||
// Copies numPixels 32-bit pixels from src to dst with bytes 0 and 2 of every
// pixel exchanged. With _M_SSE defined and both pointers 16-byte aligned, four
// pixels are handled per iteration; the rest is done one pixel at a time.
void ConvertBGRA8888ToRGBA8888(u32 *dst, const u32 *src, int numPixels) {
	int i = 0;
#ifdef _M_SSE
	const bool bothAligned = (((intptr_t)src | (intptr_t)dst) & 0xF) == 0;
	if (bothAligned) {
		const __m128i keepMask = _mm_set1_epi32(0xFF00FF00);
		const __m128i *in = (const __m128i *)src;
		__m128i *out = (__m128i *)dst;
		const int vecCount = numPixels / 4;
		for (int v = 0; v < vecCount; ++v) {
			const __m128i px = _mm_load_si128(&in[v]);
			// Bytes 0 and 2 only; each shift pushes the other byte out of the lane.
			const __m128i outer = _mm_andnot_si128(keepMask, px);
			const __m128i kept = _mm_and_si128(px, keepMask);
			const __m128i movedUp = _mm_slli_epi32(outer, 16);
			const __m128i movedDown = _mm_srli_epi32(outer, 16);
			_mm_store_si128(&out[v], _mm_or_si128(_mm_or_si128(kept, movedUp), movedDown));
		}
		// The remainder starts right after those done via SSE.
		i = vecCount * 4;
	}
#endif
	for (; i < numPixels; ++i) {
		const u32 px = src[i];
		dst[i] = (px & 0xFF00FF00) | ((px >> 16) & 0x000000FF) | ((px << 16) & 0x00FF0000);
	}
}
|
||||
|
||||
// Converts numPixels 32-bit pixels at src into 16-bit 5551 pixels at dst with
// the same bit mapping as RGBA8888toRGBA5551(). When built with
// _M_SSE >= 0x401, both pointers are 16-byte aligned and the CPU reports
// SSE4.1, eight pixels are converted per iteration; the remainder is scalar.
void ConvertRGBA8888ToRGBA5551(u16 *dst, const u32 *src, int numPixels) {
	int i = 0;
#if _M_SSE >= 0x401
	const bool bothAligned = (((intptr_t)src | (intptr_t)dst) & 0xF) == 0;
	// SSE 4.1 required for _mm_packus_epi32.
	if (bothAligned && cpu_info.bSSE4_1) {
		const __m128i topBitAndByte1 = _mm_set1_epi32(0x8000F800);
		const __m128i bytes0And2 = _mm_set1_epi32(0x00F800F8);
		const __m128i low16 = _mm_set1_epi32(0x0000FFFF);

		const __m128i *in = (const __m128i *)src;
		__m128i *out = (__m128i *)dst;
		// Two input vectors make one output vector, so use an even count.
		const int vecCount = (numPixels / 4) & ~1;
		for (int v = 0; v < vecCount; v += 2) {
			__m128i half[2];
			for (int h = 0; h < 2; ++h) {
				const __m128i px = _mm_load_si128(&in[v + h]);
				__m128i ag = _mm_and_si128(px, topBitAndByte1);
				ag = _mm_or_si128(_mm_srli_epi32(ag, 16), _mm_srli_epi32(ag, 6));
				__m128i rb = _mm_and_si128(px, bytes0And2);
				rb = _mm_or_si128(_mm_srli_epi32(rb, 3), _mm_srli_epi32(rb, 9));
				half[h] = _mm_and_si128(_mm_or_si128(ag, rb), low16);
			}
			_mm_store_si128(&out[v / 2], _mm_packus_epi32(half[0], half[1]));
		}
		// The remainder starts right after those done via SSE.
		i = vecCount * 4;
	}
#endif
	for (; i < numPixels; ++i) {
		dst[i] = RGBA8888toRGBA5551(src[i]);
	}
}
|
||||
|
||||
// Packs px into 5/5/5/1 fields while swapping bytes 0 and 2: px bits 19-23
// land in bits 0-4, bits 11-15 in bits 5-9, bits 3-7 in bits 10-14, and
// bit 31 becomes bit 15.
inline u16 BGRA8888toRGBA5551(u32 px) {
	const u32 field0 = (px >> 19) & 0x001F;
	const u32 field1 = (px >> 6) & 0x03E0;
	const u32 field2 = (px << 7) & 0x7C00;
	const u32 topBit = (px >> 16) & 0x8000;
	return field0 | field1 | field2 | topBit;
}
|
||||
|
||||
// Converts numPixels 32-bit pixels at src into 16-bit 5551 pixels at dst with
// the same bit mapping as BGRA8888toRGBA5551(). When built with
// _M_SSE >= 0x401, both pointers are 16-byte aligned and the CPU reports
// SSE4.1, eight pixels are converted per iteration; the remainder is scalar.
void ConvertBGRA8888ToRGBA5551(u16 *dst, const u32 *src, int numPixels) {
	int i = 0;
#if _M_SSE >= 0x401
	const bool bothAligned = (((intptr_t)src | (intptr_t)dst) & 0xF) == 0;
	// SSE 4.1 required for _mm_packus_epi32.
	if (bothAligned && cpu_info.bSSE4_1) {
		const __m128i topBitAndByte1 = _mm_set1_epi32(0x8000F800);
		const __m128i bytes0And2 = _mm_set1_epi32(0x00F800F8);
		const __m128i low16 = _mm_set1_epi32(0x0000FFFF);

		const __m128i *in = (const __m128i *)src;
		__m128i *out = (__m128i *)dst;
		// Two input vectors make one output vector, so use an even count.
		const int vecCount = (numPixels / 4) & ~1;
		for (int v = 0; v < vecCount; v += 2) {
			__m128i half[2];
			for (int h = 0; h < 2; ++h) {
				const __m128i px = _mm_load_si128(&in[v + h]);
				__m128i ag = _mm_and_si128(px, topBitAndByte1);
				ag = _mm_or_si128(_mm_srli_epi32(ag, 16), _mm_srli_epi32(ag, 6));
				__m128i rb = _mm_and_si128(px, bytes0And2);
				// Byte 2 drops to bits 0-4, byte 0 rises to bits 10-14.
				rb = _mm_or_si128(_mm_srli_epi32(rb, 19), _mm_slli_epi32(rb, 7));
				half[h] = _mm_and_si128(_mm_or_si128(ag, rb), low16);
			}
			_mm_store_si128(&out[v / 2], _mm_packus_epi32(half[0], half[1]));
		}
		// The remainder starts right after those done via SSE.
		i = vecCount * 4;
	}
#endif
	for (; i < numPixels; ++i) {
		dst[i] = BGRA8888toRGBA5551(src[i]);
	}
}
|
||||
|
||||
// Expands numPixels 16-bit 565 pixels into four bytes each, written through
// dst32 as a byte array: bits 0-4, 5-10 and 11-15 are widened with
// Convert5To8 / Convert6To8 / Convert5To8, and the fourth byte is always 255.
void ConvertRGB565ToRGBA888F(u32 *dst32, const u16 *src, int numPixels) {
	u8 *out = (u8 *)dst32;
	for (int px = 0; px < numPixels; ++px) {
		const u16 packed = src[px];
		u8 *bytes = &out[px * 4];
		bytes[0] = Convert5To8(packed & 0x1f);
		bytes[1] = Convert6To8((packed >> 5) & 0x3f);
		bytes[2] = Convert5To8((packed >> 11) & 0x1f);
		bytes[3] = 255;
	}
}
|
||||
|
||||
// Expands numPixels 16-bit 5551 pixels into four bytes each, written through
// dst32 as a byte array: the three 5-bit fields (bits 0-4, 5-9, 10-14) are
// widened with Convert5To8, and bit 15 becomes either 0 or 255.
void ConvertRGBA5551ToRGBA8888(u32 *dst32, const u16 *src, int numPixels) {
	u8 *out = (u8 *)dst32;
	for (int px = 0; px < numPixels; ++px) {
		const u16 packed = src[px];
		u8 *bytes = &out[px * 4];
		bytes[0] = Convert5To8(packed & 0x1f);
		bytes[1] = Convert5To8((packed >> 5) & 0x1f);
		bytes[2] = Convert5To8((packed >> 10) & 0x1f);
		if (packed >> 15) {
			bytes[3] = 255;
		} else {
			bytes[3] = 0;
		}
	}
}
|
||||
|
||||
// Expands numPixels 16-bit 4444 pixels into four bytes each, written through
// dst32 as a byte array, widening every nibble with Convert4To8.
// NOTE(review): byte 0 is taken from bits 8-11 and byte 2 from bits 0-3, which
// is the reverse of how the 565/5551 converters order their low/high fields.
// Confirm with callers whether this swap is intended.
void ConvertRGBA4444ToRGBA8888(u32 *dst32, const u16 *src, int numPixels) {
	u8 *out = (u8 *)dst32;
	for (int px = 0; px < numPixels; ++px) {
		const u16 packed = src[px];
		u8 *bytes = &out[px * 4];
		bytes[0] = Convert4To8((packed >> 8) & 0xf);
		bytes[1] = Convert4To8((packed >> 4) & 0xf);
		bytes[2] = Convert4To8(packed & 0xf);
		bytes[3] = Convert4To8(packed >> 12);
	}
}
|
|
@ -194,7 +194,6 @@
|
|||
<ClInclude Include="BitSet.h" />
|
||||
<ClInclude Include="ChunkFile.h" />
|
||||
<ClInclude Include="CodeBlock.h" />
|
||||
<ClInclude Include="ColorConv.h" />
|
||||
<ClInclude Include="Common.h" />
|
||||
<ClInclude Include="CommonFuncs.h" />
|
||||
<ClInclude Include="CommonTypes.h" />
|
||||
|
@ -241,7 +240,6 @@
|
|||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="ChunkFile.cpp" />
|
||||
<ClCompile Include="ColorConv.cpp" />
|
||||
<ClCompile Include="ConsoleListener.cpp" />
|
||||
<ClCompile Include="CPUDetect.cpp" />
|
||||
<ClCompile Include="Crypto\md5.cpp" />
|
||||
|
|
|
@ -7,7 +7,6 @@
|
|||
<ClInclude Include="Atomic_GCC.h" />
|
||||
<ClInclude Include="Atomic_Win32.h" />
|
||||
<ClInclude Include="ChunkFile.h" />
|
||||
<ClInclude Include="ColorConv.h" />
|
||||
<ClInclude Include="Common.h" />
|
||||
<ClInclude Include="CommonFuncs.h" />
|
||||
<ClInclude Include="CommonTypes.h" />
|
||||
|
@ -50,7 +49,6 @@
|
|||
<ItemGroup>
|
||||
<ClCompile Include="stdafx.cpp" />
|
||||
<ClCompile Include="ABI.cpp" />
|
||||
<ClCompile Include="ColorConv.cpp" />
|
||||
<ClCompile Include="ConsoleListener.cpp" />
|
||||
<ClCompile Include="CPUDetect.cpp" />
|
||||
<ClCompile Include="FileUtil.cpp" />
|
||||
|
|
|
@ -23,7 +23,6 @@
|
|||
#endif
|
||||
|
||||
#include "Common/FileUtil.h"
|
||||
#include "Common/ColorConv.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/Screenshot.h"
|
||||
#include "GPU/Common/GPUDebugInterface.h"
|
||||
|
@ -49,7 +48,8 @@ public:
|
|||
}
|
||||
}
|
||||
|
||||
bool put_buf(const void *buf, int len) override {
|
||||
bool put_buf(const void *buf, int len) override
|
||||
{
|
||||
if (fp_) {
|
||||
if (fwrite(buf, len, 1, fp_) != 1) {
|
||||
fclose(fp_);
|
||||
|
@ -149,13 +149,14 @@ static const u8 *ConvertBufferTo888RGB(const GPUDebugBuffer &buf, u8 *&temp) {
|
|||
const u16 *buf16 = (const u16 *)buffer;
|
||||
const u32 *buf32 = (const u32 *)buffer;
|
||||
for (u32 y = 0; y < buf.GetHeight(); y++) {
|
||||
u8 *dst;
|
||||
if (flip) {
|
||||
dst = &temp[(buf.GetHeight() - y - 1) * buf.GetStride() * 3];
|
||||
} else {
|
||||
dst = &temp[y * buf.GetStride() * 3];
|
||||
}
|
||||
for (u32 x = 0; x < buf.GetStride(); x++) {
|
||||
u8 *dst;
|
||||
if (flip) {
|
||||
dst = &temp[(buf.GetHeight() - y - 1) * buf.GetStride() * 3 + x * 3];
|
||||
} else {
|
||||
dst = &temp[y * buf.GetStride() * 3 + x * 3];
|
||||
}
|
||||
|
||||
u8 &r = brswap ? dst[2] : dst[0];
|
||||
u8 &g = dst[1];
|
||||
u8 &b = brswap ? dst[0] : dst[2];
|
||||
|
@ -202,7 +203,6 @@ static const u8 *ConvertBufferTo888RGB(const GPUDebugBuffer &buf, u8 *&temp) {
|
|||
ERROR_LOG(COMMON, "Unsupported framebuffer format for screenshot: %d", buf.GetFormat());
|
||||
return nullptr;
|
||||
}
|
||||
dst += 3;
|
||||
}
|
||||
}
|
||||
buffer = temp;
|
||||
|
|
|
@ -17,8 +17,6 @@
|
|||
|
||||
#include "ext/xxhash.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/ColorConv.h"
|
||||
|
||||
#include "GPU/Common/TextureDecoder.h"
|
||||
// NEON is in a separate file so that it can be compiled with a runtime check.
|
||||
#include "GPU/Common/TextureDecoderNEON.h"
|
||||
|
@ -329,3 +327,128 @@ void DecodeDXT5Block(u32 *dst, const DXT5Block *src, int pitch) {
|
|||
dst += pitch;
|
||||
}
|
||||
}
|
||||
|
||||
// Copies numPixels 32-bit pixels from src to dst with bytes 0 and 2 of every
// pixel exchanged. With _M_SSE defined and both pointers 16-byte aligned, four
// pixels are handled per iteration; the rest is done one pixel at a time.
void ConvertBGRA8888ToRGBA8888(u32 *dst, const u32 *src, const u32 numPixels) {
	u32 i = 0;
#ifdef _M_SSE
	const bool bothAligned = (((intptr_t)src | (intptr_t)dst) & 0xF) == 0;
	if (bothAligned) {
		const __m128i keepMask = _mm_set1_epi32(0xFF00FF00);
		const __m128i *in = (const __m128i *)src;
		__m128i *out = (__m128i *)dst;
		const u32 vecCount = numPixels / 4;
		for (u32 v = 0; v < vecCount; ++v) {
			const __m128i px = _mm_load_si128(&in[v]);
			// Bytes 0 and 2 only; each shift pushes the other byte out of the lane.
			const __m128i outer = _mm_andnot_si128(keepMask, px);
			const __m128i kept = _mm_and_si128(px, keepMask);
			const __m128i movedUp = _mm_slli_epi32(outer, 16);
			const __m128i movedDown = _mm_srli_epi32(outer, 16);
			_mm_store_si128(&out[v], _mm_or_si128(_mm_or_si128(kept, movedUp), movedDown));
		}
		// The remainder starts right after those done via SSE.
		i = vecCount * 4;
	}
#endif
	for (; i < numPixels; ++i) {
		const u32 px = src[i];
		dst[i] = (px & 0xFF00FF00) | ((px >> 16) & 0x000000FF) | ((px << 16) & 0x00FF0000);
	}
}
|
||||
|
||||
// Packs px into 5/5/5/1 fields: the top five bits of bytes 0, 1 and 2 go to
// bits 0-4, 5-9 and 10-14, and bit 31 of px becomes bit 15.
inline u16 RGBA8888toRGBA5551(u32 px) {
	const u32 field0 = (px >> 3) & 0x001F;
	const u32 field1 = (px >> 6) & 0x03E0;
	const u32 field2 = (px >> 9) & 0x7C00;
	const u32 topBit = (px >> 16) & 0x8000;
	return field0 | field1 | field2 | topBit;
}
|
||||
|
||||
// Converts numPixels 32-bit pixels at src into 16-bit 5551 pixels at dst with
// the same bit mapping as RGBA8888toRGBA5551(). When built with
// _M_SSE >= 0x401, both pointers are 16-byte aligned and the CPU reports
// SSE4.1, eight pixels are converted per iteration; the remainder is scalar.
void ConvertRGBA8888ToRGBA5551(u16 *dst, const u32 *src, const u32 numPixels) {
	u32 i = 0;
#if _M_SSE >= 0x401
	const bool bothAligned = (((intptr_t)src | (intptr_t)dst) & 0xF) == 0;
	// SSE 4.1 required for _mm_packus_epi32.
	if (bothAligned && cpu_info.bSSE4_1) {
		const __m128i topBitAndByte1 = _mm_set1_epi32(0x8000F800);
		const __m128i bytes0And2 = _mm_set1_epi32(0x00F800F8);
		const __m128i low16 = _mm_set1_epi32(0x0000FFFF);

		const __m128i *in = (const __m128i *)src;
		__m128i *out = (__m128i *)dst;
		// Two input vectors make one output vector, so use an even count.
		const u32 vecCount = (numPixels / 4) & ~1;
		for (u32 v = 0; v < vecCount; v += 2) {
			__m128i half[2];
			for (int h = 0; h < 2; ++h) {
				const __m128i px = _mm_load_si128(&in[v + h]);
				__m128i ag = _mm_and_si128(px, topBitAndByte1);
				ag = _mm_or_si128(_mm_srli_epi32(ag, 16), _mm_srli_epi32(ag, 6));
				__m128i rb = _mm_and_si128(px, bytes0And2);
				rb = _mm_or_si128(_mm_srli_epi32(rb, 3), _mm_srli_epi32(rb, 9));
				half[h] = _mm_and_si128(_mm_or_si128(ag, rb), low16);
			}
			_mm_store_si128(&out[v / 2], _mm_packus_epi32(half[0], half[1]));
		}
		// The remainder starts right after those done via SSE.
		i = vecCount * 4;
	}
#endif
	for (; i < numPixels; ++i) {
		dst[i] = RGBA8888toRGBA5551(src[i]);
	}
}
|
||||
|
||||
// Packs px into 5/5/5/1 fields while swapping bytes 0 and 2: px bits 19-23
// land in bits 0-4, bits 11-15 in bits 5-9, bits 3-7 in bits 10-14, and
// bit 31 becomes bit 15.
inline u16 BGRA8888toRGBA5551(u32 px) {
	const u32 field0 = (px >> 19) & 0x001F;
	const u32 field1 = (px >> 6) & 0x03E0;
	const u32 field2 = (px << 7) & 0x7C00;
	const u32 topBit = (px >> 16) & 0x8000;
	return field0 | field1 | field2 | topBit;
}
|
||||
|
||||
// Converts numPixels 32-bit pixels at src into 16-bit 5551 pixels at dst with
// the same bit mapping as BGRA8888toRGBA5551(). When built with
// _M_SSE >= 0x401, both pointers are 16-byte aligned and the CPU reports
// SSE4.1, eight pixels are converted per iteration; the remainder is scalar.
void ConvertBGRA8888ToRGBA5551(u16 *dst, const u32 *src, const u32 numPixels) {
	u32 i = 0;
#if _M_SSE >= 0x401
	const bool bothAligned = (((intptr_t)src | (intptr_t)dst) & 0xF) == 0;
	// SSE 4.1 required for _mm_packus_epi32.
	if (bothAligned && cpu_info.bSSE4_1) {
		const __m128i topBitAndByte1 = _mm_set1_epi32(0x8000F800);
		const __m128i bytes0And2 = _mm_set1_epi32(0x00F800F8);
		const __m128i low16 = _mm_set1_epi32(0x0000FFFF);

		const __m128i *in = (const __m128i *)src;
		__m128i *out = (__m128i *)dst;
		// Two input vectors make one output vector, so use an even count.
		const u32 vecCount = (numPixels / 4) & ~1;
		for (u32 v = 0; v < vecCount; v += 2) {
			__m128i half[2];
			for (int h = 0; h < 2; ++h) {
				const __m128i px = _mm_load_si128(&in[v + h]);
				__m128i ag = _mm_and_si128(px, topBitAndByte1);
				ag = _mm_or_si128(_mm_srli_epi32(ag, 16), _mm_srli_epi32(ag, 6));
				__m128i rb = _mm_and_si128(px, bytes0And2);
				// Byte 2 drops to bits 0-4, byte 0 rises to bits 10-14.
				rb = _mm_or_si128(_mm_srli_epi32(rb, 19), _mm_slli_epi32(rb, 7));
				half[h] = _mm_and_si128(_mm_or_si128(ag, rb), low16);
			}
			_mm_store_si128(&out[v / 2], _mm_packus_epi32(half[0], half[1]));
		}
		// The remainder starts right after those done via SSE.
		i = vecCount * 4;
	}
#endif
	for (; i < numPixels; ++i) {
		dst[i] = BGRA8888toRGBA5551(src[i]);
	}
}
|
||||
|
|
|
@ -216,3 +216,7 @@ inline void DeIndexTexture4Optimal(ClutT *dest, const u32 texaddr, int length, C
|
|||
const u8 *indexed = (const u8 *) Memory::GetPointer(texaddr);
|
||||
DeIndexTexture4Optimal(dest, indexed, length, color);
|
||||
}
|
||||
|
||||
void ConvertBGRA8888ToRGBA8888(u32 *dst, const u32 *src, const u32 numPixels);
|
||||
void ConvertRGBA8888ToRGBA5551(u16 *dst, const u32 *src, const u32 numPixels);
|
||||
void ConvertBGRA8888ToRGBA5551(u16 *dst, const u32 *src, const u32 numPixels);
|
||||
|
|
|
@ -21,7 +21,6 @@
|
|||
#include "base/logging.h"
|
||||
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/ColorConv.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/MemMap.h"
|
||||
#include "Core/HDRemaster.h"
|
||||
|
@ -365,7 +364,8 @@ void VertexDecoder::Step_TcFloatPrescale() const {
|
|||
uv[1] = uvdata[1] * gstate_c.uv.vScale + gstate_c.uv.vOff;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_Color565() const {
|
||||
void VertexDecoder::Step_Color565() const
|
||||
{
|
||||
u8 *c = decoded_ + decFmt.c0off;
|
||||
u16 cdata = *(u16_le *)(ptr_ + coloff);
|
||||
c[0] = Convert5To8(cdata & 0x1f);
|
||||
|
@ -375,7 +375,8 @@ void VertexDecoder::Step_Color565() const {
|
|||
// Always full alpha.
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_Color5551() const {
|
||||
void VertexDecoder::Step_Color5551() const
|
||||
{
|
||||
u8 *c = decoded_ + decFmt.c0off;
|
||||
u16 cdata = *(u16_le *)(ptr_ + coloff);
|
||||
c[0] = Convert5To8(cdata & 0x1f);
|
||||
|
@ -385,7 +386,8 @@ void VertexDecoder::Step_Color5551() const {
|
|||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] != 0;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_Color4444() const {
|
||||
void VertexDecoder::Step_Color4444() const
|
||||
{
|
||||
u8 *c = decoded_ + decFmt.c0off;
|
||||
u16 cdata = *(u16_le *)(ptr_ + coloff);
|
||||
for (int j = 0; j < 4; j++)
|
||||
|
@ -393,7 +395,8 @@ void VertexDecoder::Step_Color4444() const {
|
|||
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && c[3] == 255;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_Color8888() const {
|
||||
void VertexDecoder::Step_Color8888() const
|
||||
{
|
||||
u8 *c = decoded_ + decFmt.c0off;
|
||||
const u8 *cdata = (const u8*)(ptr_ + coloff);
|
||||
memcpy(c, cdata, sizeof(u8) * 4);
|
||||
|
|
|
@ -16,8 +16,6 @@
|
|||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include "math/lin/matrix4x4.h"
|
||||
#include "Common/ColorConv.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Core/Host.h"
|
||||
#include "Core/MemMap.h"
|
||||
#include "Core/Config.h"
|
||||
|
@ -40,6 +38,26 @@
|
|||
#include <algorithm>
|
||||
|
||||
namespace DX9 {
|
||||
// Packs a 32-bit pixel into 16 bits: px bits 3-7 land in bits 0-4,
// bits 10-15 in bits 5-10, and bits 19-23 in bits 11-15.
inline u16 RGBA8888toRGB565(u32 px) {
	const u32 lowField = (px >> 3) & 0x001F;
	const u32 midField = (px >> 5) & 0x07E0;
	const u32 highField = (px >> 8) & 0xF800;
	return lowField | midField | highField;
}
|
||||
|
||||
// Keeps the high nibble of each of the four bytes of px and packs them,
// in the same byte order, into a 16-bit value (byte 0 -> bits 0-3, ...).
inline u16 RGBA8888toRGBA4444(u32 px) {
	const u32 nib0 = (px >> 4) & 0x000F;
	const u32 nib1 = (px >> 8) & 0x00F0;
	const u32 nib2 = (px >> 12) & 0x0F00;
	const u32 nib3 = (px >> 16) & 0xF000;
	return nib0 | nib1 | nib2 | nib3;
}
|
||||
|
||||
// Packs px into 5/5/5/1 fields: the top five bits of bytes 0, 1 and 2 go to
// bits 0-4, 5-9 and 10-14, and bit 31 of px becomes bit 15.
inline u16 RGBA8888toRGBA5551(u32 px) {
	const u32 field0 = (px >> 3) & 0x001F;
	const u32 field1 = (px >> 6) & 0x03E0;
	const u32 field2 = (px >> 9) & 0x7C00;
	const u32 topBit = (px >> 16) & 0x8000;
	return field0 | field1 | field2 | topBit;
}
|
||||
|
||||
// Packs px into 5/6/5 fields while swapping the outer channels: px bits 19-23
// land in bits 0-4, bits 10-15 in bits 5-10, and bits 3-7 in bits 11-15.
inline u16 BGRA8888toRGB565(u32 px) {
	const u32 lowField = (px >> 19) & 0x001F;
	const u32 midField = (px >> 5) & 0x07E0;
	const u32 highField = (px << 8) & 0xF800;
	return lowField | midField | highField;
}
|
||||
|
||||
// Keeps the high nibble of each byte of px, swapping bytes 0 and 2 on the way:
// byte 2 -> bits 0-3, byte 1 -> bits 4-7, byte 0 -> bits 8-11, byte 3 -> bits 12-15.
inline u16 BGRA8888toRGBA4444(u32 px) {
	const u32 nib0 = (px >> 20) & 0x000F;
	const u32 nib1 = (px >> 8) & 0x00F0;
	const u32 nib2 = (px << 4) & 0x0F00;
	const u32 nib3 = (px >> 16) & 0xF000;
	return nib0 | nib1 | nib2 | nib3;
}
|
||||
|
||||
static void ConvertFromRGBA8888(u8 *dst, u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format);
|
||||
|
||||
void CenterRect(float *x, float *y, float *w, float *h,
|
||||
|
@ -131,6 +149,24 @@ namespace DX9 {
|
|||
}
|
||||
}
|
||||
|
||||
// Spreads the four nibbles of c across the four bytes of *dst, in order,
// placing each nibble in the upper half of its byte (lower halves stay zero).
static inline void ARGB8From4444(u16 c, u32 * dst) {
	const auto byte0 = (c & 0xf) << 4;
	const auto byte1 = ((c >> 4) & 0xf) << 12;
	const auto byte2 = ((c >> 8) & 0xf) << 20;
	const auto byte3 = (c >> 12) << 28;
	*dst = byte0 | byte1 | byte2 | byte3;
}
|
||||
// Expands a 5/6/5 pixel into a 32-bit value with the top byte forced to 0xFF.
// Bits 0-4 of c go to the top of byte 2, bits 5-10 to the top of byte 1 and
// bits 11-15 to the top of byte 0; the low bits of each byte are left zero.
static inline void ARGB8From565(u16 c, u32 * dst) {
	const u32 low5 = c & 0x001f;
	// The middle field is six bits wide, so it is shifted by 10 (not 11) to
	// end exactly at bit 15 instead of leaking into byte 2.
	const u32 mid6 = (c >> 5) & 0x003f;
	// The upper field starts at bit 11, right after the six-bit middle field.
	const u32 high5 = (c >> 11) & 0x001f;
	*dst = (low5 << 19) | (mid6 << 10) | (high5 << 3) | 0xFF000000;
}
|
||||
// Expands the three 5-bit fields of c into a 32-bit value: bits 0-4 go to the
// top of byte 2, bits 5-9 to the top of byte 1 and bits 10-14 to the top of
// byte 0. Bit 15 of c is not consulted; the top byte is always 0xFF.
static inline void ARGB8From5551(u16 c, u32 * dst) {
	const auto byte2 = (c & 0x001f) << 19;
	const auto byte1 = ((c >> 5) & 0x001f) << 11;
	const auto byte0 = ((c >> 10) & 0x001f) << 3;
	*dst = byte2 | byte1 | byte0 | 0xFF000000;
}
|
||||
|
||||
// TODO: Swizzle the texture access instead.
|
||||
static inline u32 RGBA2BGRA(u32 src) {
|
||||
const u32 r = (src & 0x000000FF) << 16;
|
||||
const u32 ga = src & 0xFF00FF00;
|
||||
const u32 b = (src & 0x00FF0000) >> 16;
|
||||
return r | ga | b;
|
||||
}
|
||||
|
||||
void FramebufferManagerDX9::MakePixelTexture(const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
|
||||
u8 *convBuf = NULL;
|
||||
D3DLOCKED_RECT rect;
|
||||
|
@ -171,26 +207,40 @@ namespace DX9 {
|
|||
if (srcPixelFormat != GE_FORMAT_8888 || srcStride != 512) {
|
||||
for (int y = 0; y < height; y++) {
|
||||
switch (srcPixelFormat) {
|
||||
// not tested
|
||||
// not tested
|
||||
case GE_FORMAT_565:
|
||||
{
|
||||
const u16_le *src = (const u16_le *)srcPixels + srcStride * y;
|
||||
u32 *dst = (u32 *)(convBuf + rect.Pitch * y);
|
||||
ConvertRGB565ToRGBA888F(dst, src, width);
|
||||
for (int x = 0; x < width; x++) {
|
||||
u16_le col0 = src[x+0];
|
||||
ARGB8From565(col0, &dst[x + 0]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
// faster
|
||||
case GE_FORMAT_5551:
|
||||
{
|
||||
const u16_le *src = (const u16_le *)srcPixels + srcStride * y;
|
||||
u32 *dst = (u32 *)(convBuf + rect.Pitch * y);
|
||||
ConvertRGBA5551ToRGBA8888(dst, src, width);
|
||||
for (int x = 0; x < width; x++) {
|
||||
u16_le col0 = src[x+0];
|
||||
ARGB8From5551(col0, &dst[x + 0]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
case GE_FORMAT_4444:
|
||||
{
|
||||
const u16_le *src = (const u16_le *)srcPixels + srcStride * y;
|
||||
u32 *dst = (u32 *)(convBuf + rect.Pitch * y);
|
||||
ConvertRGBA4444ToRGBA8888(dst, src, width);
|
||||
u8 *dst = (u8 *)(convBuf + rect.Pitch * y);
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
u16_le col = src[x];
|
||||
dst[x * 4 + 0] = (col >> 12) << 4;
|
||||
dst[x * 4 + 1] = ((col >> 8) & 0xf) << 4;
|
||||
dst[x * 4 + 2] = ((col >> 4) & 0xf) << 4;
|
||||
dst[x * 4 + 3] = (col & 0xf) << 4;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -198,7 +248,10 @@ namespace DX9 {
|
|||
{
|
||||
const u32_le *src = (const u32_le *)srcPixels + srcStride * y;
|
||||
u32 *dst = (u32 *)(convBuf + rect.Pitch * y);
|
||||
ConvertBGRA8888ToRGBA8888(dst, src, width);
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
dst[x] = RGBA2BGRA(src[x]);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -207,11 +260,15 @@ namespace DX9 {
|
|||
for (int y = 0; y < height; y++) {
|
||||
const u32_le *src = (const u32_le *)srcPixels + srcStride * y;
|
||||
u32 *dst = (u32 *)(convBuf + rect.Pitch * y);
|
||||
ConvertBGRA8888ToRGBA8888(dst, src, width);
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
dst[x] = RGBA2BGRA(src[x]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
drawPixelsTex_->UnlockRect(0);
|
||||
// D3DXSaveTextureToFile("game:\\cc.png", D3DXIFF_PNG, drawPixelsTex_, NULL);
|
||||
}
|
||||
|
||||
void FramebufferManagerDX9::DrawPixels(VirtualFramebuffer *vfb, int dstX, int dstY, const u8 *srcPixels, GEBufferFormat srcPixelFormat, int srcStride, int width, int height) {
|
||||
|
@ -607,6 +664,7 @@ namespace DX9 {
|
|||
}
|
||||
|
||||
void FramebufferManagerDX9::CopyDisplayToOutput() {
|
||||
|
||||
fbo_unbind();
|
||||
dxstate.viewport.set(0, 0, PSP_CoreParameter().pixelWidth, PSP_CoreParameter().pixelHeight);
|
||||
currentRenderVfb_ = 0;
|
||||
|
@ -977,7 +1035,9 @@ namespace DX9 {
|
|||
switch (format) {
|
||||
case GE_FORMAT_565: // BGR 565
|
||||
for (u32 y = 0; y < height; ++y) {
|
||||
ConvertBGRA8888ToRGB565(dst16, src32, width);
|
||||
for (u32 x = 0; x < width; ++x) {
|
||||
dst16[x] = BGRA8888toRGB565(src32[x]);
|
||||
}
|
||||
src32 += srcStride;
|
||||
dst16 += dstStride;
|
||||
}
|
||||
|
@ -991,7 +1051,9 @@ namespace DX9 {
|
|||
break;
|
||||
case GE_FORMAT_4444: // ABGR 4444
|
||||
for (u32 y = 0; y < height; ++y) {
|
||||
ConvertBGRA8888ToRGBA4444(dst16, src32, width);
|
||||
for (u32 x = 0; x < width; ++x) {
|
||||
dst16[x] = BGRA8888toRGBA4444(src32[x]);
|
||||
}
|
||||
src32 += srcStride;
|
||||
dst16 += dstStride;
|
||||
}
|
||||
|
|
|
@ -19,7 +19,6 @@
|
|||
|
||||
#include "helper/dx_state.h"
|
||||
#include "helper/fbo.h"
|
||||
#include "Common/ColorConv.h"
|
||||
#include "Core/Reporting.h"
|
||||
#include "GPU/Directx9/FramebufferDX9.h"
|
||||
#include "GPU/Directx9/PixelShaderGeneratorDX9.h"
|
||||
|
|
|
@ -1703,25 +1703,8 @@ void TextureCacheDX9::LoadTextureLevel(TexCacheEntry &entry, int level, int maxL
|
|||
gpuStats.numTexturesDecoded++;
|
||||
|
||||
u32 *pixelData = (u32 *)finalBuf;
|
||||
if (scaleFactor > 1 && (entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
|
||||
GEBufferFormat dstFormat;
|
||||
switch (dstFmt) {
|
||||
case D3DFMT_A8R8G8B8: dstFormat = GE_FORMAT_8888; break;
|
||||
case D3DFMT_R5G6B5: dstFormat = GE_FORMAT_565; break;
|
||||
case D3DFMT_A4R4G4B4: dstFormat = GE_FORMAT_4444; break;
|
||||
case D3DFMT_A1R5G5B5: dstFormat = GE_FORMAT_5551; break;
|
||||
default: goto dontScale;
|
||||
}
|
||||
scaler.Scale(pixelData, dstFormat, w, h, scaleFactor);
|
||||
switch (dstFormat) {
|
||||
case GE_FORMAT_8888: dstFmt = D3DFMT_A8R8G8B8; break;
|
||||
case GE_FORMAT_565: dstFmt = D3DFMT_R5G6B5; break;
|
||||
case GE_FORMAT_4444: dstFmt = D3DFMT_A4R4G4B4; break;
|
||||
case GE_FORMAT_5551: dstFmt = D3DFMT_A1R5G5B5; break;
|
||||
}
|
||||
dontScale:
|
||||
;
|
||||
}
|
||||
if (scaleFactor > 1 && (entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0)
|
||||
scaler.Scale(pixelData, dstFmt, w, h, scaleFactor);
|
||||
|
||||
if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(pixelData, dstFmt, w, w, h);
|
||||
|
|
|
@ -24,7 +24,7 @@
|
|||
#include "helper/fbo.h"
|
||||
#include "GPU/GPUInterface.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/Common/TextureScaler.h"
|
||||
#include "GPU/Directx9/TextureScalerDX9.h"
|
||||
#include "GPU/Common/TextureCacheCommon.h"
|
||||
|
||||
struct VirtualFramebuffer;
|
||||
|
@ -197,7 +197,7 @@ private:
|
|||
|
||||
bool clearCacheNextFrame_;
|
||||
bool lowMemoryMode_;
|
||||
TextureScaler scaler;
|
||||
TextureScalerDX9 scaler;
|
||||
|
||||
SimpleBuf<u32> tmpTexBuf32;
|
||||
SimpleBuf<u16> tmpTexBuf16;
|
||||
|
|
692
GPU/Directx9/TextureScalerDX9.cpp
Normal file
692
GPU/Directx9/TextureScalerDX9.cpp
Normal file
|
@ -0,0 +1,692 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
// On Visual Studio 2012, include this before anything else so
|
||||
// _VARIADIC_MAX gets set to 10, to avoid std::bind compile errors.
|
||||
// See header file for reasons why.
|
||||
#if defined(_WIN32) && _MSC_VER == 1700
|
||||
#include "../native/base/basictypes.h"
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include "GPU/Directx9/TextureScalerDX9.h"
|
||||
|
||||
#include "Core/Config.h"
|
||||
#include "Common/Common.h"
|
||||
#include "Common/Log.h"
|
||||
#include "Common/MsgHandler.h"
|
||||
#include "Common/CommonFuncs.h"
|
||||
#include "Common/ThreadPools.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "ext/xbrz/xbrz.h"
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <D3D9Types.h>
|
||||
|
||||
#undef min
|
||||
#undef max
|
||||
|
||||
#if _M_SSE >= 0x402
|
||||
#include <nmmintrin.h>
|
||||
#endif
|
||||
|
||||
// Report the time and throughput for each larger scaling operation in the log
|
||||
//#define SCALING_MEASURE_TIME
|
||||
|
||||
#ifdef SCALING_MEASURE_TIME
|
||||
#include "native/base/timeutil.h"
|
||||
#endif
|
||||
|
||||
/////////////////////////////////////// Helper Functions (mostly math for parallelization)
|
||||
|
||||
namespace {
|
||||
//////////////////////////////////////////////////////////////////// Color space conversion
|
||||
|
||||
// convert 4444 image to 8888, parallelizable
|
||||
void convert4444(u16* data, u32* out, int width, int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = ((val>> 0) & 0xF) * 17;
|
||||
u32 g = ((val>> 4) & 0xF) * 17;
|
||||
u32 b = ((val>> 8) & 0xF) * 17;
|
||||
u32 a = ((val>>12) & 0xF) * 17;
|
||||
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert 565 image to 8888, parallelizable
|
||||
void convert565(u16* data, u32* out, int width, int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = Convert5To8((val ) & 0x1F);
|
||||
u32 g = Convert6To8((val>> 5) & 0x3F);
|
||||
u32 b = Convert5To8((val>>11) & 0x1F);
|
||||
out[y*width + x] = (0xFF << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert 5551 image to 8888, parallelizable
|
||||
void convert5551(u16* data, u32* out, int width, int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = Convert5To8((val>> 0) & 0x1F);
|
||||
u32 g = Convert5To8((val>> 5) & 0x1F);
|
||||
u32 b = Convert5To8((val>>10) & 0x1F);
|
||||
u32 a = ((val >> 15) & 0x1) * 255;
|
||||
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////// Various image processing
|
||||
|
||||
// Channel extraction from a packed 32-bit pixel: byte 0 is R, byte 3 is A.
// The argument is parenthesized so compound expressions (e.g. a|b) expand
// with the intended precedence.
#define R(_col) (((_col) >>  0) & 0xFF)
#define G(_col) (((_col) >>  8) & 0xFF)
#define B(_col) (((_col) >> 16) & 0xFF)
#define A(_col) (((_col) >> 24) & 0xFF)

// Sum of the absolute per-channel differences between two packed pixels.
#define DISTANCE(_p1,_p2) ( abs(static_cast<int>(static_cast<int>(R(_p1))-R(_p2))) + abs(static_cast<int>(static_cast<int>(G(_p1))-G(_p2))) \
						  + abs(static_cast<int>(static_cast<int>(B(_p1))-B(_p2))) + abs(static_cast<int>(static_cast<int>(A(_p1))-A(_p2))) )

// Per-channel blend of two packed pixels: (_p0*_factors[0] + _p1*_factors[1]) / 255.
// this is sadly much faster than an inline function with a loop, at least in VC10
// The whole expansion is wrapped in parentheses so it can be used safely
// inside larger expressions (comparisons, ternaries, ...).
#define MIX_PIXELS(_p0, _p1, _factors) \
	( ( (R(_p0)*(_factors)[0] + R(_p1)*(_factors)[1])/255 <<  0 ) | \
	  ( (G(_p0)*(_factors)[0] + G(_p1)*(_factors)[1])/255 <<  8 ) | \
	  ( (B(_p0)*(_factors)[0] + B(_p1)*(_factors)[1])/255 << 16 ) | \
	  ( (A(_p0)*(_factors)[0] + A(_p1)*(_factors)[1])/255 << 24 ) )

// Edge length of the square tiles the image loops iterate over.
#define BLOCK_SIZE 32
|
||||
|
||||
// 3x3 convolution with Neumann boundary conditions, parallelizable
|
||||
// quite slow, could be sped up a lot
|
||||
// especially handling of separable kernels
|
||||
void convolve3x3(u32* data, u32* out, const int kernel[3][3], int width, int height, int l, int u) {
|
||||
for(int yb = 0; yb < (u-l)/BLOCK_SIZE+1; ++yb) {
|
||||
for(int xb = 0; xb < width/BLOCK_SIZE+1; ++xb) {
|
||||
for(int y = l+yb*BLOCK_SIZE; y < l+(yb+1)*BLOCK_SIZE && y < u; ++y) {
|
||||
for(int x = xb*BLOCK_SIZE; x < (xb+1)*BLOCK_SIZE && x < width; ++x) {
|
||||
int val = 0;
|
||||
for(int yoff = -1; yoff <= 1; ++yoff) {
|
||||
int yy = std::max(std::min(y+yoff, height-1), 0);
|
||||
for(int xoff = -1; xoff <= 1; ++xoff) {
|
||||
int xx = std::max(std::min(x+xoff, width-1), 0);
|
||||
val += data[yy*width + xx] * kernel[yoff+1][xoff+1];
|
||||
}
|
||||
}
|
||||
out[y*width + x] = abs(val);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// deposterization: smoothes posterized gradients from low-color-depth (e.g. 444, 565, compressed) sources
|
||||
void deposterizeH(u32* data, u32* out, int w, int l, int u) {
|
||||
static const int T = 8;
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < w; ++x) {
|
||||
int inpos = y*w + x;
|
||||
u32 center = data[inpos];
|
||||
if(x==0 || x==w-1) {
|
||||
out[y*w + x] = center;
|
||||
continue;
|
||||
}
|
||||
u32 left = data[inpos - 1];
|
||||
u32 right = data[inpos + 1];
|
||||
out[y*w + x] = 0;
|
||||
for(int c=0; c<4; ++c) {
|
||||
u8 lc = (( left>>c*8)&0xFF);
|
||||
u8 cc = ((center>>c*8)&0xFF);
|
||||
u8 rc = (( right>>c*8)&0xFF);
|
||||
if((lc != rc) && ((lc == cc && abs((int)((int)rc)-cc) <= T) || (rc == cc && abs((int)((int)lc)-cc) <= T))) {
|
||||
// blend this component
|
||||
out[y*w + x] |= ((rc+lc)/2) << (c*8);
|
||||
} else {
|
||||
// no change for this component
|
||||
out[y*w + x] |= cc << (c*8);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// Vertical counterpart of deposterizeH for rows [l, u), walking the image in
// BLOCK_SIZE-wide column strips. Per channel: when the pixels above and below
// differ, the centre equals one of them and is within T of the other, the
// channel becomes the average of the two. Rows 0 and h-1 are copied unchanged.
void deposterizeV(u32* data, u32* out, int w, int h, int l, int u) {
	static const int T = 8;
	const int strips = w/BLOCK_SIZE+1;
	for (int strip = 0; strip < strips; ++strip) {
		const int xBegin = strip*BLOCK_SIZE;
		const int xEnd = std::min(xBegin + BLOCK_SIZE, w);
		for (int y = l; y < u; ++y) {
			for (int x = xBegin; x < xEnd; ++x) {
				const int pos = y*w + x;
				const u32 center = data[pos];
				if (y == 0 || y == h-1) {
					out[pos] = center;
					continue;
				}
				const u32 upper = data[(y-1) * w + x];
				const u32 lower = data[(y+1) * w + x];
				u32 result = 0;
				for (int c = 0; c < 4; ++c) {
					const int shift = c*8;
					const u8 uc = (( upper >> shift) & 0xFF);
					const u8 cc = ((center >> shift) & 0xFF);
					const u8 lc = (( lower >> shift) & 0xFF);
					const bool stepTowardsLower = (uc == cc) && abs((int)lc - cc) <= T;
					const bool stepTowardsUpper = (lc == cc) && abs((int)uc - cc) <= T;
					u32 channel = cc; // default: keep the component as it is
					if ((uc != lc) && (stepTowardsLower || stepTowardsUpper)) {
						channel = (lc + uc) / 2; // blend this component
					}
					result |= channel << shift;
				}
				out[pos] = result;
			}
		}
	}
}
|
||||
|
||||
// generates a distance mask value for each pixel in data
|
||||
// higher values -> larger distance to the surrounding pixels
|
||||
void generateDistanceMask(u32* data, u32* out, int width, int height, int l, int u) {
|
||||
for(int yb = 0; yb < (u-l)/BLOCK_SIZE+1; ++yb) {
|
||||
for(int xb = 0; xb < width/BLOCK_SIZE+1; ++xb) {
|
||||
for(int y = l+yb*BLOCK_SIZE; y < l+(yb+1)*BLOCK_SIZE && y < u; ++y) {
|
||||
for(int x = xb*BLOCK_SIZE; x < (xb+1)*BLOCK_SIZE && x < width; ++x) {
|
||||
out[y*width + x] = 0;
|
||||
u32 center = data[y*width + x];
|
||||
for(int yoff = -1; yoff <= 1; ++yoff) {
|
||||
int yy = y+yoff;
|
||||
if(yy == height || yy == -1) {
|
||||
out[y*width + x] += 1200; // assume distance at borders, usually makes for better result
|
||||
continue;
|
||||
}
|
||||
for(int xoff = -1; xoff <= 1; ++xoff) {
|
||||
if(yoff == 0 && xoff == 0) continue;
|
||||
int xx = x+xoff;
|
||||
if(xx == width || xx == -1) {
|
||||
out[y*width + x] += 400; // assume distance at borders, usually makes for better result
|
||||
continue;
|
||||
}
|
||||
out[y*width + x] += DISTANCE(data[yy*width + xx], center);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// mix two images based on a mask
|
||||
void mix(u32* data, u32* source, u32* mask, u32 maskmax, int width, int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
int pos = y*width + x;
|
||||
u8 mixFactors[2] = { 0, static_cast<u8>((std::min(mask[pos], maskmax)*255)/maskmax) };
|
||||
mixFactors[0] = 255-mixFactors[1];
|
||||
data[pos] = MIX_PIXELS(data[pos], source[pos], mixFactors);
|
||||
if(A(source[pos]) == 0) data[pos] = data[pos] & 0x00FFFFFF; // xBRZ always does a better job with hard alpha
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////// Bicubic scaling
|
||||
|
||||
// Evaluates the Mitchell-Netravali reconstruction spline at distance x,
// with parameters B and C:
// B=1 C=0   : cubic B spline (very smooth)
// B=C=1/3   : recommended for general upscaling
// B=0 C=1/2 : Catmull-Rom spline (sharp, ringing)
// see Mitchell & Netravali, "Reconstruction Filters in Computer Graphics"
//
// The kernel is symmetric, so it is evaluated on |x|; it is zero for |x| >= 2.
// (Previously the polynomials used the signed x, which gave wrong weights for
// negative distances.)
inline float mitchell(float x, float B, float C) {
	const float ax = fabsf(x);
	if(ax>=2.0f) return 0.0f;
	const float ax2 = ax*ax;
	const float ax3 = ax2*ax;
	// outer lobe: 1 <= |x| < 2
	if(ax>=1.0f) return ((-B-6*C)*ax3 + (6*B+30*C)*ax2 + (-12*B-48*C)*ax + (8*B+24*C))/6.0f;
	// inner lobe: |x| < 1
	return ((12-9*B-6*C)*ax3 + (-18+12*B+6*C)*ax2 + (6-2*B))/6.0f;
}
|
||||
|
||||
// arrays for pre-calculating weights and sums (~20KB)
|
||||
// Dimensions:
|
||||
// 0: 0 = BSpline, 1 = mitchell
|
||||
// 1: 2-5x scaling (index = factor - 2)
|
||||
// 2,3: 5x5 generated pixels
|
||||
// 4,5: 5x5 pixels sampled from
|
||||
float bicubicWeights[2][4][5][5][5][5];
|
||||
float bicubicInvSums[2][4][5][5];
|
||||
|
||||
// initialize pre-computed weights array
|
||||
void initBicubicWeights() {
|
||||
float B[2] = { 1.0f, 0.334f };
|
||||
float C[2] = { 0.0f, 0.334f };
|
||||
for(int type=0; type<2; ++type) {
|
||||
for(int factor=2; factor<=5; ++factor) {
|
||||
for(int x=0; x<factor; ++x) {
|
||||
for(int y=0; y<factor; ++y) {
|
||||
float sum = 0.0f;
|
||||
for(int sx = -2; sx <= 2; ++sx) {
|
||||
for(int sy = -2; sy <= 2; ++sy) {
|
||||
float dx = (x+0.5f)/factor - (sx+0.5f);
|
||||
float dy = (y+0.5f)/factor - (sy+0.5f);
|
||||
float dist = sqrt(dx*dx + dy*dy);
|
||||
float weight = mitchell(dist, B[type], C[type]);
|
||||
bicubicWeights[type][factor-2][x][y][sx+2][sy+2] = weight;
|
||||
sum += weight;
|
||||
}
|
||||
}
|
||||
bicubicInvSums[type][factor-2][x][y] = 1.0f/sum;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// perform bicubic scaling by factor f, with precomputed spline type T
|
||||
template<int f, int T>
|
||||
void scaleBicubicT(u32* data, u32* out, int w, int h, int l, int u) {
|
||||
int outw = w*f;
|
||||
for(int yb = 0; yb < (u-l)*f/BLOCK_SIZE+1; ++yb) {
|
||||
for(int xb = 0; xb < w*f/BLOCK_SIZE+1; ++xb) {
|
||||
for(int y = l*f+yb*BLOCK_SIZE; y < l*f+(yb+1)*BLOCK_SIZE && y < u*f; ++y) {
|
||||
for(int x = xb*BLOCK_SIZE; x < (xb+1)*BLOCK_SIZE && x < w*f; ++x) {
|
||||
float r = 0.0f, g = 0.0f, b = 0.0f, a = 0.0f;
|
||||
int cx = x/f, cy = y/f;
|
||||
// sample supporting pixels in original image
|
||||
for(int sx = -2; sx <= 2; ++sx) {
|
||||
for(int sy = -2; sy <= 2; ++sy) {
|
||||
float weight = bicubicWeights[T][f-2][x%f][y%f][sx+2][sy+2];
|
||||
if(weight != 0.0f) {
|
||||
// clamp pixel locations
|
||||
int csy = std::max(std::min(sy+cy,h-1),0);
|
||||
int csx = std::max(std::min(sx+cx,w-1),0);
|
||||
// sample & add weighted components
|
||||
u32 sample = data[csy*w+csx];
|
||||
r += weight*R(sample);
|
||||
g += weight*G(sample);
|
||||
b += weight*B(sample);
|
||||
a += weight*A(sample);
|
||||
}
|
||||
}
|
||||
}
|
||||
// generate and write result
|
||||
float invSum = bicubicInvSums[T][f-2][x%f][y%f];
|
||||
int ri = std::min(std::max(static_cast<int>(ceilf(r*invSum)),0),255);
|
||||
int gi = std::min(std::max(static_cast<int>(ceilf(g*invSum)),0),255);
|
||||
int bi = std::min(std::max(static_cast<int>(ceilf(b*invSum)),0),255);
|
||||
int ai = std::min(std::max(static_cast<int>(ceilf(a*invSum)),0),255);
|
||||
out[y*outw + x] = (ai << 24) | (bi << 16) | (gi << 8) | ri;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#if _M_SSE >= 0x401
// SSE4.1 variant of scaleBicubicT: same tiling, same 5x5 weight table and
// clamped sampling, but all four channels of a sample are widened to floats
// and accumulated in one vector. The normalized sum is converted to integers
// with _mm_cvtps_epi32 and packed back to bytes with saturation.
template<int f, int T>
void scaleBicubicTSSE41(u32* data, u32* out, int w, int h, int l, int u) {
	const int outw = w*f;
	const int rowBegin = l*f;
	const int rowEnd = u*f;
	const int tilesY = (u-l)*f/BLOCK_SIZE+1;
	const int tilesX = w*f/BLOCK_SIZE+1;
	for (int ty = 0; ty < tilesY; ++ty) {
		const int yStop = std::min(rowBegin + (ty+1)*BLOCK_SIZE, rowEnd);
		for (int tx = 0; tx < tilesX; ++tx) {
			const int xStop = std::min((tx+1)*BLOCK_SIZE, outw);
			for (int y = rowBegin + ty*BLOCK_SIZE; y < yStop; ++y) {
				for (int x = tx*BLOCK_SIZE; x < xStop; ++x) {
					const float (&weights)[5][5] = bicubicWeights[T][f-2][x%f][y%f];
					const int cx = x/f;
					const int cy = y/f;
					__m128 accum = _mm_set1_ps(0.0f);
					// gather the supporting pixels from the original image
					for (int sx = -2; sx <= 2; ++sx) {
						for (int sy = -2; sy <= 2; ++sy) {
							const float weight = weights[sx+2][sy+2];
							if (weight == 0.0f) {
								continue;
							}
							// clamp the sample position to the image
							const int csy = std::max(std::min(sy+cy, h-1), 0);
							const int csx = std::max(std::min(sx+cx, w-1), 0);
							// widen the four bytes to four floats, scale, accumulate
							const __m128i packed = _mm_cvtsi32_si128(data[csy*w + csx]);
							const __m128 channels = _mm_cvtepi32_ps(_mm_cvtepu8_epi32(packed));
							accum = _mm_add_ps(accum, _mm_mul_ps(channels, _mm_set1_ps(weight)));
						}
					}
					// normalize, convert to integers and pack back to 4 bytes
					const __m128 invSum = _mm_set1_ps(bicubicInvSums[T][f-2][x%f][y%f]);
					__m128i pixel = _mm_cvtps_epi32(_mm_mul_ps(accum, invSum));
					pixel = _mm_packs_epi32(pixel, pixel);
					pixel = _mm_packus_epi16(pixel, pixel);
					out[y*outw + x] = _mm_cvtsi128_si32(pixel);
				}
			}
		}
	}
}
#endif
|
||||
|
||||
void scaleBicubicBSpline(int factor, u32* data, u32* out, int w, int h, int l, int u) {
|
||||
#if _M_SSE >= 0x401
|
||||
if(cpu_info.bSSE4_1) {
|
||||
switch(factor) {
|
||||
case 2: scaleBicubicTSSE41<2, 0>(data, out, w, h, l, u); break; // when I first tested this,
|
||||
case 3: scaleBicubicTSSE41<3, 0>(data, out, w, h, l, u); break; // it was even slower than I had expected
|
||||
case 4: scaleBicubicTSSE41<4, 0>(data, out, w, h, l, u); break; // turns out I had not included
|
||||
case 5: scaleBicubicTSSE41<5, 0>(data, out, w, h, l, u); break; // any of these break statements
|
||||
default: ERROR_LOG(G3D, "Bicubic upsampling only implemented for factors 2 to 5");
|
||||
}
|
||||
} else {
|
||||
#endif
|
||||
switch(factor) {
|
||||
case 2: scaleBicubicT<2, 0>(data, out, w, h, l, u); break; // when I first tested this,
|
||||
case 3: scaleBicubicT<3, 0>(data, out, w, h, l, u); break; // it was even slower than I had expected
|
||||
case 4: scaleBicubicT<4, 0>(data, out, w, h, l, u); break; // turns out I had not included
|
||||
case 5: scaleBicubicT<5, 0>(data, out, w, h, l, u); break; // any of these break statements
|
||||
default: ERROR_LOG(G3D, "Bicubic upsampling only implemented for factors 2 to 5");
|
||||
}
|
||||
#if _M_SSE >= 0x401
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void scaleBicubicMitchell(int factor, u32* data, u32* out, int w, int h, int l, int u) {
|
||||
#if _M_SSE >= 0x401
|
||||
if(cpu_info.bSSE4_1) {
|
||||
switch(factor) {
|
||||
case 2: scaleBicubicTSSE41<2, 1>(data, out, w, h, l, u); break;
|
||||
case 3: scaleBicubicTSSE41<3, 1>(data, out, w, h, l, u); break;
|
||||
case 4: scaleBicubicTSSE41<4, 1>(data, out, w, h, l, u); break;
|
||||
case 5: scaleBicubicTSSE41<5, 1>(data, out, w, h, l, u); break;
|
||||
default: ERROR_LOG(G3D, "Bicubic upsampling only implemented for factors 2 to 5");
|
||||
}
|
||||
} else {
|
||||
#endif
|
||||
switch(factor) {
|
||||
case 2: scaleBicubicT<2, 1>(data, out, w, h, l, u); break;
|
||||
case 3: scaleBicubicT<3, 1>(data, out, w, h, l, u); break;
|
||||
case 4: scaleBicubicT<4, 1>(data, out, w, h, l, u); break;
|
||||
case 5: scaleBicubicT<5, 1>(data, out, w, h, l, u); break;
|
||||
default: ERROR_LOG(G3D, "Bicubic upsampling only implemented for factors 2 to 5");
|
||||
}
|
||||
#if _M_SSE >= 0x401
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////// Bilinear scaling
|
||||
|
||||
// Blend weights used by bilinearHt / bilinearVt through MIX_PIXELS.
// First index: scaling factor - 2. Second index: position of the generated
// pixel counted from the outer edge of the source pixel. Third index:
// [0] = weight of the neighbouring source pixel, [1] = weight of the centre
// source pixel. Each used pair sums to 255; all-zero entries are positions
// the loops in bilinearHt / bilinearVt never index for that factor.
const static u8 BILINEAR_FACTORS[4][3][2] = {
|
||||
{ { 44,211}, { 0, 0}, { 0, 0} }, // x2
|
||||
{ { 64,191}, { 0,255}, { 0, 0} }, // x3
|
||||
{ { 77,178}, { 26,229}, { 0, 0} }, // x4
|
||||
{ {102,153}, { 51,204}, { 0,255} }, // x5
|
||||
};
|
||||
// integral bilinear upscaling by factor f, horizontal part
|
||||
template<int f>
|
||||
void bilinearHt(u32* data, u32* out, int w, int l, int u) {
|
||||
static_assert(f>1 && f<=5, "Bilinear scaling only implemented for factors 2 to 5");
|
||||
int outw = w*f;
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < w; ++x) {
|
||||
int inpos = y*w + x;
|
||||
u32 left = data[inpos - (x==0 ?0:1)];
|
||||
u32 center = data[inpos];
|
||||
u32 right = data[inpos + (x==w-1?0:1)];
|
||||
int i=0;
|
||||
for(; i<f/2+f%2; ++i) { // first half of the new pixels + center, hope the compiler unrolls this
|
||||
out[y*outw + x*f + i] = MIX_PIXELS(left, center, BILINEAR_FACTORS[f-2][i]);
|
||||
}
|
||||
for(; i<f ; ++i) { // second half of the new pixels, hope the compiler unrolls this
|
||||
out[y*outw + x*f + i] = MIX_PIXELS(right, center, BILINEAR_FACTORS[f-2][f-1-i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
void bilinearH(int factor, u32* data, u32* out, int w, int l, int u) {
|
||||
switch(factor) {
|
||||
case 2: bilinearHt<2>(data, out, w, l, u); break;
|
||||
case 3: bilinearHt<3>(data, out, w, l, u); break;
|
||||
case 4: bilinearHt<4>(data, out, w, l, u); break;
|
||||
case 5: bilinearHt<5>(data, out, w, l, u); break;
|
||||
default: ERROR_LOG(G3D, "Bilinear upsampling only implemented for factors 2 to 5");
|
||||
}
|
||||
}
|
||||
// integral bilinear upscaling by factor f, vertical part
|
||||
// gl/gu == global lower and upper bound
|
||||
template<int f>
|
||||
void bilinearVt(u32* data, u32* out, int w, int gl, int gu, int l, int u) {
|
||||
static_assert(f>1 && f<=5, "Bilinear scaling only implemented for 2x, 3x, 4x, and 5x");
|
||||
int outw = w*f;
|
||||
for(int xb = 0; xb < outw/BLOCK_SIZE+1; ++xb) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
u32 uy = y - (y==gl ?0:1);
|
||||
u32 ly = y + (y==gu-1?0:1);
|
||||
for(int x = xb*BLOCK_SIZE; x < (xb+1)*BLOCK_SIZE && x < outw; ++x) {
|
||||
u32 upper = data[uy * outw + x];
|
||||
u32 center = data[y * outw + x];
|
||||
u32 lower = data[ly * outw + x];
|
||||
int i=0;
|
||||
for(; i<f/2+f%2; ++i) { // first half of the new pixels + center, hope the compiler unrolls this
|
||||
out[(y*f + i)*outw + x] = MIX_PIXELS(upper, center, BILINEAR_FACTORS[f-2][i]);
|
||||
}
|
||||
for(; i<f ; ++i) { // second half of the new pixels, hope the compiler unrolls this
|
||||
out[(y*f + i)*outw + x] = MIX_PIXELS(lower, center, BILINEAR_FACTORS[f-2][f-1-i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
void bilinearV(int factor, u32* data, u32* out, int w, int gl, int gu, int l, int u) {
|
||||
switch(factor) {
|
||||
case 2: bilinearVt<2>(data, out, w, gl, gu, l, u); break;
|
||||
case 3: bilinearVt<3>(data, out, w, gl, gu, l, u); break;
|
||||
case 4: bilinearVt<4>(data, out, w, gl, gu, l, u); break;
|
||||
case 5: bilinearVt<5>(data, out, w, gl, gu, l, u); break;
|
||||
default: ERROR_LOG(G3D, "Bilinear upsampling only implemented for factors 2 to 5");
|
||||
}
|
||||
}
|
||||
|
||||
#undef BLOCK_SIZE
|
||||
#undef MIX_PIXELS
|
||||
#undef DISTANCE
|
||||
#undef R
|
||||
#undef G
|
||||
#undef B
|
||||
#undef A
|
||||
|
||||
// used for debugging texture scaling (writing textures to files)
|
||||
static int g_imgCount = 0;
|
||||
void dbgPPM(int w, int h, u8* pixels, const char* prefix = "dbg") { // 3 component RGB
|
||||
char fn[32];
|
||||
snprintf(fn, 32, "%s%04d.ppm", prefix, g_imgCount++);
|
||||
FILE *fp = fopen(fn, "wb");
|
||||
fprintf(fp, "P6\n%d %d\n255\n", w, h);
|
||||
for(int j = 0; j < h; ++j) {
|
||||
for(int i = 0; i < w; ++i) {
|
||||
static unsigned char color[3];
|
||||
color[0] = pixels[(j*w+i)*4+0]; /* red */
|
||||
color[1] = pixels[(j*w+i)*4+1]; /* green */
|
||||
color[2] = pixels[(j*w+i)*4+2]; /* blue */
|
||||
fwrite(color, 1, 3, fp);
|
||||
}
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
void dbgPGM(int w, int h, u32* pixels, const char* prefix = "dbg") { // 1 component
|
||||
char fn[32];
|
||||
snprintf(fn, 32, "%s%04d.pgm", prefix, g_imgCount++);
|
||||
FILE *fp = fopen(fn, "wb");
|
||||
fprintf(fp, "P5\n%d %d\n65536\n", w, h);
|
||||
for(int j = 0; j < h; ++j) {
|
||||
for(int i = 0; i < w; ++i) {
|
||||
fwrite((pixels+(j*w+i)), 1, 2, fp);
|
||||
}
|
||||
}
|
||||
fclose(fp);
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////// Texture Scaler
|
||||
|
||||
namespace DX9 {
|
||||
|
||||
// Constructing a scaler calls initBicubicWeights(), so the file-level bicubic
// weight tables are (re)computed every time an instance is created.
TextureScalerDX9::TextureScalerDX9() {
|
||||
initBicubicWeights();
|
||||
}
|
||||
|
||||
// Returns true when every 32-bit word of the texture equals the first one.
// For D3DFMT_A8R8G8B8 one word is one pixel; for every other format two
// pixels are assumed per word, so only pixels/2 words are inspected.
bool TextureScalerDX9::IsEmptyOrFlat(u32* data, int pixels, u32 fmt) {
	const int wordCount = (fmt == D3DFMT_A8R8G8B8) ? pixels : pixels / 2;
	const u32 first = data[0];
	int i = 0;
	while (i < wordCount && data[i] == first) {
		++i;
	}
	// the scan stops early only when a differing word was found
	return i >= wordCount;
}
|
||||
|
||||
// Upscales a texture by `factor` using the algorithm selected in
// g_Config.iTexScalingType (optionally deposterizing first).
//
// data, dstFmt, width, height are in/out: on success `data` points at the
// scaler's internal output buffer, `dstFmt` becomes D3DFMT_A8R8G8B8 and
// width/height are multiplied by `factor`.
// They are left untouched when the texture is empty/flat or when the
// configured scaling type is unknown. In the latter case the output buffer
// was never written, so it must not be handed back as the result (previously
// it was).
void TextureScalerDX9::Scale(u32* &data, u32 &dstFmt, int &width, int &height, int factor) {
	// prevent processing empty or flat textures (this happens a lot in some games)
	// doesn't hurt the standard case, will be very quick for textures with actual texture
	if(IsEmptyOrFlat(data, width*height, dstFmt)) {
		INFO_LOG(G3D, "TextureScaler: early exit -- empty/flat texture");
		return;
	}

#ifdef SCALING_MEASURE_TIME
	double t_start = real_time_now();
#endif

	bufInput.resize(width*height); // used to store the input image if it needs to be reformatted
	bufOutput.resize(width*height*factor*factor); // used to store the upscaled image
	u32 *inputBuf = bufInput.data();
	u32 *outputBuf = bufOutput.data();

	// convert texture to correct format for scaling
	// (inputBuf is passed by reference and is redirected to `data` when no conversion is needed)
	ConvertTo8888(dstFmt, data, inputBuf, width, height);

	// deposterize
	if(g_Config.bTexDeposterize) {
		bufDeposter.resize(width*height);
		DePosterize(inputBuf, bufDeposter.data(), width, height);
		inputBuf = bufDeposter.data();
	}

	// scale
	switch(g_Config.iTexScalingType) {
	case XBRZ:
		ScaleXBRZ(factor, inputBuf, outputBuf, width, height);
		break;
	case HYBRID:
		ScaleHybrid(factor, inputBuf, outputBuf, width, height);
		break;
	case BICUBIC:
		ScaleBicubicMitchell(factor, inputBuf, outputBuf, width, height);
		break;
	case HYBRID_BICUBIC:
		ScaleHybrid(factor, inputBuf, outputBuf, width, height, true);
		break;
	default:
		ERROR_LOG(G3D, "Unknown scaling type: %d", g_Config.iTexScalingType);
		// nothing was written to outputBuf: leave the caller's texture as it is
		return;
	}

	// update values accordingly
	data = outputBuf;
	dstFmt = D3DFMT_A8R8G8B8;
	width *= factor;
	height *= factor;

#ifdef SCALING_MEASURE_TIME
	if(width*height > 64*64*factor*factor) {
		double t = real_time_now() - t_start;
		NOTICE_LOG(MASTER_LOG, "TextureScaler: processed %9d pixels in %6.5lf seconds. (%9.2lf Mpixels/second)",
			width*height, t, (width*height)/(t*1000*1000));
	}
#endif
}
|
||||
|
||||
// Upscales `source` into `dest` with xBRZ using a default-constructed
// configuration; the row range [0, height) is split across the global
// thread pool, each worker receiving its own [lower, upper) band.
void TextureScalerDX9::ScaleXBRZ(int factor, u32* source, u32* dest, int width, int height) {
	xbrz::ScalerCfg cfg;
	auto scaleBand = std::bind(&xbrz::scale, factor, source, dest, width, height, xbrz::ColorFormat::ARGB, cfg, placeholder::_1, placeholder::_2);
	GlobalThreadPool::Loop(scaleBand, 0, height);
}
|
||||
|
||||
void TextureScalerDX9::ScaleBilinear(int factor, u32* source, u32* dest, int width, int height) {
|
||||
bufTmp1.resize(width*height*factor);
|
||||
u32 *tmpBuf = bufTmp1.data();
|
||||
GlobalThreadPool::Loop(std::bind(&bilinearH, factor, source, tmpBuf, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
GlobalThreadPool::Loop(std::bind(&bilinearV, factor, tmpBuf, dest, width, 0, height, placeholder::_1, placeholder::_2), 0, height);
|
||||
}
|
||||
|
||||
// Bicubic (B-spline weights) upscale of `source` into `dest`, split by
// source rows across the global thread pool.
void TextureScalerDX9::ScaleBicubicBSpline(int factor, u32* source, u32* dest, int width, int height) {
	auto scaleBand = std::bind(&scaleBicubicBSpline, factor, source, dest, width, height, placeholder::_1, placeholder::_2);
	GlobalThreadPool::Loop(scaleBand, 0, height);
}
|
||||
|
||||
// Bicubic (Mitchell weights) upscale of `source` into `dest`, split by
// source rows across the global thread pool.
void TextureScalerDX9::ScaleBicubicMitchell(int factor, u32* source, u32* dest, int width, int height) {
	auto scaleBand = std::bind(&scaleBicubicMitchell, factor, source, dest, width, height, placeholder::_1, placeholder::_2);
	GlobalThreadPool::Loop(scaleBand, 0, height);
}
|
||||
|
||||
void TextureScalerDX9::ScaleHybrid(int factor, u32* source, u32* dest, int width, int height, bool bicubic) {
|
||||
// Basic algorithm:
|
||||
// 1) determine a feature mask C based on a sobel-ish filter + splatting, and upscale that mask bilinearly
|
||||
// 2) generate 2 scaled images: A - using Bilinear filtering, B - using xBRZ
|
||||
// 3) output = A*C + B*(1-C)
|
||||
|
||||
const static int KERNEL_SPLAT[3][3] = {
|
||||
{ 1, 1, 1 }, { 1, 1, 1 }, { 1, 1, 1 }
|
||||
};
|
||||
|
||||
bufTmp1.resize(width*height);
|
||||
bufTmp2.resize(width*height*factor*factor);
|
||||
bufTmp3.resize(width*height*factor*factor);
|
||||
GlobalThreadPool::Loop(std::bind(&generateDistanceMask, source, bufTmp1.data(), width, height, placeholder::_1, placeholder::_2), 0, height);
|
||||
GlobalThreadPool::Loop(std::bind(&convolve3x3, bufTmp1.data(), bufTmp2.data(), KERNEL_SPLAT, width, height, placeholder::_1, placeholder::_2), 0, height);
|
||||
ScaleBilinear(factor, bufTmp2.data(), bufTmp3.data(), width, height);
|
||||
// mask C is now in bufTmp3
|
||||
|
||||
ScaleXBRZ(factor, source, bufTmp2.data(), width, height);
|
||||
// xBRZ upscaled source is in bufTmp2
|
||||
|
||||
if(bicubic) ScaleBicubicBSpline(factor, source, dest, width, height);
|
||||
else ScaleBilinear(factor, source, dest, width, height);
|
||||
// Upscaled source is in dest
|
||||
|
||||
// Now we can mix it all together
|
||||
// The factor 8192 was found through practical testing on a variety of textures
|
||||
GlobalThreadPool::Loop(std::bind(&mix, dest, bufTmp2.data(), bufTmp3.data(), 8192, width*factor, placeholder::_1, placeholder::_2), 0, height*factor);
|
||||
}
|
||||
|
||||
// Deposterizes `source` into `dest` by running the horizontal + vertical
// passes twice. bufTmp3 holds the result of each horizontal pass; the second
// round reads the output of the first round from `dest`.
void TextureScalerDX9::DePosterize(u32* source, u32* dest, int width, int height) {
	bufTmp3.resize(width*height);
	u32 *roundInput = source;
	for (int round = 0; round < 2; ++round) {
		auto horizontal = std::bind(&deposterizeH, roundInput, bufTmp3.data(), width, placeholder::_1, placeholder::_2);
		GlobalThreadPool::Loop(horizontal, 0, height);
		auto vertical = std::bind(&deposterizeV, bufTmp3.data(), dest, width, height, placeholder::_1, placeholder::_2);
		GlobalThreadPool::Loop(vertical, 0, height);
		roundInput = dest;
	}
}
|
||||
|
||||
// Makes `dest` refer to an 8888 version of the texture.
// - D3DFMT_A8R8G8B8: no conversion; `dest` is redirected to `source`.
// - D3DFMT_A4R4G4B4 / R5G6B5 / A1R5G5B5: converted row-parallel into the
//   buffer `dest` already points to.
// - anything else: `dest` is redirected to `source` and an error is logged.
void TextureScalerDX9::ConvertTo8888(u32 format, u32* source, u32* &dest, int width, int height) {
	typedef void (*RowConverter)(u16*, u32*, int, int, int);
	RowConverter convert = nullptr;

	switch (format) {
	case D3DFMT_A8R8G8B8:
		dest = source; // already fine
		return;
	case D3DFMT_A4R4G4B4:
		convert = &convert4444;
		break;
	case D3DFMT_R5G6B5:
		convert = &convert565;
		break;
	case D3DFMT_A1R5G5B5:
		convert = &convert5551;
		break;
	default:
		dest = source;
		ERROR_LOG(G3D, "iXBRZTexScaling: unsupported texture format");
		return;
	}

	GlobalThreadPool::Loop(std::bind(convert, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
}
|
||||
|
||||
};
|
56
GPU/Directx9/TextureScalerDX9.h
Normal file
56
GPU/Directx9/TextureScalerDX9.h
Normal file
|
@ -0,0 +1,56 @@
|
|||
// Copyright (c) 2012- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
// the Free Software Foundation, version 2.0 or later versions.
|
||||
|
||||
// This program is distributed in the hope that it will be useful,
|
||||
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
// GNU General Public License 2.0 for more details.
|
||||
|
||||
// A copy of the GPL 2.0 should have been included with the program.
|
||||
// If not, see http://www.gnu.org/licenses/
|
||||
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "Common/MemoryUtil.h"
|
||||
#include "../Globals.h"
|
||||
#include "helper/global.h"
|
||||
//#include "gfx/gl_common.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace DX9 {
|
||||
|
||||
|
||||
// CPU texture upscaler for the Direct3D 9 backend. Owns the scratch buffers
// used by the individual scaling passes.
class TextureScalerDX9 {
|
||||
public:
|
||||
TextureScalerDX9();
|
||||
|
||||
// Upscales the texture by `factor`. data/dstfmt/width/height are in/out
// parameters and are updated to describe the scaled image.
void Scale(u32* &data, u32 &dstfmt, int &width, int &height, int factor);
|
||||
|
||||
// Scaling algorithm selectors.
enum { XBRZ= 0, HYBRID = 1, BICUBIC = 2, HYBRID_BICUBIC = 3 };
|
||||
|
||||
private:
|
||||
// Individual scaling algorithms; each reads width x height pixels from
// `source` and writes the image scaled by `factor` to `dest`.
void ScaleXBRZ(int factor, u32* source, u32* dest, int width, int height);
|
||||
void ScaleBilinear(int factor, u32* source, u32* dest, int width, int height);
|
||||
void ScaleBicubicBSpline(int factor, u32* source, u32* dest, int width, int height);
|
||||
void ScaleBicubicMitchell(int factor, u32* source, u32* dest, int width, int height);
|
||||
void ScaleHybrid(int factor, u32* source, u32* dest, int width, int height, bool bicubic = false);
|
||||
// `dest` is taken by reference to a pointer, so the callee can repoint it.
void ConvertTo8888(u32 format, u32* source, u32* &dest, int width, int height);
|
||||
|
||||
void DePosterize(u32* source, u32* dest, int width, int height);
|
||||
|
||||
bool IsEmptyOrFlat(u32* data, int pixels, u32 fmt);
|
||||
|
||||
// depending on the factor and texture sizes, these can get pretty large
|
||||
// maximum is (100 MB total for a 512 by 512 texture with scaling factor 5 and hybrid scaling)
|
||||
// of course, scaling factor 5 is totally silly anyway
|
||||
SimpleBuf<u32> bufInput, bufDeposter, bufOutput, bufTmp1, bufTmp2, bufTmp3;
|
||||
};
|
||||
|
||||
};
|
|
@ -31,7 +31,6 @@
|
|||
#include "Core/System.h"
|
||||
#include "Core/Reporting.h"
|
||||
#include "Core/HLE/sceDisplay.h"
|
||||
#include "Common/ColorConv.h"
|
||||
#include "GPU/ge_constants.h"
|
||||
#include "GPU/GPUState.h"
|
||||
|
||||
|
@ -97,6 +96,22 @@ static const char color_vs[] =
|
|||
" gl_Position = a_position;\n"
|
||||
"}\n";
|
||||
|
||||
// Packs a 32-bit pixel into 16-bit 565: the top 5 bits of byte 0 go to bits
// 0-4, the top 6 bits of byte 1 to bits 5-10, the top 5 bits of byte 2 to
// bits 11-15. Byte 3 is dropped.
inline u16 RGBA8888toRGB565(u32 px) {
	const u32 low5  = (px >> 3) & 0x001F;
	const u32 mid6  = (px >> 5) & 0x07E0;
	const u32 high5 = (px >> 8) & 0xF800;
	return low5 | mid6 | high5;
}
|
||||
|
||||
// Packs a 32-bit pixel into 16-bit 4444 by keeping the top nibble of every
// byte: byte n of the input ends up in nibble n of the output.
inline u16 RGBA8888toRGBA4444(u32 px) {
	const u32 n0 = (px >>  4) & 0x000F;
	const u32 n1 = (px >>  8) & 0x00F0;
	const u32 n2 = (px >> 12) & 0x0F00;
	const u32 n3 = (px >> 16) & 0xF000;
	return n0 | n1 | n2 | n3;
}
|
||||
|
||||
// Packs a 32-bit pixel into 16-bit 565 while swapping bytes 0 and 2: the top
// 5 bits of byte 2 go to bits 0-4, the top 6 bits of byte 1 to bits 5-10,
// the top 5 bits of byte 0 to bits 11-15. Byte 3 is dropped.
inline u16 BGRA8888toRGB565(u32 px) {
	const u32 low5  = (px >> 19) & 0x001F;
	const u32 mid6  = (px >>  5) & 0x07E0;
	const u32 high5 = (px <<  8) & 0xF800;
	return low5 | mid6 | high5;
}
|
||||
|
||||
// Packs a 32-bit pixel into 16-bit 4444 while swapping bytes 0 and 2: the
// top nibble of byte 2 goes to nibble 0, byte 1 to nibble 1, byte 0 to
// nibble 2 and byte 3 to nibble 3.
inline u16 BGRA8888toRGBA4444(u32 px) {
	const u32 n0 = (px >> 20) & 0x000F;
	const u32 n1 = (px >>  8) & 0x00F0;
	const u32 n2 = (px <<  4) & 0x0F00;
	const u32 n3 = (px >> 16) & 0xF000;
	return n0 | n1 | n2 | n3;
}
|
||||
|
||||
void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format);
|
||||
|
||||
void CenterRect(float *x, float *y, float *w, float *h,
|
||||
|
@ -406,7 +421,14 @@ void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat sr
|
|||
{
|
||||
const u16 *src = (const u16 *)srcPixels + srcStride * y;
|
||||
u8 *dst = convBuf_ + 4 * width * y;
|
||||
ConvertRGB565ToRGBA888F((u32 *)dst, src, width);
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
u16 col = src[x];
|
||||
dst[x * 4] = Convert5To8((col) & 0x1f);
|
||||
dst[x * 4 + 1] = Convert6To8((col >> 5) & 0x3f);
|
||||
dst[x * 4 + 2] = Convert5To8((col >> 11) & 0x1f);
|
||||
dst[x * 4 + 3] = 255;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -414,7 +436,14 @@ void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat sr
|
|||
{
|
||||
const u16 *src = (const u16 *)srcPixels + srcStride * y;
|
||||
u8 *dst = convBuf_ + 4 * width * y;
|
||||
ConvertRGBA5551ToRGBA8888((u32 *)dst, src, width);
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
u16 col = src[x];
|
||||
dst[x * 4] = Convert5To8((col) & 0x1f);
|
||||
dst[x * 4 + 1] = Convert5To8((col >> 5) & 0x1f);
|
||||
dst[x * 4 + 2] = Convert5To8((col >> 10) & 0x1f);
|
||||
dst[x * 4 + 3] = (col >> 15) ? 255 : 0;
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -422,7 +451,14 @@ void FramebufferManager::MakePixelTexture(const u8 *srcPixels, GEBufferFormat sr
|
|||
{
|
||||
const u16 *src = (const u16 *)srcPixels + srcStride * y;
|
||||
u8 *dst = convBuf_ + 4 * width * y;
|
||||
ConvertRGBA4444ToRGBA8888((u32 *)dst, src, width);
|
||||
for (int x = 0; x < width; x++)
|
||||
{
|
||||
u16 col = src[x];
|
||||
dst[x * 4] = Convert4To8((col >> 8) & 0xf);
|
||||
dst[x * 4 + 1] = Convert4To8((col >> 4) & 0xf);
|
||||
dst[x * 4 + 2] = Convert4To8(col & 0xf);
|
||||
dst[x * 4 + 3] = Convert4To8(col >> 12);
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
|
@ -1325,6 +1361,7 @@ void FramebufferManager::BlitFramebuffer(VirtualFramebuffer *dst, int dstX, int
|
|||
void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u32 width, u32 height, GEBufferFormat format) {
|
||||
// Must skip stride in the cases below. Some games pack data into the cracks, like MotoGP.
|
||||
const u32 *src32 = (const u32 *)src;
|
||||
|
||||
if (format == GE_FORMAT_8888) {
|
||||
u32 *dst32 = (u32 *)dst;
|
||||
if (src == dst) {
|
||||
|
@ -1350,13 +1387,17 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u
|
|||
case GE_FORMAT_565: // BGR 565
|
||||
if (UseBGRA8888()) {
|
||||
for (u32 y = 0; y < height; ++y) {
|
||||
ConvertBGRA8888ToRGB565(dst16, src32, width);
|
||||
for (u32 x = 0; x < width; ++x) {
|
||||
dst16[x] = BGRA8888toRGB565(src32[x]);
|
||||
}
|
||||
src32 += srcStride;
|
||||
dst16 += dstStride;
|
||||
}
|
||||
} else {
|
||||
for (u32 y = 0; y < height; ++y) {
|
||||
ConvertRGBA8888ToRGB565(dst16, src32, width);
|
||||
for (u32 x = 0; x < width; ++x) {
|
||||
dst16[x] = RGBA8888toRGB565(src32[x]);
|
||||
}
|
||||
src32 += srcStride;
|
||||
dst16 += dstStride;
|
||||
}
|
||||
|
@ -1380,13 +1421,17 @@ void ConvertFromRGBA8888(u8 *dst, const u8 *src, u32 dstStride, u32 srcStride, u
|
|||
case GE_FORMAT_4444: // ABGR 4444
|
||||
if (UseBGRA8888()) {
|
||||
for (u32 y = 0; y < height; ++y) {
|
||||
ConvertBGRA8888ToRGBA4444(dst16, src32, width);
|
||||
for (u32 x = 0; x < width; ++x) {
|
||||
dst16[x] = BGRA8888toRGBA4444(src32[x]);
|
||||
}
|
||||
src32 += srcStride;
|
||||
dst16 += dstStride;
|
||||
}
|
||||
} else {
|
||||
for (u32 y = 0; y < height; ++y) {
|
||||
ConvertRGBA8888ToRGBA4444(dst16, src32, width);
|
||||
for (u32 x = 0; x < width; ++x) {
|
||||
dst16[x] = RGBA8888toRGBA4444(src32[x]);
|
||||
}
|
||||
src32 += srcStride;
|
||||
dst16 += dstStride;
|
||||
}
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
|
||||
#include "gfx_es2/glsl_program.h"
|
||||
#include "gfx_es2/gl_state.h"
|
||||
#include "Common/ColorConv.h"
|
||||
#include "Core/Reporting.h"
|
||||
#include "GPU/GLES/Framebuffer.h"
|
||||
#include "GPU/GLES/ShaderManager.h"
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
#include <algorithm>
|
||||
#include <cstring>
|
||||
|
||||
#include "Common/ColorConv.h"
|
||||
#include "Core/Host.h"
|
||||
#include "Core/MemMap.h"
|
||||
#include "Core/Reporting.h"
|
||||
|
@ -2002,22 +2001,8 @@ void TextureCache::LoadTextureLevel(TexCacheEntry &entry, int level, bool replac
|
|||
bool useBGRA = UseBGRA8888() && dstFmt == GL_UNSIGNED_BYTE;
|
||||
|
||||
u32 *pixelData = (u32 *)finalBuf;
|
||||
if (scaleFactor > 1 && (entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
|
||||
GEBufferFormat dstFormat;
|
||||
switch (dstFmt) {
|
||||
case GL_UNSIGNED_BYTE: dstFormat = GE_FORMAT_8888; break;
|
||||
case GL_UNSIGNED_SHORT_4_4_4_4: dstFormat = GE_FORMAT_4444; break;
|
||||
case GL_UNSIGNED_SHORT_5_6_5: dstFormat = GE_FORMAT_565; break;
|
||||
case GL_UNSIGNED_SHORT_5_5_5_1: dstFormat = GE_FORMAT_5551; break;
|
||||
}
|
||||
scaler.Scale(pixelData, dstFormat, w, h, scaleFactor);
|
||||
switch (dstFormat) {
|
||||
case GE_FORMAT_565: dstFmt = GL_UNSIGNED_SHORT_5_6_5; break;
|
||||
case GE_FORMAT_5551: dstFmt = GL_UNSIGNED_SHORT_5_5_5_1; break;
|
||||
case GE_FORMAT_4444: dstFmt = GL_UNSIGNED_SHORT_4_4_4_4; break;
|
||||
case GE_FORMAT_8888: dstFmt = GL_UNSIGNED_BYTE; break;
|
||||
}
|
||||
}
|
||||
if (scaleFactor > 1 && (entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0)
|
||||
scaler.Scale(pixelData, dstFmt, w, h, scaleFactor);
|
||||
|
||||
if ((entry.status & TexCacheEntry::STATUS_CHANGE_FREQUENT) == 0) {
|
||||
TexCacheEntry::Status alphaStatus = CheckAlpha(pixelData, dstFmt, useUnpack ? bufw : w, w, h);
|
||||
|
@ -2080,33 +2065,47 @@ bool TextureCache::DecodeTexture(u8* output, const GPUgstate &state) {
|
|||
|
||||
switch (dstFmt) {
|
||||
case GL_UNSIGNED_SHORT_4_4_4_4:
|
||||
for (int y = 0; y < h; y++) {
|
||||
u16 *src = (u16*)finalBuf + y*bufw;
|
||||
u32 *dst = (u32*)output + y*w;
|
||||
ConvertRGBA4444ToRGBA8888(dst, src, bufw);
|
||||
}
|
||||
for (int y = 0; y < h; y++)
|
||||
for (int x = 0; x < bufw; x++) {
|
||||
u32 val = ((u16*)finalBuf)[y*bufw + x];
|
||||
u32 r = ((val>>12) & 0xF) * 17;
|
||||
u32 g = ((val>> 8) & 0xF) * 17;
|
||||
u32 b = ((val>> 4) & 0xF) * 17;
|
||||
u32 a = ((val>> 0) & 0xF) * 17;
|
||||
((u32*)output)[y*w + x] = (a << 24) | (r << 16) | (g << 8) | b;
|
||||
}
|
||||
break;
|
||||
|
||||
case GL_UNSIGNED_SHORT_5_5_5_1:
|
||||
for (int y = 0; y < h; y++) {
|
||||
u16 *src = (u16*)finalBuf + y*bufw;
|
||||
u32 *dst = (u32*)output + y*w;
|
||||
ConvertRGBA5551ToRGBA8888(dst, src, bufw);
|
||||
}
|
||||
for (int y = 0; y < h; y++)
|
||||
for (int x = 0; x < bufw; x++) {
|
||||
u32 val = ((u16*)finalBuf)[y*bufw + x];
|
||||
u32 r = Convert5To8((val>>11) & 0x1F);
|
||||
u32 g = Convert5To8((val>> 6) & 0x1F);
|
||||
u32 b = Convert5To8((val>> 1) & 0x1F);
|
||||
u32 a = (val & 0x1) * 255;
|
||||
((u32*)output)[y*w + x] = (a << 24) | (r << 16) | (g << 8) | b;
|
||||
}
|
||||
break;
|
||||
|
||||
case GL_UNSIGNED_SHORT_5_6_5:
|
||||
for (int y = 0; y < h; y++) {
|
||||
u16 *src = (u16*)finalBuf + y*bufw;
|
||||
u32 *dst = (u32*)output + y*w;
|
||||
ConvertRGB565ToRGBA888F(dst, src, bufw);
|
||||
}
|
||||
for (int y = 0; y < h; y++)
|
||||
for (int x = 0; x < bufw; x++) {
|
||||
u32 val = ((u16*)finalBuf)[y*bufw + x];
|
||||
u32 a = 0xFF;
|
||||
u32 r = Convert5To8((val>>11) & 0x1F);
|
||||
u32 g = Convert6To8((val>> 5) & 0x3F);
|
||||
u32 b = Convert5To8((val ) & 0x1F);
|
||||
((u32*)output)[y*w + x] = (a << 24) | (r << 16) | (g << 8) | b;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
for (int y = 0; y < h; y++) {
|
||||
ConvertBGRA8888ToRGBA8888((u32 *)output + y * w, (u32 *)(finalBuf)+y * bufw, bufw);
|
||||
}
|
||||
for (int y = 0; y < h; y++)
|
||||
for (int x = 0; x < bufw; x++) {
|
||||
u32 val = ((u32*)finalBuf)[y*bufw + x];
|
||||
((u32*)output)[y*w + x] = ((val & 0xFF000000)) | ((val & 0x00FF0000)>>16) | ((val & 0x0000FF00)) | ((val & 0x000000FF)<<16);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
|
|
@ -25,7 +25,7 @@
|
|||
#include "Globals.h"
|
||||
#include "GPU/GPUInterface.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/Common/TextureScaler.h"
|
||||
#include "GPU/GLES/TextureScaler.h"
|
||||
#include "GPU/Common/TextureCacheCommon.h"
|
||||
|
||||
struct VirtualFramebuffer;
|
||||
|
|
|
@ -21,20 +21,18 @@
|
|||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
|
||||
#include "GPU/Common/TextureScaler.h"
|
||||
#include "GPU/GLES/TextureScaler.h"
|
||||
|
||||
#include "Core/Config.h"
|
||||
#include "Common/Common.h"
|
||||
#include "Common/ColorConv.h"
|
||||
#include "Common/Log.h"
|
||||
#include "Common/MsgHandler.h"
|
||||
#include "Common/CommonFuncs.h"
|
||||
#include "Common/ThreadPools.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "ext/xbrz/xbrz.h"
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
#if _M_SSE >= 0x402
|
||||
#include <nmmintrin.h>
|
||||
|
@ -47,11 +45,53 @@
|
|||
#include "native/base/timeutil.h"
|
||||
#endif
|
||||
|
||||
// Helper Functions (mostly math for parallelization)
|
||||
/////////////////////////////////////// Helper Functions (mostly math for parallelization)
|
||||
|
||||
namespace {
|
||||
//////////////////////////////////////////////////////////////////// Color space conversion
|
||||
|
||||
// Various image processing
|
||||
// convert 4444 image to 8888, parallelizable
|
||||
void convert4444(u16* data, u32* out, int width, int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = ((val>>12) & 0xF) * 17;
|
||||
u32 g = ((val>> 8) & 0xF) * 17;
|
||||
u32 b = ((val>> 4) & 0xF) * 17;
|
||||
u32 a = ((val>> 0) & 0xF) * 17;
|
||||
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert 565 image to 8888, parallelizable
|
||||
void convert565(u16* data, u32* out, int width, int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = Convert5To8((val>>11) & 0x1F);
|
||||
u32 g = Convert6To8((val>> 5) & 0x3F);
|
||||
u32 b = Convert5To8((val ) & 0x1F);
|
||||
out[y*width + x] = (0xFF << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// convert 5551 image to 8888, parallelizable
|
||||
void convert5551(u16* data, u32* out, int width, int l, int u) {
|
||||
for(int y = l; y < u; ++y) {
|
||||
for(int x = 0; x < width; ++x) {
|
||||
u32 val = data[y*width + x];
|
||||
u32 r = Convert5To8((val>>11) & 0x1F);
|
||||
u32 g = Convert5To8((val>> 6) & 0x1F);
|
||||
u32 b = Convert5To8((val>> 1) & 0x1F);
|
||||
u32 a = (val & 0x1) * 255;
|
||||
out[y*width + x] = (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////// Various image processing
|
||||
|
||||
#define R(_col) ((_col>> 0)&0xFF)
|
||||
#define G(_col) ((_col>> 8)&0xFF)
|
||||
|
@ -174,8 +214,7 @@ namespace {
|
|||
out[y*width + x] += 400; // assume distance at borders, usually makes for better result
|
||||
continue;
|
||||
}
|
||||
u32 d = data[yy*width + xx];
|
||||
out[y*width + x] += DISTANCE(d, center);
|
||||
out[y*width + x] += DISTANCE(data[yy*width + xx], center);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -500,7 +539,7 @@ bool TextureScaler::IsEmptyOrFlat(u32* data, int pixels, GLenum fmt) {
|
|||
return true;
|
||||
}
|
||||
|
||||
void TextureScaler::Scale(u32* &data, GEBufferFormat &dstFmt, int &width, int &height, int factor) {
|
||||
void TextureScaler::Scale(u32* &data, GLenum &dstFmt, int &width, int &height, int factor) {
|
||||
// prevent processing empty or flat textures (this happens a lot in some games)
|
||||
// doesn't hurt the standard case, will be very quick for textures with actual texture
|
||||
if(IsEmptyOrFlat(data, width*height, dstFmt)) {
|
||||
|
@ -547,7 +586,7 @@ void TextureScaler::Scale(u32* &data, GEBufferFormat &dstFmt, int &width, int &h
|
|||
|
||||
// update values accordingly
|
||||
data = outputBuf;
|
||||
dstFmt = GE_FORMAT_8888;
|
||||
dstFmt = GL_UNSIGNED_BYTE;
|
||||
width *= factor;
|
||||
height *= factor;
|
||||
|
||||
|
@ -618,39 +657,21 @@ void TextureScaler::DePosterize(u32* source, u32* dest, int width, int height) {
|
|||
GlobalThreadPool::Loop(std::bind(&deposterizeV, bufTmp3.data(), dest, width, height, placeholder::_1, placeholder::_2), 0, height);
|
||||
}
|
||||
|
||||
static void convert4444(u16* data, u32* out, int width, int l, int u) {
|
||||
for (int y = l; y < u; ++y) {
|
||||
ConvertRGBA4444ToRGBA8888(out + y * width, data + y * width, width);
|
||||
}
|
||||
}
|
||||
|
||||
static void convert565(u16* data, u32* out, int width, int l, int u) {
|
||||
for (int y = l; y < u; ++y) {
|
||||
ConvertRGB565ToRGBA888F(out + y * width, data + y * width, width);
|
||||
}
|
||||
}
|
||||
|
||||
static void convert5551(u16* data, u32* out, int width, int l, int u) {
|
||||
for (int y = l; y < u; ++y) {
|
||||
ConvertRGBA5551ToRGBA8888(out + y * width, data + y * width, width);
|
||||
}
|
||||
}
|
||||
|
||||
void TextureScaler::ConvertTo8888(GEBufferFormat format, u32* source, u32* &dest, int width, int height) {
|
||||
void TextureScaler::ConvertTo8888(GLenum format, u32* source, u32* &dest, int width, int height) {
|
||||
switch(format) {
|
||||
case GE_FORMAT_8888:
|
||||
case GL_UNSIGNED_BYTE:
|
||||
dest = source; // already fine
|
||||
break;
|
||||
|
||||
case GE_FORMAT_4444:
|
||||
case GL_UNSIGNED_SHORT_4_4_4_4:
|
||||
GlobalThreadPool::Loop(std::bind(&convert4444, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
break;
|
||||
|
||||
case GE_FORMAT_565:
|
||||
case GL_UNSIGNED_SHORT_5_6_5:
|
||||
GlobalThreadPool::Loop(std::bind(&convert565, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
break;
|
||||
|
||||
case GE_FORMAT_5551:
|
||||
case GL_UNSIGNED_SHORT_5_5_5_1:
|
||||
GlobalThreadPool::Loop(std::bind(&convert5551, (u16*)source, dest, width, placeholder::_1, placeholder::_2), 0, height);
|
||||
break;
|
||||
|
|
@ -20,15 +20,15 @@
|
|||
#include "Common/MemoryUtil.h"
|
||||
#include "../Globals.h"
|
||||
#include "gfx/gl_common.h"
|
||||
#include "GPU/ge_constants.h"
|
||||
|
||||
#include <vector>
|
||||
|
||||
|
||||
class TextureScaler {
|
||||
public:
|
||||
TextureScaler();
|
||||
|
||||
void Scale(u32* &data, GEBufferFormat &dstfmt, int &width, int &height, int factor);
|
||||
void Scale(u32* &data, GLenum &dstfmt, int &width, int &height, int factor);
|
||||
|
||||
enum { XBRZ= 0, HYBRID = 1, BICUBIC = 2, HYBRID_BICUBIC = 3 };
|
||||
|
||||
|
@ -38,7 +38,7 @@ private:
|
|||
void ScaleBicubicBSpline(int factor, u32* source, u32* dest, int width, int height);
|
||||
void ScaleBicubicMitchell(int factor, u32* source, u32* dest, int width, int height);
|
||||
void ScaleHybrid(int factor, u32* source, u32* dest, int width, int height, bool bicubic = false);
|
||||
void ConvertTo8888(GEBufferFormat format, u32* source, u32* &dest, int width, int height);
|
||||
void ConvertTo8888(GLenum format, u32* source, u32* &dest, int width, int height);
|
||||
|
||||
void DePosterize(u32* source, u32* dest, int width, int height);
|
||||
|
|
@ -191,7 +191,6 @@
|
|||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Common\TextureCacheCommon.h" />
|
||||
<ClInclude Include="Common\TextureScaler.h" />
|
||||
<ClInclude Include="Common\TransformCommon.h" />
|
||||
<ClInclude Include="Common\VertexDecoderCommon.h" />
|
||||
<ClInclude Include="Debugger\Breakpoints.h" />
|
||||
|
@ -205,6 +204,7 @@
|
|||
<ClInclude Include="Directx9\ShaderManagerDX9.h" />
|
||||
<ClInclude Include="Directx9\StateMappingDX9.h" />
|
||||
<ClInclude Include="Directx9\TextureCacheDX9.h" />
|
||||
<ClInclude Include="Directx9\TextureScalerDX9.h" />
|
||||
<ClInclude Include="Directx9\TransformPipelineDX9.h" />
|
||||
<ClInclude Include="Directx9\VertexShaderGeneratorDX9.h" />
|
||||
<ClInclude Include="ge_constants.h" />
|
||||
|
@ -217,6 +217,7 @@
|
|||
<ClInclude Include="GLES\ShaderManager.h" />
|
||||
<ClInclude Include="GLES\StateMapping.h" />
|
||||
<ClInclude Include="GLES\TextureCache.h" />
|
||||
<ClInclude Include="GLES\TextureScaler.h" />
|
||||
<ClInclude Include="GLES\TransformPipeline.h" />
|
||||
<ClInclude Include="GLES\VertexShaderGenerator.h" />
|
||||
<ClInclude Include="GPUCommon.h" />
|
||||
|
@ -225,6 +226,7 @@
|
|||
<ClInclude Include="Math3D.h" />
|
||||
<ClInclude Include="Null\NullGpu.h" />
|
||||
<ClInclude Include="Software\Clipper.h" />
|
||||
<ClInclude Include="Software\Colors.h" />
|
||||
<ClInclude Include="Software\Lighting.h" />
|
||||
<ClInclude Include="Software\Rasterizer.h" />
|
||||
<ClInclude Include="Software\SoftGpu.h" />
|
||||
|
@ -245,7 +247,6 @@
|
|||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Common\TextureCacheCommon.cpp" />
|
||||
<ClCompile Include="Common\TextureScaler.cpp" />
|
||||
<ClCompile Include="Common\TransformCommon.cpp" />
|
||||
<ClCompile Include="Common\SoftwareTransformCommon.cpp" />
|
||||
<ClCompile Include="Common\VertexDecoderArm.cpp">
|
||||
|
@ -269,6 +270,7 @@
|
|||
<ClCompile Include="Directx9\StateMappingDX9.cpp" />
|
||||
<ClCompile Include="Directx9\StencilBufferDX9.cpp" />
|
||||
<ClCompile Include="Directx9\TextureCacheDX9.cpp" />
|
||||
<ClCompile Include="Directx9\TextureScalerDX9.cpp" />
|
||||
<ClCompile Include="Directx9\TransformPipelineDX9.cpp" />
|
||||
<ClCompile Include="Directx9\VertexShaderGeneratorDX9.cpp" />
|
||||
<ClCompile Include="GeDisasm.cpp" />
|
||||
|
@ -282,6 +284,7 @@
|
|||
<ClCompile Include="GLES\StateMapping.cpp" />
|
||||
<ClCompile Include="GLES\StencilBuffer.cpp" />
|
||||
<ClCompile Include="GLES\TextureCache.cpp" />
|
||||
<ClCompile Include="GLES\TextureScaler.cpp" />
|
||||
<ClCompile Include="GLES\TransformPipeline.cpp" />
|
||||
<ClCompile Include="GLES\VertexShaderGenerator.cpp" />
|
||||
<ClCompile Include="GPUCommon.cpp" />
|
||||
|
|
|
@ -42,8 +42,8 @@
|
|||
<ClInclude Include="GPUCommon.h">
|
||||
<Filter>Common</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Common\TextureScaler.h">
|
||||
<Filter>Common</Filter>
|
||||
<ClInclude Include="Software\Colors.h">
|
||||
<Filter>Software</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Software\Clipper.h">
|
||||
<Filter>Software</Filter>
|
||||
|
@ -72,6 +72,9 @@
|
|||
<ClInclude Include="GLES\TextureCache.h">
|
||||
<Filter>GLES</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="GLES\TextureScaler.h">
|
||||
<Filter>GLES</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="GLES\TransformPipeline.h">
|
||||
<Filter>GLES</Filter>
|
||||
</ClInclude>
|
||||
|
@ -93,6 +96,9 @@
|
|||
<ClInclude Include="Directx9\TransformPipelineDX9.h">
|
||||
<Filter>DirectX9</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Directx9\TextureScalerDX9.h">
|
||||
<Filter>DirectX9</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="Directx9\TextureCacheDX9.h">
|
||||
<Filter>DirectX9</Filter>
|
||||
</ClInclude>
|
||||
|
@ -251,8 +257,11 @@
|
|||
<ClCompile Include="Directx9\PixelShaderGeneratorDX9.cpp">
|
||||
<Filter>DirectX9</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Common\TextureScaler.cpp">
|
||||
<Filter>Common</Filter>
|
||||
<ClCompile Include="Directx9\TextureScalerDX9.cpp">
|
||||
<Filter>DirectX9</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GLES\TextureScaler.cpp">
|
||||
<Filter>GLES</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="Common\IndexGenerator.cpp">
|
||||
<Filter>Common</Filter>
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
// Copyright (C) 2015 PPSSPP Project.
|
||||
// Copyright (c) 2013- PPSSPP Project.
|
||||
|
||||
// This program is free software: you can redistribute it and/or modify
|
||||
// it under the terms of the GNU General Public License as published by
|
||||
|
@ -15,27 +15,12 @@
|
|||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "CommonTypes.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
|
||||
inline u8 Convert4To8(u8 v) {
|
||||
// Swizzle bits: 00001234 -> 12341234
|
||||
return (v << 4) | (v);
|
||||
}
|
||||
|
||||
inline u8 Convert5To8(u8 v) {
|
||||
// Swizzle bits: 00012345 -> 12345123
|
||||
return (v << 3) | (v >> 2);
|
||||
}
|
||||
|
||||
inline u8 Convert6To8(u8 v) {
|
||||
// Swizzle bits: 00123456 -> 12345612
|
||||
return (v << 2) | (v >> 4);
|
||||
}
|
||||
|
||||
inline u32 DecodeRGBA4444(u16 src) {
|
||||
static inline u32 DecodeRGBA4444(u16 src)
|
||||
{
|
||||
const u32 r = (src & 0x000F) << 0;
|
||||
const u32 g = (src & 0x00F0) << 4;
|
||||
const u32 b = (src & 0x0F00) << 8;
|
||||
|
@ -45,7 +30,8 @@ inline u32 DecodeRGBA4444(u16 src) {
|
|||
return c | (c << 4);
|
||||
}
|
||||
|
||||
inline u32 DecodeRGBA5551(u16 src) {
|
||||
static inline u32 DecodeRGBA5551(u16 src)
|
||||
{
|
||||
u8 r = Convert5To8((src >> 0) & 0x1F);
|
||||
u8 g = Convert5To8((src >> 5) & 0x1F);
|
||||
u8 b = Convert5To8((src >> 10) & 0x1F);
|
||||
|
@ -54,7 +40,8 @@ inline u32 DecodeRGBA5551(u16 src) {
|
|||
return (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
|
||||
inline u32 DecodeRGB565(u16 src) {
|
||||
static inline u32 DecodeRGB565(u16 src)
|
||||
{
|
||||
u8 r = Convert5To8((src >> 0) & 0x1F);
|
||||
u8 g = Convert6To8((src >> 5) & 0x3F);
|
||||
u8 b = Convert5To8((src >> 11) & 0x1F);
|
||||
|
@ -62,7 +49,8 @@ inline u32 DecodeRGB565(u16 src) {
|
|||
return (a << 24) | (b << 16) | (g << 8) | r;
|
||||
}
|
||||
|
||||
inline u32 DecodeRGBA8888(u32 src) {
|
||||
static inline u32 DecodeRGBA8888(u32 src)
|
||||
{
|
||||
#if 1
|
||||
return src;
|
||||
#else
|
||||
|
@ -75,7 +63,8 @@ inline u32 DecodeRGBA8888(u32 src) {
|
|||
#endif
|
||||
}
|
||||
|
||||
inline u16 RGBA8888To565(u32 value) {
|
||||
static inline u16 RGBA8888To565(u32 value)
|
||||
{
|
||||
u8 r = value & 0xFF;
|
||||
u8 g = (value >> 8) & 0xFF;
|
||||
u8 b = (value >> 16) & 0xFF;
|
||||
|
@ -85,7 +74,8 @@ inline u16 RGBA8888To565(u32 value) {
|
|||
return (u16)r | ((u16)g << 5) | ((u16)b << 11);
|
||||
}
|
||||
|
||||
inline u16 RGBA8888To5551(u32 value) {
|
||||
static inline u16 RGBA8888To5551(u32 value)
|
||||
{
|
||||
u8 r = value & 0xFF;
|
||||
u8 g = (value >> 8) & 0xFF;
|
||||
u8 b = (value >> 16) & 0xFF;
|
||||
|
@ -97,24 +87,12 @@ inline u16 RGBA8888To5551(u32 value) {
|
|||
return (u16)r | ((u16)g << 5) | ((u16)b << 10) | ((u16)a << 15);
|
||||
}
|
||||
|
||||
static inline u16 RGBA8888To4444(u32 value) {
|
||||
static inline u16 RGBA8888To4444(u32 value)
|
||||
{
|
||||
const u32 c = value >> 4;
|
||||
const u16 r = (c >> 0) & 0x000F;
|
||||
const u16 g = (c >> 4) & 0x00F0;
|
||||
const u16 b = (c >> 8) & 0x0F00;
|
||||
const u16 r = (c >> 0) & 0x000F;
|
||||
const u16 g = (c >> 4) & 0x00F0;
|
||||
const u16 b = (c >> 8) & 0x0F00;
|
||||
const u16 a = (c >> 12) & 0xF000;
|
||||
return r | g | b | a;
|
||||
}
|
||||
|
||||
void ConvertBGRA8888ToRGB565(u16 *dst, const u32 *src, int numPixels);
|
||||
void ConvertRGBA8888ToRGB565(u16 *dst, const u32 *src, int numPixels);
|
||||
void ConvertBGRA8888ToRGBA4444(u16 *dst, const u32 *src, int numPixels);
|
||||
void ConvertRGBA8888ToRGBA4444(u16 *dst, const u32 *src, int numPixels);
|
||||
void ConvertRGBA8888ToRGBA5551(u16 *dst, const u32 *src, int numPixels);
|
||||
void ConvertBGRA8888ToRGBA5551(u16 *dst, const u32 *src, int numPixels);
|
||||
|
||||
void ConvertRGB565ToRGBA888F(u32 *dst, const u16 *src, int numPixels);
|
||||
void ConvertRGBA5551ToRGBA8888(u32 *dst, const u16 *src, int numPixels);
|
||||
void ConvertRGBA4444ToRGBA8888(u32 *dst, const u16 *src, int numPixels);
|
||||
|
||||
void ConvertBGRA8888ToRGBA8888(u32 *dst, const u32 *src, int numPixels);
|
|
@ -17,7 +17,6 @@
|
|||
|
||||
#include "base/basictypes.h"
|
||||
|
||||
#include "Common/ColorConv.h"
|
||||
#include "Common/ThreadPools.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/MemMap.h"
|
||||
|
@ -27,6 +26,7 @@
|
|||
#include "GPU/Common/TextureDecoder.h"
|
||||
#include "GPU/Software/SoftGpu.h"
|
||||
#include "GPU/Software/Rasterizer.h"
|
||||
#include "GPU/Software/Colors.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
|
||||
#include "Common/ColorConv.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "GPU/ge_constants.h"
|
||||
#include "GPU/Common/TextureDecoder.h"
|
||||
|
@ -33,6 +32,7 @@
|
|||
|
||||
#include "GPU/Software/SoftGpu.h"
|
||||
#include "GPU/Software/TransformUnit.h"
|
||||
#include "GPU/Software/Colors.h"
|
||||
#include "GPU/Software/Rasterizer.h"
|
||||
|
||||
static GLuint temp_texture = 0;
|
||||
|
@ -181,7 +181,8 @@ void SoftGPU::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat for
|
|||
}
|
||||
|
||||
// Copies RGBA8 data from RAM to the currently bound render target.
|
||||
void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
|
||||
void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight)
|
||||
{
|
||||
float dstwidth = (float)PSP_CoreParameter().pixelWidth;
|
||||
float dstheight = (float)PSP_CoreParameter().pixelHeight;
|
||||
|
||||
|
@ -212,15 +213,21 @@ void SoftGPU::CopyToCurrentFboFromDisplayRam(int srcwidth, int srcheight) {
|
|||
|
||||
switch (displayFormat_) {
|
||||
case GE_FORMAT_565:
|
||||
ConvertRGB565ToRGBA888F(buf_line, fb_line, srcwidth);
|
||||
for (int x = 0; x < srcwidth; ++x) {
|
||||
buf_line[x] = DecodeRGB565(fb_line[x]);
|
||||
}
|
||||
break;
|
||||
|
||||
case GE_FORMAT_5551:
|
||||
ConvertRGBA5551ToRGBA8888(buf_line, fb_line, srcwidth);
|
||||
for (int x = 0; x < srcwidth; ++x) {
|
||||
buf_line[x] = DecodeRGBA5551(fb_line[x]);
|
||||
}
|
||||
break;
|
||||
|
||||
case GE_FORMAT_4444:
|
||||
ConvertRGBA4444ToRGBA8888(buf_line, fb_line, srcwidth);
|
||||
for (int x = 0; x < srcwidth; ++x) {
|
||||
buf_line[x] = DecodeRGBA4444(fb_line[x]);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
|
|
18
Globals.h
18
Globals.h
|
@ -30,6 +30,24 @@
|
|||
#define IS_LITTLE_ENDIAN (*(const u16 *)"\0\xff" >= 0x100)
|
||||
#define IS_BIG_ENDIAN (*(const u16 *)"\0\xff" < 0x100)
|
||||
|
||||
inline u8 Convert4To8(u8 v)
|
||||
{
|
||||
// Swizzle bits: 00001234 -> 12341234
|
||||
return (v << 4) | (v);
|
||||
}
|
||||
|
||||
inline u8 Convert5To8(u8 v)
|
||||
{
|
||||
// Swizzle bits: 00012345 -> 12345123
|
||||
return (v << 3) | (v >> 2);
|
||||
}
|
||||
|
||||
inline u8 Convert6To8(u8 v)
|
||||
{
|
||||
// Swizzle bits: 00123456 -> 12345612
|
||||
return (v << 2) | (v >> 4);
|
||||
}
|
||||
|
||||
static inline u8 clamp_u8(int i) {
|
||||
#ifdef ARM
|
||||
asm("usat %0, #8, %1" : "=r"(i) : "r"(i));
|
||||
|
|
|
@ -45,7 +45,6 @@ win32 {
|
|||
|
||||
SOURCES += $$P/Common/ChunkFile.cpp \
|
||||
$$P/Common/ConsoleListener.cpp \
|
||||
$$P/Common/ColorConv.cpp \
|
||||
$$P/Common/FileUtil.cpp \
|
||||
$$P/Common/LogManager.cpp \
|
||||
$$P/Common/KeyMap.cpp \
|
||||
|
@ -58,7 +57,6 @@ SOURCES += $$P/Common/ChunkFile.cpp \
|
|||
$$P/Common/Crypto/*.cpp
|
||||
HEADERS += $$P/Common/ChunkFile.h \
|
||||
$$P/Common/ConsoleListener.h \
|
||||
$$P/Common/ColorConv.h \
|
||||
$$P/Common/FileUtil.h \
|
||||
$$P/Common/LogManager.h \
|
||||
$$P/Common/KeyMap.h \
|
||||
|
|
|
@ -36,6 +36,7 @@ SOURCES += $$P/GPU/GeDisasm.cpp \ # GPU
|
|||
$$P/GPU/GLES/StateMapping.cpp \
|
||||
$$P/GPU/GLES/StencilBuffer.cpp \
|
||||
$$P/GPU/GLES/TextureCache.cpp \
|
||||
$$P/GPU/GLES/TextureScaler.cpp \
|
||||
$$P/GPU/GLES/TransformPipeline.cpp \
|
||||
$$P/GPU/GLES/VertexShaderGenerator.cpp \
|
||||
$$P/GPU/Software/*.cpp \
|
||||
|
@ -43,7 +44,6 @@ SOURCES += $$P/GPU/GeDisasm.cpp \ # GPU
|
|||
$$P/GPU/Common/IndexGenerator.cpp \
|
||||
$$P/GPU/Common/TextureDecoder.cpp \
|
||||
$$P/GPU/Common/VertexDecoderCommon.cpp \
|
||||
$$P/GPU/Common/TextureScaler.cpp \
|
||||
$$P/GPU/Common/TextureCacheCommon.cpp \
|
||||
$$P/GPU/Common/TransformCommon.cpp \
|
||||
$$P/GPU/Common/SoftwareTransformCommon.cpp \
|
||||
|
|
|
@ -68,7 +68,7 @@
|
|||
#include "GPU/GPUInterface.h"
|
||||
#include "GPU/GPUState.h"
|
||||
#include "gfx_es2/gpu_features.h"
|
||||
#include "GPU/Common/TextureScaler.h"
|
||||
#include "GPU/GLES/TextureScaler.h"
|
||||
#include "GPU/GLES/TextureCache.h"
|
||||
#include "GPU/GLES/Framebuffer.h"
|
||||
#include "ControlMapping.h"
|
||||
|
|
|
@ -129,7 +129,6 @@ EXEC_AND_LIB_FILES := \
|
|||
$(SRC)/ext/udis86/udis86.c \
|
||||
$(SRC)/ext/xbrz/xbrz.cpp \
|
||||
$(SRC)/ext/xxhash.c \
|
||||
$(SRC)/Common/ColorConv.cpp \
|
||||
$(SRC)/Common/Crypto/md5.cpp \
|
||||
$(SRC)/Common/Crypto/sha1.cpp \
|
||||
$(SRC)/Common/Crypto/sha256.cpp \
|
||||
|
@ -153,7 +152,6 @@ EXEC_AND_LIB_FILES := \
|
|||
$(SRC)/GPU/Common/SoftwareTransformCommon.cpp.arm \
|
||||
$(SRC)/GPU/Common/VertexDecoderCommon.cpp.arm \
|
||||
$(SRC)/GPU/Common/TextureCacheCommon.cpp.arm \
|
||||
$(SRC)/GPU/Common/TextureScaler.cpp.arm \
|
||||
$(SRC)/GPU/Common/SplineCommon.cpp.arm \
|
||||
$(SRC)/GPU/Common/DrawEngineCommon.cpp.arm \
|
||||
$(SRC)/GPU/Common/TransformCommon.cpp.arm \
|
||||
|
@ -172,6 +170,7 @@ EXEC_AND_LIB_FILES := \
|
|||
$(SRC)/GPU/GLES/VertexShaderGenerator.cpp.arm \
|
||||
$(SRC)/GPU/GLES/FragmentShaderGenerator.cpp.arm \
|
||||
$(SRC)/GPU/GLES/FragmentTestCache.cpp.arm \
|
||||
$(SRC)/GPU/GLES/TextureScaler.cpp \
|
||||
$(SRC)/GPU/GLES/Spline.cpp \
|
||||
$(SRC)/GPU/Null/NullGpu.cpp \
|
||||
$(SRC)/GPU/Software/Clipper.cpp \
|
||||
|
|
|
@ -18,7 +18,6 @@
|
|||
#include "headless/Compare.h"
|
||||
#include "file/file_util.h"
|
||||
#include "Core/Host.h"
|
||||
#include "Common/ColorConv.h"
|
||||
#include "GPU/Common/GPUDebugInterface.h"
|
||||
#include "GPU/Common/TextureDecoder.h"
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue