softgpu: Use cached sampler state outside jit.
This commit is contained in:
parent
a2abf9402b
commit
a228b2ab6c
5 changed files with 78 additions and 76 deletions
|
@ -39,9 +39,9 @@ extern u32 clut[4096];
|
|||
|
||||
namespace Sampler {
|
||||
|
||||
// Forward declarations for the SampleNearest / SampleLinear / SampleFetch
// functions that are defined further down in this namespace.
// NOTE(review): this text looks like a rendered diff - the first three
// declarations appear to be the pre-change versions and the last three (taking
// an extra `const SamplerID &samplerID`) their replacements; confirm against
// the real file.
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleFetch(int u, int v, const u8 *tptr, int bufw, int level);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleFetch(int u, int v, const u8 *tptr, int bufw, int level, const SamplerID &samplerID);
|
||||
|
||||
std::mutex jitCacheLock;
|
||||
SamplerJitCache *jitCache = nullptr;
|
||||
|
@ -242,10 +242,9 @@ FetchFunc SamplerJitCache::GetFetch(const SamplerID &id) {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
template <unsigned int texel_size_bits>
|
||||
static inline int GetPixelDataOffset(unsigned int row_pitch_pixels, unsigned int u, unsigned int v)
|
||||
{
|
||||
if (!gstate.isTextureSwizzled())
|
||||
template <uint32_t texel_size_bits>
|
||||
static inline int GetPixelDataOffset(uint32_t row_pitch_pixels, uint32_t u, uint32_t v, bool swizzled) {
|
||||
if (!swizzled)
|
||||
return (v * (row_pitch_pixels * texel_size_bits >> 3)) + (u * texel_size_bits >> 3);
|
||||
|
||||
const int tile_size_bits = 32;
|
||||
|
@ -263,12 +262,10 @@ static inline int GetPixelDataOffset(unsigned int row_pitch_pixels, unsigned int
|
|||
return tile_idx * (tile_size_bits / 8) + ((u % texels_per_tile) * texel_size_bits) / 8;
|
||||
}
|
||||
|
||||
static inline u32 LookupColor(unsigned int index, unsigned int level)
|
||||
{
|
||||
const bool mipmapShareClut = gstate.isClutSharedForMipmaps();
|
||||
const int clutSharingOffset = mipmapShareClut ? 0 : level * 16;
|
||||
static inline u32 LookupColor(unsigned int index, unsigned int level, const SamplerID &samplerID) {
|
||||
const int clutSharingOffset = samplerID.useSharedClut ? 0 : level * 16;
|
||||
|
||||
switch (gstate.getClutPaletteFormat()) {
|
||||
switch (samplerID.ClutFmt()) {
|
||||
case GE_CMODE_16BIT_BGR5650:
|
||||
return RGB565ToRGBA8888(reinterpret_cast<u16*>(clut)[index + clutSharingOffset]);
|
||||
|
||||
|
@ -282,11 +279,15 @@ static inline u32 LookupColor(unsigned int index, unsigned int level)
|
|||
return clut[index + clutSharingOffset];
|
||||
|
||||
default:
|
||||
ERROR_LOG_REPORT(G3D, "Software: Unsupported palette format: %x", gstate.getClutPaletteFormat());
|
||||
ERROR_LOG_REPORT(G3D, "Software: Unsupported palette format: %x", samplerID.ClutFmt());
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Transforms a raw texel index into a CLUT index by forwarding to
// gstate.transformClutIndex(index) and returning its result.
// samplerID is accepted so callers can pass their sampler state through, but
// it is not read here: the transform still comes from the global gstate.
uint32_t TransformClutIndex(uint32_t index, const SamplerID &samplerID) {
|
||||
return gstate.transformClutIndex(index);
|
||||
}
|
||||
|
||||
struct Nearest4 {
|
||||
alignas(16) u32 v[4];
|
||||
|
||||
|
@ -296,76 +297,74 @@ struct Nearest4 {
|
|||
};
|
||||
|
||||
template <int N>
|
||||
inline static Nearest4 SOFTRAST_CALL SampleNearest(const int u[N], const int v[N], const u8 *srcptr, int texbufw, int level) {
|
||||
inline static Nearest4 SOFTRAST_CALL SampleNearest(const int u[N], const int v[N], const u8 *srcptr, int texbufw, int level, const SamplerID &samplerID) {
|
||||
Nearest4 res;
|
||||
if (!srcptr) {
|
||||
memset(res.v, 0, sizeof(res.v));
|
||||
return res;
|
||||
}
|
||||
|
||||
GETextureFormat texfmt = gstate.getTextureFormat();
|
||||
|
||||
// TODO: Should probably check if textures are aligned properly...
|
||||
|
||||
switch (texfmt) {
|
||||
switch (samplerID.TexFmt()) {
|
||||
case GE_TFMT_4444:
|
||||
for (int i = 0; i < N; ++i) {
|
||||
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i]);
|
||||
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i], samplerID.swizzle);
|
||||
res.v[i] = RGBA4444ToRGBA8888(*(const u16 *)src);
|
||||
}
|
||||
return res;
|
||||
|
||||
case GE_TFMT_5551:
|
||||
for (int i = 0; i < N; ++i) {
|
||||
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i]);
|
||||
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i], samplerID.swizzle);
|
||||
res.v[i] = RGBA5551ToRGBA8888(*(const u16 *)src);
|
||||
}
|
||||
return res;
|
||||
|
||||
case GE_TFMT_5650:
|
||||
for (int i = 0; i < N; ++i) {
|
||||
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i]);
|
||||
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i], samplerID.swizzle);
|
||||
res.v[i] = RGB565ToRGBA8888(*(const u16 *)src);
|
||||
}
|
||||
return res;
|
||||
|
||||
case GE_TFMT_8888:
|
||||
for (int i = 0; i < N; ++i) {
|
||||
const u8 *src = srcptr + GetPixelDataOffset<32>(texbufw, u[i], v[i]);
|
||||
const u8 *src = srcptr + GetPixelDataOffset<32>(texbufw, u[i], v[i], samplerID.swizzle);
|
||||
res.v[i] = *(const u32 *)src;
|
||||
}
|
||||
return res;
|
||||
|
||||
case GE_TFMT_CLUT32:
|
||||
for (int i = 0; i < N; ++i) {
|
||||
const u8 *src = srcptr + GetPixelDataOffset<32>(texbufw, u[i], v[i]);
|
||||
const u8 *src = srcptr + GetPixelDataOffset<32>(texbufw, u[i], v[i], samplerID.swizzle);
|
||||
u32 val = src[0] + (src[1] << 8) + (src[2] << 16) + (src[3] << 24);
|
||||
res.v[i] = LookupColor(gstate.transformClutIndex(val), 0);
|
||||
res.v[i] = LookupColor(TransformClutIndex(val, samplerID), 0, samplerID);
|
||||
}
|
||||
return res;
|
||||
|
||||
case GE_TFMT_CLUT16:
|
||||
for (int i = 0; i < N; ++i) {
|
||||
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i]);
|
||||
const u8 *src = srcptr + GetPixelDataOffset<16>(texbufw, u[i], v[i], samplerID.swizzle);
|
||||
u16 val = src[0] + (src[1] << 8);
|
||||
res.v[i] = LookupColor(gstate.transformClutIndex(val), 0);
|
||||
res.v[i] = LookupColor(TransformClutIndex(val, samplerID), 0, samplerID);
|
||||
}
|
||||
return res;
|
||||
|
||||
case GE_TFMT_CLUT8:
|
||||
for (int i = 0; i < N; ++i) {
|
||||
const u8 *src = srcptr + GetPixelDataOffset<8>(texbufw, u[i], v[i]);
|
||||
const u8 *src = srcptr + GetPixelDataOffset<8>(texbufw, u[i], v[i], samplerID.swizzle);
|
||||
u8 val = *src;
|
||||
res.v[i] = LookupColor(gstate.transformClutIndex(val), 0);
|
||||
res.v[i] = LookupColor(TransformClutIndex(val, samplerID), 0, samplerID);
|
||||
}
|
||||
return res;
|
||||
|
||||
case GE_TFMT_CLUT4:
|
||||
for (int i = 0; i < N; ++i) {
|
||||
const u8 *src = srcptr + GetPixelDataOffset<4>(texbufw, u[i], v[i]);
|
||||
const u8 *src = srcptr + GetPixelDataOffset<4>(texbufw, u[i], v[i], samplerID.swizzle);
|
||||
u8 val = (u[i] & 1) ? (src[0] >> 4) : (src[0] & 0xF);
|
||||
// Only CLUT4 uses separate mipmap palettes.
|
||||
res.v[i] = LookupColor(gstate.transformClutIndex(val), level);
|
||||
res.v[i] = LookupColor(TransformClutIndex(val, samplerID), level, samplerID);
|
||||
}
|
||||
return res;
|
||||
|
||||
|
@ -391,7 +390,7 @@ inline static Nearest4 SOFTRAST_CALL SampleNearest(const int u[N], const int v[N
|
|||
return res;
|
||||
|
||||
default:
|
||||
ERROR_LOG_REPORT(G3D, "Software: Unsupported texture format: %x", texfmt);
|
||||
ERROR_LOG_REPORT(G3D, "Software: Unsupported texture format: %x", samplerID.TexFmt());
|
||||
memset(res.v, 0, sizeof(res.v));
|
||||
return res;
|
||||
}
|
||||
|
@ -410,8 +409,8 @@ static inline int WrapUV(int v, int height) {
|
|||
}
|
||||
|
||||
template <int N>
|
||||
static inline void ApplyTexelClamp(int out_u[N], int out_v[N], const int u[N], const int v[N], int width, int height) {
|
||||
if (gstate.isTexCoordClampedS()) {
|
||||
static inline void ApplyTexelClamp(int out_u[N], int out_v[N], const int u[N], const int v[N], int width, int height, const SamplerID &samplerID) {
|
||||
if (samplerID.clampS) {
|
||||
for (int i = 0; i < N; ++i) {
|
||||
out_u[i] = ClampUV(u[i], width);
|
||||
}
|
||||
|
@ -420,7 +419,7 @@ static inline void ApplyTexelClamp(int out_u[N], int out_v[N], const int u[N], c
|
|||
out_u[i] = WrapUV(u[i], width);
|
||||
}
|
||||
}
|
||||
if (gstate.isTexCoordClampedT()) {
|
||||
if (samplerID.clampT) {
|
||||
for (int i = 0; i < N; ++i) {
|
||||
out_v[i] = ClampUV(v[i], height);
|
||||
}
|
||||
|
@ -431,7 +430,7 @@ static inline void ApplyTexelClamp(int out_u[N], int out_v[N], const int u[N], c
|
|||
}
|
||||
}
|
||||
|
||||
static inline void GetTexelCoordinates(int level, float s, float t, int &out_u, int &out_v, int x, int y) {
|
||||
static inline void GetTexelCoordinates(int level, float s, float t, int &out_u, int &out_v, int x, int y, const SamplerID &samplerID) {
|
||||
int width = gstate.getTextureWidth(level);
|
||||
int height = gstate.getTextureHeight(level);
|
||||
|
||||
|
@ -441,28 +440,28 @@ static inline void GetTexelCoordinates(int level, float s, float t, int &out_u,
|
|||
base_u >>= 8;
|
||||
base_v >>= 8;
|
||||
|
||||
ApplyTexelClamp<1>(&out_u, &out_v, &base_u, &base_v, width, height);
|
||||
ApplyTexelClamp<1>(&out_u, &out_v, &base_u, &base_v, width, height, samplerID);
|
||||
}
|
||||
|
||||
// Samples one texel per mip level for (s, t) and returns the result of
// GetTextureFunctionOutput() applied to prim_color and the sampled color.
//   tptr / bufw : indexed with [0] for `level` and, only when levelFrac is
//                 non-zero, with [1] for `level + 1`.
//   levelFrac   : blend weight toward `level + 1`, in sixteenths (see the
//                 "/ 16" blend below); zero skips the second sample entirely.
// NOTE(review): this text looks like a rendered diff - where two near-identical
// lines follow each other, the first appears to be the pre-change line and the
// second its replacement that passes samplerID; confirm against the real file.
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac) {
|
||||
static Vec4IntResult SOFTRAST_CALL SampleNearest(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int level, int levelFrac, const SamplerID &samplerID) {
|
||||
int u, v;
|
||||
|
||||
// Nearest filtering only. Round texcoords.
|
||||
GetTexelCoordinates(level, s, t, u, v, x, y);
|
||||
Vec4<int> c0 = Vec4<int>::FromRGBA(SampleNearest<1>(&u, &v, tptr[0], bufw[0], level).v[0]);
|
||||
GetTexelCoordinates(level, s, t, u, v, x, y, samplerID);
|
||||
Vec4<int> c0 = Vec4<int>::FromRGBA(SampleNearest<1>(&u, &v, tptr[0], bufw[0], level, samplerID).v[0]);
|
||||
|
||||
// Non-zero levelFrac: sample the next level too (u and v are recomputed for
// level + 1) and blend the two colors.
if (levelFrac) {
|
||||
GetTexelCoordinates(level + 1, s, t, u, v, x, y);
|
||||
Vec4<int> c1 = Vec4<int>::FromRGBA(SampleNearest<1>(&u, &v, tptr[1], bufw[1], level + 1).v[0]);
|
||||
GetTexelCoordinates(level + 1, s, t, u, v, x, y, samplerID);
|
||||
Vec4<int> c1 = Vec4<int>::FromRGBA(SampleNearest<1>(&u, &v, tptr[1], bufw[1], level + 1, samplerID).v[0]);
|
||||
|
||||
// Weighted average: levelFrac/16 of c1 plus (16 - levelFrac)/16 of c0.
c0 = (c1 * levelFrac + c0 * (16 - levelFrac)) / 16;
|
||||
}
|
||||
|
||||
return GetTextureFunctionOutput(prim_color, ToVec4IntArg(c0));
|
||||
return GetTextureFunctionOutput(prim_color, ToVec4IntArg(c0), samplerID);
|
||||
}
|
||||
|
||||
// Fetches the single texel at integer coordinates (u, v) from tptr via
// SampleNearest<1> and returns it unpacked through Vec4<int>::FromRGBA.
// Unlike the float-coordinate SampleNearest/SampleLinear entry points, this
// does not call GetTextureFunctionOutput().
// NOTE(review): this text looks like a rendered diff - the first signature and
// body line appear to be the pre-change versions, followed by their
// replacements that pass samplerID; confirm against the real file.
static Vec4IntResult SOFTRAST_CALL SampleFetch(int u, int v, const u8 *tptr, int bufw, int level) {
|
||||
Nearest4 c = SampleNearest<1>(&u, &v, tptr, bufw, level);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleFetch(int u, int v, const u8 *tptr, int bufw, int level, const SamplerID &samplerID) {
|
||||
Nearest4 c = SampleNearest<1>(&u, &v, tptr, bufw, level, samplerID);
|
||||
return ToVec4IntResult(Vec4<int>::FromRGBA(c.v[0]));
|
||||
}
|
||||
|
||||
|
@ -518,7 +517,7 @@ static inline Vec4IntResult SOFTRAST_CALL ApplyTexelClampQuadT(bool clamp, int v
|
|||
#endif
|
||||
}
|
||||
|
||||
static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadS(int level, float in_s, int &frac_u, int x) {
|
||||
static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadS(int level, float in_s, int &frac_u, int x, const SamplerID &samplerID) {
|
||||
int width = gstate.getTextureWidth(level);
|
||||
|
||||
int base_u = (int)(in_s * width * 256) + 12 - x - 128;
|
||||
|
@ -526,10 +525,10 @@ static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadS(int level, fl
|
|||
base_u >>= 8;
|
||||
|
||||
// Need to generate and individually wrap/clamp the four sample coordinates. Ugh.
|
||||
return ApplyTexelClampQuadS(gstate.isTexCoordClampedS(), base_u, width);
|
||||
return ApplyTexelClampQuadS(samplerID.clampS, base_u, width);
|
||||
}
|
||||
|
||||
static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadT(int level, float in_t, int &frac_v, int y) {
|
||||
static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadT(int level, float in_t, int &frac_v, int y, const SamplerID &samplerID) {
|
||||
int height = gstate.getTextureHeight(level);
|
||||
|
||||
int base_v = (int)(in_t * height * 256) + 12 - y - 128;
|
||||
|
@ -537,14 +536,14 @@ static inline Vec4IntResult SOFTRAST_CALL GetTexelCoordinatesQuadT(int level, fl
|
|||
base_v >>= 8;
|
||||
|
||||
// Need to generate and individually wrap/clamp the four sample coordinates. Ugh.
|
||||
return ApplyTexelClampQuadT(gstate.isTexCoordClampedT(), base_v, height);
|
||||
return ApplyTexelClampQuadT(samplerID.clampT, base_v, height);
|
||||
}
|
||||
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, int y, const u8 *const *tptr, const int *bufw, int texlevel) {
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, int y, const u8 *const *tptr, const int *bufw, int texlevel, const SamplerID &samplerID) {
|
||||
int frac_u, frac_v;
|
||||
const Vec4<int> u = GetTexelCoordinatesQuadS(texlevel, s, frac_u, x);
|
||||
const Vec4<int> v = GetTexelCoordinatesQuadT(texlevel, t, frac_v, y);
|
||||
Nearest4 c = SampleNearest<4>(u.AsArray(), v.AsArray(), tptr[0], bufw[0], texlevel);
|
||||
const Vec4<int> u = GetTexelCoordinatesQuadS(texlevel, s, frac_u, x, samplerID);
|
||||
const Vec4<int> v = GetTexelCoordinatesQuadT(texlevel, t, frac_v, y, samplerID);
|
||||
Nearest4 c = SampleNearest<4>(u.AsArray(), v.AsArray(), tptr[0], bufw[0], texlevel, samplerID);
|
||||
|
||||
Vec4<int> texcolor_tl = Vec4<int>::FromRGBA(c.v[0]);
|
||||
Vec4<int> texcolor_tr = Vec4<int>::FromRGBA(c.v[1]);
|
||||
|
@ -555,13 +554,13 @@ static Vec4IntResult SOFTRAST_CALL SampleLinearLevel(float s, float t, int x, in
|
|||
return ToVec4IntResult((top * (0x10 - frac_v) + bot * frac_v) / (16 * 16));
|
||||
}
|
||||
|
||||
// Samples (s, t) through SampleLinearLevel() for `texlevel` and returns the
// result of GetTextureFunctionOutput() applied to prim_color and that color.
//   tptr / bufw : passed as-is for `texlevel`; advanced by one element
//                 (tptr + 1, bufw + 1) for `texlevel + 1`.
//   levelFrac   : blend weight toward `texlevel + 1`, in sixteenths (see the
//                 "/ 16" blend below); zero skips the second level entirely.
// NOTE(review): this text looks like a rendered diff - where two near-identical
// lines follow each other, the first appears to be the pre-change line and the
// second its replacement that passes samplerID; confirm against the real file.
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int texlevel, int levelFrac) {
|
||||
Vec4<int> c0 = SampleLinearLevel(s, t, x, y, tptr, bufw, texlevel);
|
||||
static Vec4IntResult SOFTRAST_CALL SampleLinear(float s, float t, int x, int y, Vec4IntArg prim_color, const u8 *const *tptr, const int *bufw, int texlevel, int levelFrac, const SamplerID &samplerID) {
|
||||
Vec4<int> c0 = SampleLinearLevel(s, t, x, y, tptr, bufw, texlevel, samplerID);
|
||||
if (levelFrac) {
|
||||
const Vec4<int> c1 = SampleLinearLevel(s, t, x, y, tptr + 1, bufw + 1, texlevel + 1);
|
||||
const Vec4<int> c1 = SampleLinearLevel(s, t, x, y, tptr + 1, bufw + 1, texlevel + 1, samplerID);
|
||||
// Weighted average: levelFrac/16 of c1 plus (16 - levelFrac)/16 of c0.
c0 = (c1 * levelFrac + c0 * (16 - levelFrac)) / 16;
|
||||
}
|
||||
return GetTextureFunctionOutput(prim_color, ToVec4IntArg(c0));
|
||||
return GetTextureFunctionOutput(prim_color, ToVec4IntArg(c0), samplerID);
|
||||
}
|
||||
|
||||
};
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue