softgpu: Simplify 5551 blending fast path.
Since it only supports multiply and add, let's just stick with that.
This commit is contained in:
parent
1eeb4f0bcf
commit
c47d7eab38
4 changed files with 244 additions and 211 deletions
|
@ -270,206 +270,6 @@ static inline bool IsRightSideOrFlatBottomLine(const Vec2<int>& vertex, const Ve
|
|||
}
|
||||
}
|
||||
|
||||
// Picks the per-channel factor that the source color gets multiplied by when blending.
//
// factor - which blend factor mode to resolve.
// source - incoming color; its alpha feeds the *SRCALPHA modes.
// dst    - color already in the target; its rgb feeds OTHERCOLOR, its alpha the *DSTALPHA modes.
// fix    - packed color decoded with Vec3<int>::FromRGB for the FIX mode.
//
// Returned components are never negative.  The DOUBLESRCALPHA / DOUBLEDSTALPHA modes are not
// clamped, so they may exceed 255.
static inline Vec3<int> GetSourceFactor(PixelBlendFactor factor, const Vec4<int> &source, const Vec4<int> &dst, uint32_t fix) {
	switch (factor) {
	case PixelBlendFactor::ZERO:
		return Vec3<int>::AssignToAll(0);

	case PixelBlendFactor::ONE:
		return Vec3<int>::AssignToAll(255);

	case PixelBlendFactor::OTHERCOLOR:
		// For the source side, the "other" color is the destination.
		return dst.rgb();

	case PixelBlendFactor::INVOTHERCOLOR: {
		const Vec3<int> white = Vec3<int>::AssignToAll(255);
		return white - dst.rgb();
	}

	case PixelBlendFactor::SRCALPHA: {
#if defined(_M_SSE)
		// Broadcast element 3 of the vector (the SIMD counterpart of source.a() below.)
		const __m128i alphaLanes = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		return Vec3<int>(alphaLanes);
#else
		return Vec3<int>::AssignToAll(source.a());
#endif
	}

	case PixelBlendFactor::INVSRCALPHA: {
#if defined(_M_SSE)
		const __m128i alphaLanes = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		const __m128i full = _mm_set1_epi32(255);
		return Vec3<int>(_mm_sub_epi32(full, alphaLanes));
#else
		return Vec3<int>::AssignToAll(255 - source.a());
#endif
	}

	case PixelBlendFactor::DSTALPHA:
		return Vec3<int>::AssignToAll(dst.a());

	case PixelBlendFactor::INVDSTALPHA:
		return Vec3<int>::AssignToAll(255 - dst.a());

	case PixelBlendFactor::DOUBLESRCALPHA:
		// Deliberately left unclamped here.
		return Vec3<int>::AssignToAll(2 * source.a());

	case PixelBlendFactor::DOUBLEINVSRCALPHA: {
		// Clamp the doubled alpha first so the inverse can't go negative.
		const int doubled = std::min(2 * source.a(), 255);
		return Vec3<int>::AssignToAll(255 - doubled);
	}

	case PixelBlendFactor::DOUBLEDSTALPHA:
		// Deliberately left unclamped here.
		return Vec3<int>::AssignToAll(2 * dst.a());

	case PixelBlendFactor::DOUBLEINVDSTALPHA: {
		const int doubled = std::min(2 * dst.a(), 255);
		return Vec3<int>::AssignToAll(255 - doubled);
	}

	case PixelBlendFactor::FIX:
	default:
		// FIX, and every source factor value without a case of its own, uses the fixed color (FIXA.)
		return Vec3<int>::FromRGB(fix);
	}
}
|
||||
|
||||
// Picks the per-channel factor that the destination color gets multiplied by when blending.
//
// factor - which blend factor mode to resolve.
// source - incoming color; its rgb feeds OTHERCOLOR, its alpha the *SRCALPHA modes.
// dst    - color already in the target; its alpha feeds the *DSTALPHA modes.
// fix    - packed color decoded with Vec3<int>::FromRGB for the FIX mode.
//
// Returned components are never negative.  The DOUBLESRCALPHA / DOUBLEDSTALPHA modes are not
// clamped, so they may exceed 255.
static inline Vec3<int> GetDestFactor(PixelBlendFactor factor, const Vec4<int> &source, const Vec4<int> &dst, uint32_t fix) {
	switch (factor) {
	case PixelBlendFactor::ZERO:
		return Vec3<int>::AssignToAll(0);

	case PixelBlendFactor::ONE:
		return Vec3<int>::AssignToAll(255);

	case PixelBlendFactor::OTHERCOLOR:
		// For the destination side, the "other" color is the source.
		return source.rgb();

	case PixelBlendFactor::INVOTHERCOLOR: {
		const Vec3<int> white = Vec3<int>::AssignToAll(255);
		return white - source.rgb();
	}

	case PixelBlendFactor::SRCALPHA: {
#if defined(_M_SSE)
		// Broadcast element 3 of the vector (the SIMD counterpart of source.a() below.)
		const __m128i srcAlphaLanes = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		return Vec3<int>(srcAlphaLanes);
#else
		return Vec3<int>::AssignToAll(source.a());
#endif
	}

	case PixelBlendFactor::INVSRCALPHA: {
#if defined(_M_SSE)
		const __m128i srcAlphaLanes = _mm_shuffle_epi32(source.ivec, _MM_SHUFFLE(3, 3, 3, 3));
		const __m128i full = _mm_set1_epi32(255);
		return Vec3<int>(_mm_sub_epi32(full, srcAlphaLanes));
#else
		return Vec3<int>::AssignToAll(255 - source.a());
#endif
	}

	case PixelBlendFactor::DSTALPHA:
		return Vec3<int>::AssignToAll(dst.a());

	case PixelBlendFactor::INVDSTALPHA:
		return Vec3<int>::AssignToAll(255 - dst.a());

	case PixelBlendFactor::DOUBLESRCALPHA:
		// Deliberately left unclamped here.
		return Vec3<int>::AssignToAll(2 * source.a());

	case PixelBlendFactor::DOUBLEINVSRCALPHA: {
		// Clamp the doubled alpha first so the inverse can't go negative.
		const int twiceAlpha = std::min(2 * source.a(), 255);
		return Vec3<int>::AssignToAll(255 - twiceAlpha);
	}

	case PixelBlendFactor::DOUBLEDSTALPHA:
		// Deliberately left unclamped here.
		return Vec3<int>::AssignToAll(2 * dst.a());

	case PixelBlendFactor::DOUBLEINVDSTALPHA: {
		const int twiceAlpha = std::min(2 * dst.a(), 255);
		return Vec3<int>::AssignToAll(255 - twiceAlpha);
	}

	case PixelBlendFactor::FIX:
	default:
		// FIX, and every dest factor value without a case of its own, uses the fixed color (FIXB.)
		return Vec3<int>::FromRGB(fix);
	}
}
|
||||
|
||||
// Removed inline here - it was never chosen to be inlined by the compiler anyway, too complex.
|
||||
Vec3<int> AlphaBlendingResult(const PixelFuncID &pixelID, const Vec4<int> &source, const Vec4<int> &dst) {
|
||||
// Note: These factors cannot go below 0, but they can go above 255 when doubling.
|
||||
Vec3<int> srcfactor = GetSourceFactor(pixelID.AlphaBlendSrc(), source, dst, pixelID.cached.alphaBlendSrc);
|
||||
Vec3<int> dstfactor = GetDestFactor(pixelID.AlphaBlendDst(), source, dst, pixelID.cached.alphaBlendDst);
|
||||
|
||||
switch (pixelID.AlphaBlendEq()) {
|
||||
case GE_BLENDMODE_MUL_AND_ADD:
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
// We switch to 16 bit to use mulhi, and we use 4 bits of decimal to make the 16 bit shift free.
|
||||
const __m128i half = _mm_set1_epi16(1 << 3);
|
||||
|
||||
const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half);
|
||||
const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half);
|
||||
const __m128i s = _mm_mulhi_epi16(srgb, sf);
|
||||
|
||||
const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half);
|
||||
const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half);
|
||||
const __m128i d = _mm_mulhi_epi16(drgb, df);
|
||||
|
||||
return Vec3<int>(_mm_unpacklo_epi16(_mm_adds_epi16(s, d), _mm_setzero_si128()));
|
||||
#else
|
||||
static constexpr Vec3<int> half = Vec3<int>::AssignToAll(1);
|
||||
Vec3<int> lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024;
|
||||
Vec3<int> rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024;
|
||||
return lhs + rhs;
|
||||
#endif
|
||||
}
|
||||
|
||||
case GE_BLENDMODE_MUL_AND_SUBTRACT:
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
const __m128i half = _mm_set1_epi16(1 << 3);
|
||||
|
||||
const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half);
|
||||
const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half);
|
||||
const __m128i s = _mm_mulhi_epi16(srgb, sf);
|
||||
|
||||
const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half);
|
||||
const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half);
|
||||
const __m128i d = _mm_mulhi_epi16(drgb, df);
|
||||
|
||||
return Vec3<int>(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(s, d), _mm_setzero_si128()), _mm_setzero_si128()));
|
||||
#else
|
||||
static constexpr Vec3<int> half = Vec3<int>::AssignToAll(1);
|
||||
Vec3<int> lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024;
|
||||
Vec3<int> rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024;
|
||||
return lhs - rhs;
|
||||
#endif
|
||||
}
|
||||
|
||||
case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
|
||||
{
|
||||
#if defined(_M_SSE)
|
||||
const __m128i half = _mm_set1_epi16(1 << 3);
|
||||
|
||||
const __m128i srgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(source.ivec, source.ivec), 4), half);
|
||||
const __m128i sf = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(srcfactor.ivec, srcfactor.ivec), 4), half);
|
||||
const __m128i s = _mm_mulhi_epi16(srgb, sf);
|
||||
|
||||
const __m128i drgb = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dst.ivec, dst.ivec), 4), half);
|
||||
const __m128i df = _mm_add_epi16(_mm_slli_epi16(_mm_packs_epi32(dstfactor.ivec, dstfactor.ivec), 4), half);
|
||||
const __m128i d = _mm_mulhi_epi16(drgb, df);
|
||||
|
||||
return Vec3<int>(_mm_unpacklo_epi16(_mm_max_epi16(_mm_subs_epi16(d, s), _mm_setzero_si128()), _mm_setzero_si128()));
|
||||
#else
|
||||
static constexpr Vec3<int> half = Vec3<int>::AssignToAll(1);
|
||||
Vec3<int> lhs = ((source.rgb() * 2 + half) * (srcfactor * 2 + half)) / 1024;
|
||||
Vec3<int> rhs = ((dst.rgb() * 2 + half) * (dstfactor * 2 + half)) / 1024;
|
||||
return rhs - lhs;
|
||||
#endif
|
||||
}
|
||||
|
||||
case GE_BLENDMODE_MIN:
|
||||
return Vec3<int>(std::min(source.r(), dst.r()),
|
||||
std::min(source.g(), dst.g()),
|
||||
std::min(source.b(), dst.b()));
|
||||
|
||||
case GE_BLENDMODE_MAX:
|
||||
return Vec3<int>(std::max(source.r(), dst.r()),
|
||||
std::max(source.g(), dst.g()),
|
||||
std::max(source.b(), dst.b()));
|
||||
|
||||
case GE_BLENDMODE_ABSDIFF:
|
||||
return Vec3<int>(::abs(source.r() - dst.r()),
|
||||
::abs(source.g() - dst.g()),
|
||||
::abs(source.b() - dst.b()));
|
||||
|
||||
default:
|
||||
return source.rgb();
|
||||
}
|
||||
}
|
||||
|
||||
static inline Vec4IntResult SOFTRAST_CALL ApplyTexturing(float s, float t, int x, int y, Vec4IntArg prim_color, int texlevel, int frac_texlevel, bool bilinear, const RasterizerState &state) {
|
||||
const u8 **tptr0 = const_cast<const u8 **>(&state.texptr[texlevel]);
|
||||
const uint16_t *bufw0 = &state.texbufw[texlevel];
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue