TexCache: Simplify CheckAlpha funcs and SIMD.
Only check for full alpha now, which is simpler.
This commit is contained in:
parent
e3b3828b15
commit
f087b87b0c
2 changed files with 79 additions and 181 deletions
|
@ -277,96 +277,57 @@ static inline bool VectorIsNonZeroNEON(const uint16x8_t &v) {
|
|||
#endif
|
||||
|
||||
CheckAlphaResult CheckAlphaRGBA8888NEON(const u32 *pixelData, int stride, int w, int h) {
|
||||
const uint32x4_t zero = vdupq_n_u32(0);
|
||||
const uint32x4_t full = vdupq_n_u32(0xFF);
|
||||
|
||||
const u32 *p = (const u32 *)pixelData;
|
||||
|
||||
// Have alpha values == 0 been seen?
|
||||
uint32x4_t foundAZero = zero;
|
||||
|
||||
const uint32x4_t mask = vdupq_n_u32(0xFF000000);
|
||||
uint32x4_t bits = mask;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
// Have alpha values > 0 and < 0xFF been seen?
|
||||
uint32x4_t foundFraction = zero;
|
||||
|
||||
for (int i = 0; i < w; i += 4) {
|
||||
const uint32x4_t a = vshrq_n_u32(vld1q_u32(&p[i]), 24);
|
||||
|
||||
const uint32x4_t isZero = vceqq_u32(a, zero);
|
||||
foundAZero = vorrq_u32(foundAZero, isZero);
|
||||
|
||||
// If a = FF, isNotFull will be 0 -> foundFraction will be 0.
|
||||
// If a = 00, a & isNotFull will be 0 -> foundFraction will be 0.
|
||||
// In any other case, foundFraction will have some bits set.
|
||||
const uint32x4_t isNotFull = vcltq_u32(a, full);
|
||||
foundFraction = vorrq_u32(foundFraction, vandq_u32(a, isNotFull));
|
||||
const uint32x4_t a = vld1q_u32(&p[i]);
|
||||
bits = vandq_u32(bits, a);
|
||||
}
|
||||
p += stride;
|
||||
|
||||
// Check for CHECKALPHA_ANY early, in case we can skip the rest of the rows.
|
||||
if (VectorIsNonZeroNEON(foundFraction)) {
|
||||
uint32x4_t result = veorq_u32(bits, mask);
|
||||
if (VectorIsNonZeroNEON(result)) {
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride;
|
||||
}
|
||||
|
||||
// Now let's sum up the bits.
|
||||
if (VectorIsNonZeroNEON(foundAZero)) {
|
||||
return CHECKALPHA_ANY;
|
||||
} else {
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaABGR4444NEON(const u32 *pixelData, int stride, int w, int h) {
|
||||
const uint16x8_t zero = vdupq_n_u16(0);
|
||||
const uint16x8_t full = vdupq_n_u16(0xF000);
|
||||
|
||||
const u16 *p = (const u16 *)pixelData;
|
||||
|
||||
// Have alpha values == 0 been seen?
|
||||
uint16x8_t foundAZero = zero;
|
||||
|
||||
const uint16x8_t mask = vdupq_n_u16((u16)0x000F);
|
||||
uint16x8_t bits = mask;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
// Have alpha values > 0 and < 0xF been seen?
|
||||
uint16x8_t foundFraction = zero;
|
||||
|
||||
for (int i = 0; i < w; i += 8) {
|
||||
const uint16x8_t a = vshlq_n_u16(vld1q_u16(&p[i]), 12);
|
||||
|
||||
const uint16x8_t isZero = vceqq_u16(a, zero);
|
||||
foundAZero = vorrq_u16(foundAZero, isZero);
|
||||
|
||||
// If a = F, isNotFull will be 0 -> foundFraction will be 0.
|
||||
// If a = 0, a & isNotFull will be 0 -> foundFraction will be 0.
|
||||
// In any other case, foundFraction will have some bits set.
|
||||
const uint16x8_t isNotFull = vcltq_u16(a, full);
|
||||
foundFraction = vorrq_u16(foundFraction, vandq_u16(a, isNotFull));
|
||||
const uint16x8_t a = vld1q_u16(&p[i]);
|
||||
bits = vandq_u16(bits, a);
|
||||
}
|
||||
p += stride;
|
||||
|
||||
// Check for CHECKALPHA_ANY early, in case we can skip the rest of the rows.
|
||||
if (VectorIsNonZeroNEON(foundFraction)) {
|
||||
uint16x8_t result = veorq_u16(bits, mask);
|
||||
if (VectorIsNonZeroNEON(result)) {
|
||||
return CHECKALPHA_ANY;
|
||||
}
|
||||
|
||||
p += stride;
|
||||
}
|
||||
|
||||
// Now let's sum up the bits.
|
||||
if (VectorIsNonZeroNEON(foundAZero)) {
|
||||
return CHECKALPHA_ANY;
|
||||
} else {
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
return CHECKALPHA_FULL;
|
||||
}
|
||||
|
||||
CheckAlphaResult CheckAlphaABGR1555NEON(const u32 *pixelData, int stride, int w, int h) {
|
||||
const u16 *p = (const u16 *)pixelData;
|
||||
|
||||
const uint16x8_t mask = vdupq_n_u16(1);
|
||||
uint16x8_t bits = vdupq_n_u16(1);
|
||||
const uint16x8_t mask = vdupq_n_u16((u16)0x0001);
|
||||
uint16x8_t bits = mask;
|
||||
for (int y = 0; y < h; ++y) {
|
||||
for (int i = 0; i < w; i += 8) {
|
||||
const uint16x8_t a = vld1q_u16(&p[i]);
|
||||
|
||||
bits = vandq_u16(bits, a);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue