ppsspp/GPU/Common/TextureDecoderNEON.cpp
Unknown W. Brackets 4d47ccd5df Add a NEON version of the tex hash.
Should be used only for NEON devices.  Currently only compiled on Android.
2013-11-02 02:09:54 -07:00

57 lines
No EOL
1.9 KiB
C++

// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <arm_neon.h>
#include "GPU/Common/TextureDecoder.h"
#ifndef ARM
#error Should not be compiled on non-ARM.
#endif
static const u16 MEMORY_ALIGNED16(QuickTexHashInitial[8]) = {0x0001U, 0x0083U, 0x4309U, 0x4d9bU, 0xb651U, 0x4b73U, 0x9bd9U, 0xc00bU};
u32 QuickTexHashNEON(const void *checkp, u32 size) {
u32 check = 0;
if (((intptr_t)checkp & 0xf) == 0 && (size & 0x3f) == 0) {
uint32x4_t cursor = vdupq_n_u32(0);
uint32x4_t cursor2 = vld1q_u32((const u32 *)QuickTexHashInitial);
uint32x4_t update = vdupq_n_u32(0x24552455U);
const u32 *p = (const u32 *)checkp;
for (u32 i = 0; i < size / 16; i += 4) {
cursor = vmlaq_u32(cursor, vld1q_u32(&p[4 * 0]), cursor2);
cursor = veorq_u32(cursor, vld1q_u32(&p[4 * 1]));
cursor = vaddq_u32(cursor, vld1q_u32(&p[4 * 2]));
cursor = veorq_u32(cursor, vmulq_u32(vld1q_u32(&p[4 * 3]), cursor2));
cursor2 = vaddq_u32(cursor2, update);
p += 4 * 4;
}
cursor = vaddq_u32(cursor, cursor2);
check = vgetq_lane_u32(cursor, 0) + vgetq_lane_u32(cursor, 1) + vgetq_lane_u32(cursor, 2) + vgetq_lane_u32(cursor, 3);
} else {
const u32 *p = (const u32 *)checkp;
for (u32 i = 0; i < size / 8; ++i) {
check += *p++;
check ^= *p++;
}
}
return check;
}