softgpu: Use SSE for texture sampling.

This commit is contained in:
Unknown W. Brackets 2014-03-16 14:33:42 -07:00
parent b357b00ace
commit b33d0c4046

View file

@ -968,6 +968,29 @@ inline void ApplyTexturing(Vec3<int> &prim_color_rgb, int &prim_color_a, float s
// Nearest filtering only. Round texcoords or just chop bits?
texcolor = Vec4<int>::FromRGBA(SampleNearest(texlevel, u[0], v[0], tptr, bufwbits));
} else {
#if defined(_M_SSE)
MEMORY_ALIGNED16(u32 tc[2]);
MEMORY_ALIGNED16(u32 bc[2]);
tc[0] = SampleNearest(texlevel, u[0], v[0], tptr, bufwbits);
tc[1] = SampleNearest(texlevel, u[1], v[1], tptr, bufwbits);
bc[0] = SampleNearest(texlevel, u[2], v[2], tptr, bufwbits);
bc[1] = SampleNearest(texlevel, u[3], v[3], tptr, bufwbits);
const __m128i z = _mm_setzero_si128();
__m128i tvec = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)tc), z);
tvec = _mm_mullo_epi16(tvec, _mm_set1_epi16(0x100 - frac_v));
__m128i bvec = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)bc), z);
bvec = _mm_mullo_epi16(bvec, _mm_set1_epi16(frac_v));
// This multiplies the left and right sides. We shift right after, although this may round down...
__m128i rowmult = _mm_set_epi16(frac_u, frac_u, frac_u, frac_u, 0x100 - frac_u, 0x100 - frac_u, 0x100 - frac_u, 0x100 - frac_u);
__m128i tmp = _mm_mulhi_epu16(_mm_add_epi16(tvec, bvec), rowmult);
// Now we need to add the left and right sides together.
__m128i res = _mm_add_epi16(tmp, _mm_shuffle_epi32(tmp, _MM_SHUFFLE(3, 2, 3, 2)));
texcolor = Vec4<int>(_mm_unpacklo_epi16(res, z));
#else
Vec4<int> texcolor_tl = Vec4<int>::FromRGBA(SampleNearest(texlevel, u[0], v[0], tptr, bufwbits));
Vec4<int> texcolor_tr = Vec4<int>::FromRGBA(SampleNearest(texlevel, u[1], v[1], tptr, bufwbits));
Vec4<int> texcolor_bl = Vec4<int>::FromRGBA(SampleNearest(texlevel, u[2], v[2], tptr, bufwbits));
@ -976,6 +999,7 @@ inline void ApplyTexturing(Vec3<int> &prim_color_rgb, int &prim_color_a, float s
Vec4<int> t = texcolor_tl * (0x100 - frac_u) + texcolor_tr * frac_u;
Vec4<int> b = texcolor_bl * (0x100 - frac_u) + texcolor_br * frac_u;
texcolor = (t * (0x100 - frac_v) + b * frac_v) / (256 * 256);
#endif
}
Vec4<int> out = GetTextureFunctionOutput(prim_color_rgb, prim_color_a, texcolor);
prim_color_rgb = out.rgb();