softgpu: Use SSE for texture sampling.
This commit is contained in:
parent
b357b00ace
commit
b33d0c4046
1 changed files with 24 additions and 0 deletions
|
@ -968,6 +968,29 @@ inline void ApplyTexturing(Vec3<int> &prim_color_rgb, int &prim_color_a, float s
|
|||
// Nearest filtering only. Round texcoords or just chop bits?
|
||||
texcolor = Vec4<int>::FromRGBA(SampleNearest(texlevel, u[0], v[0], tptr, bufwbits));
|
||||
} else {
|
||||
#if defined(_M_SSE)
|
||||
MEMORY_ALIGNED16(u32 tc[2]);
|
||||
MEMORY_ALIGNED16(u32 bc[2]);
|
||||
tc[0] = SampleNearest(texlevel, u[0], v[0], tptr, bufwbits);
|
||||
tc[1] = SampleNearest(texlevel, u[1], v[1], tptr, bufwbits);
|
||||
bc[0] = SampleNearest(texlevel, u[2], v[2], tptr, bufwbits);
|
||||
bc[1] = SampleNearest(texlevel, u[3], v[3], tptr, bufwbits);
|
||||
|
||||
const __m128i z = _mm_setzero_si128();
|
||||
|
||||
__m128i tvec = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)tc), z);
|
||||
tvec = _mm_mullo_epi16(tvec, _mm_set1_epi16(0x100 - frac_v));
|
||||
__m128i bvec = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)bc), z);
|
||||
bvec = _mm_mullo_epi16(bvec, _mm_set1_epi16(frac_v));
|
||||
|
||||
// This multiplies the left and right sides. We shift right after, although this may round down...
|
||||
__m128i rowmult = _mm_set_epi16(frac_u, frac_u, frac_u, frac_u, 0x100 - frac_u, 0x100 - frac_u, 0x100 - frac_u, 0x100 - frac_u);
|
||||
__m128i tmp = _mm_mulhi_epu16(_mm_add_epi16(tvec, bvec), rowmult);
|
||||
|
||||
// Now we need to add the left and right sides together.
|
||||
__m128i res = _mm_add_epi16(tmp, _mm_shuffle_epi32(tmp, _MM_SHUFFLE(3, 2, 3, 2)));
|
||||
texcolor = Vec4<int>(_mm_unpacklo_epi16(res, z));
|
||||
#else
|
||||
Vec4<int> texcolor_tl = Vec4<int>::FromRGBA(SampleNearest(texlevel, u[0], v[0], tptr, bufwbits));
|
||||
Vec4<int> texcolor_tr = Vec4<int>::FromRGBA(SampleNearest(texlevel, u[1], v[1], tptr, bufwbits));
|
||||
Vec4<int> texcolor_bl = Vec4<int>::FromRGBA(SampleNearest(texlevel, u[2], v[2], tptr, bufwbits));
|
||||
|
@ -976,6 +999,7 @@ inline void ApplyTexturing(Vec3<int> &prim_color_rgb, int &prim_color_a, float s
|
|||
Vec4<int> t = texcolor_tl * (0x100 - frac_u) + texcolor_tr * frac_u;
|
||||
Vec4<int> b = texcolor_bl * (0x100 - frac_u) + texcolor_br * frac_u;
|
||||
texcolor = (t * (0x100 - frac_v) + b * frac_v) / (256 * 256);
|
||||
#endif
|
||||
}
|
||||
Vec4<int> out = GetTextureFunctionOutput(prim_color_rgb, prim_color_a, texcolor);
|
||||
prim_color_rgb = out.rgb();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue