Replaced SDL_memset4() implementation with a call to SDL_memset().

The implementation was slower than the C runtime on Mac OS X, Linux, and
 Windows, and quite a bit slower still when using the C fallback instead of
 the inline asm.

Fixes Bugzilla #1755.
This commit is contained in:
Ryan C. Gordon 2013-07-08 23:22:36 -04:00
parent 6f3b5c3641
commit 287d2529bd

View file

@ -257,33 +257,9 @@ extern DECLSPEC void *SDLCALL SDL_memset(void *dst, int c, size_t len);
/*
 * Fill `dwords` consecutive 32-bit words starting at `dst` with `val`.
 *
 * Note that the semantics are different from memset() in that this is a
 * 32-bit assignment: all 32 bits of `val` are stored into every word, whereas
 * memset() replicates only a single byte. Because of that, this function must
 * not be forwarded to a byte fill such as SDL_memset(dst, val, dwords * 4);
 * doing so would turn e.g. 0x11223344 into 0x44444444 in every word.
 *
 *  dst     start of the destination buffer. The C fallback writes through a
 *          Uint32 pointer, so the buffer is presumably expected to be
 *          suitably aligned for Uint32 (confirm against callers).
 *  val     value stored into each word (converted to Uint32).
 *  dwords  number of 32-bit words (not bytes) to write; 0 writes nothing.
 */
SDL_FORCE_INLINE void SDL_memset4(void *dst, int val, size_t dwords)
{
#if defined(__GNUC__) && defined(i386)
    /* Dummy outputs: tell the compiler that EDI, EAX and ECX are clobbered. */
    int u0, u1, u2;
    __asm__ __volatile__ (
        "cld \n\t"            /* make sure stosl walks forward through memory */
        "rep ; stosl \n\t"    /* store EAX to [EDI], ECX times */
        : "=&D" (u0), "=&a" (u1), "=&c" (u2)
        : "0" (dst), "1" (val), "2" (SDL_static_cast(Uint32, dwords))
        : "memory"
    );
#else
    /* Number of passes through the 4x unrolled loop, rounded up. */
    size_t _n = (dwords + 3) / 4;
    Uint32 *_p = SDL_static_cast(Uint32 *, dst);
    Uint32 _val = (val);

    /* Required guard: with zero words the do/while below would still run
       once and then decrement _n past zero. */
    if (dwords == 0)
        return;

    /* Duff's device: jump into the unrolled loop so that the first pass
       handles the (dwords % 4) leftover words, then run full passes. */
    switch (dwords % 4)
    {
        case 0: do {    *_p++ = _val;   /* fallthrough */
        case 3:         *_p++ = _val;   /* fallthrough */
        case 2:         *_p++ = _val;   /* fallthrough */
        case 1:         *_p++ = _val;
        } while ( --_n );
    }
#endif
}
extern DECLSPEC void *SDLCALL SDL_memcpy(void *dst, const void *src, size_t len);
SDL_FORCE_INLINE void *SDL_memcpy4(void *dst, const void *src, size_t dwords)