Replaced SDL_memset4() implementation with a call to SDL_memset().
The implementation was slower than the C runtime on Mac OS X, Linux, and Windows...quite a bit slower when using the C fallback instead of the inline asm, too. Fixes Bugzilla #1755.
This commit is contained in:
parent
6f3b5c3641
commit
287d2529bd
1 changed file with 1 addition and 25 deletions
|
@ -257,33 +257,9 @@ extern DECLSPEC void *SDLCALL SDL_memset(void *dst, int c, size_t len);
|
|||
/* Note that the semantics are different from memset() in that this is a 32-bit assignment */
|
||||
SDL_FORCE_INLINE void SDL_memset4(void *dst, int val, size_t dwords)
|
||||
{
|
||||
#if defined(__GNUC__) && defined(i386)
|
||||
int u0, u1, u2;
|
||||
__asm__ __volatile__ (
|
||||
"cld \n\t"
|
||||
"rep ; stosl \n\t"
|
||||
: "=&D" (u0), "=&a" (u1), "=&c" (u2)
|
||||
: "0" (dst), "1" (val), "2" (SDL_static_cast(Uint32, dwords))
|
||||
: "memory"
|
||||
);
|
||||
#else
|
||||
size_t _n = (dwords + 3) / 4;
|
||||
Uint32 *_p = SDL_static_cast(Uint32 *, dst);
|
||||
Uint32 _val = (val);
|
||||
if (dwords == 0)
|
||||
return;
|
||||
switch (dwords % 4)
|
||||
{
|
||||
case 0: do { *_p++ = _val;
|
||||
case 3: *_p++ = _val;
|
||||
case 2: *_p++ = _val;
|
||||
case 1: *_p++ = _val;
|
||||
} while ( --_n );
|
||||
}
|
||||
#endif
|
||||
SDL_memset(dst, val, dwords * 4);
|
||||
}
|
||||
|
||||
|
||||
extern DECLSPEC void *SDLCALL SDL_memcpy(void *dst, const void *src, size_t len);
|
||||
|
||||
SDL_FORCE_INLINE void *SDL_memcpy4(void *dst, const void *src, size_t dwords)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue