Fixed bug #1090 (SDL_BlitCopyOverlap() assumes memcpy() operates in order)
Even if we're blitting between two different surfaces their pixels might still overlap, because of SDL_CreateRGBSurfaceFrom(), so always use SDL_BlitCopy() and check for overlap in that function. When handling overlapping surfaces, don't assume that memcpy() iterates forward, instead use memmove() correctly, and provide a fallback implementation of SDL_memmove() that handles the different cases. Fixed a bug with SDL_memset() not completely filling lengths that aren't a multiple of 4. Optimized SDL_memcpy() a bit using the same technique as SDL_memset().
This commit is contained in:
parent
dc23c42ea9
commit
d2b922f555
5 changed files with 76 additions and 111 deletions
|
@ -352,8 +352,8 @@ do { \
|
|||
#endif
|
||||
|
||||
/* We can count on memcpy existing on Mac OS X and being well-tuned. */
|
||||
#if defined(__MACH__) && defined(__APPLE__)
|
||||
#define SDL_memcpy(dst, src, len) memcpy(dst, src, len)
|
||||
#if defined(__MACOSX__)
|
||||
#define SDL_memcpy memcpy
|
||||
#elif defined(__GNUC__) && defined(i386)
|
||||
#define SDL_memcpy(dst, src, len) \
|
||||
do { \
|
||||
|
@ -385,8 +385,8 @@ extern DECLSPEC void *SDLCALL SDL_memcpy(void *dst, const void *src,
|
|||
#endif
|
||||
|
||||
/* We can count on memcpy existing on Mac OS X and being well-tuned. */
|
||||
#if defined(__MACH__) && defined(__APPLE__)
|
||||
#define SDL_memcpy4(dst, src, len) memcpy(dst, src, (len)*4)
|
||||
#if defined(__MACOSX__)
|
||||
#define SDL_memcpy4(dst, src, len) SDL_memcpy((dst), (src), (len) << 2)
|
||||
#elif defined(__GNUC__) && defined(i386)
|
||||
#define SDL_memcpy4(dst, src, len) \
|
||||
do { \
|
||||
|
@ -400,54 +400,14 @@ do { \
|
|||
} while(0)
|
||||
#endif
|
||||
#ifndef SDL_memcpy4
|
||||
#define SDL_memcpy4(dst, src, len) SDL_memcpy(dst, src, (len) << 2)
|
||||
#endif
|
||||
|
||||
#if defined(__GNUC__) && defined(i386)
|
||||
#define SDL_revcpy(dst, src, len) \
|
||||
do { \
|
||||
int u0, u1, u2; \
|
||||
char *dstp = SDL_static_cast(char *, dst); \
|
||||
char *srcp = SDL_static_cast(char *, src); \
|
||||
int n = (len); \
|
||||
if ( n >= 4 ) { \
|
||||
__asm__ __volatile__ ( \
|
||||
"std\n\t" \
|
||||
"rep ; movsl\n\t" \
|
||||
"cld\n\t" \
|
||||
: "=&c" (u0), "=&D" (u1), "=&S" (u2) \
|
||||
: "0" (n >> 2), \
|
||||
"1" (dstp+(n-4)), "2" (srcp+(n-4)) \
|
||||
: "memory" ); \
|
||||
} \
|
||||
switch (n & 3) { \
|
||||
case 3: dstp[2] = srcp[2]; \
|
||||
case 2: dstp[1] = srcp[1]; \
|
||||
case 1: dstp[0] = srcp[0]; \
|
||||
break; \
|
||||
default: \
|
||||
break; \
|
||||
} \
|
||||
} while(0)
|
||||
#endif
|
||||
#ifndef SDL_revcpy
|
||||
extern DECLSPEC void *SDLCALL SDL_revcpy(void *dst, const void *src,
|
||||
size_t len);
|
||||
#define SDL_memcpy4(dst, src, len) SDL_memcpy((dst), (src), (len) << 2)
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_MEMMOVE
|
||||
#define SDL_memmove memmove
|
||||
#elif defined(HAVE_BCOPY)
|
||||
#define SDL_memmove(d, s, n) bcopy((s), (d), (n))
|
||||
#else
|
||||
#define SDL_memmove(dst, src, len) \
|
||||
do { \
|
||||
if ( dst < src ) { \
|
||||
SDL_memcpy(dst, src, len); \
|
||||
} else { \
|
||||
SDL_revcpy(dst, src, len); \
|
||||
} \
|
||||
} while(0)
|
||||
extern DECLSPEC void *SDLCALL SDL_memmove(void *dst, const void *src,
|
||||
size_t len);
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_MEMCMP
|
||||
|
|
|
@ -265,31 +265,27 @@ void *
|
|||
SDL_memset(void *dst, int c, size_t len)
|
||||
{
|
||||
size_t left = (len % 4);
|
||||
if (len >= 4) {
|
||||
Uint32 value = 0;
|
||||
Uint32 *dstp = (Uint32 *) dst;
|
||||
int i;
|
||||
for (i = 0; i < 4; ++i) {
|
||||
value <<= 8;
|
||||
value |= c;
|
||||
}
|
||||
Uint32 *dstp4;
|
||||
Uint8 *dstp1;
|
||||
Uint32 value4 = (c | (c << 8) | (c << 16) | (c << 24));
|
||||
Uint8 value1 = (Uint8) c;
|
||||
|
||||
dstp4 = (Uint32 *) dst;
|
||||
len /= 4;
|
||||
while (len--) {
|
||||
*dstp++ = value;
|
||||
*dstp4++ = value4;
|
||||
}
|
||||
}
|
||||
if (left > 0) {
|
||||
Uint8 value = (Uint8) c;
|
||||
Uint8 *dstp = (Uint8 *) dst;
|
||||
|
||||
dstp1 = (Uint8 *) dstp4;
|
||||
switch (left) {
|
||||
case 3:
|
||||
*dstp++ = value;
|
||||
*dstp1++ = value1;
|
||||
case 2:
|
||||
*dstp++ = value;
|
||||
*dstp1++ = value1;
|
||||
case 1:
|
||||
*dstp++ = value;
|
||||
}
|
||||
*dstp1++ = value1;
|
||||
}
|
||||
|
||||
return dst;
|
||||
}
|
||||
#endif
|
||||
|
@ -298,26 +294,50 @@ SDL_memset(void *dst, int c, size_t len)
|
|||
void *
|
||||
SDL_memcpy(void *dst, const void *src, size_t len)
|
||||
{
|
||||
char *srcp = (char *) src;
|
||||
char *dstp = (char *) dst;
|
||||
size_t left = (len % 4);
|
||||
Uint32 *srcp4, *dstp4;
|
||||
Uint8 *srcp1, *dstp1;
|
||||
|
||||
srcp4 = (Uint32 *) src;
|
||||
dstp4 = (Uint32 *) dst;
|
||||
len /= 4;
|
||||
while (len--) {
|
||||
*dstp++ = *srcp++;
|
||||
*dstp4++ = *srcp4++;
|
||||
}
|
||||
|
||||
srcp1 = (Uint8 *) srcp4;
|
||||
dstp1 = (Uint8 *) dstp4;
|
||||
switch (left) {
|
||||
case 3:
|
||||
*dstp1++ = *srcp1++;
|
||||
case 2:
|
||||
*dstp1++ = *srcp1++;
|
||||
case 1:
|
||||
*dstp1++ = *srcp1++;
|
||||
}
|
||||
|
||||
return dst;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef SDL_revcpy
|
||||
#ifndef SDL_memmove
|
||||
void *
|
||||
SDL_revcpy(void *dst, const void *src, size_t len)
|
||||
SDL_memmove(void *dst, const void *src, size_t len)
|
||||
{
|
||||
char *srcp = (char *) src;
|
||||
char *dstp = (char *) dst;
|
||||
|
||||
if (src < dst) {
|
||||
srcp += len - 1;
|
||||
dstp += len - 1;
|
||||
while (len--) {
|
||||
*dstp-- = *srcp--;
|
||||
}
|
||||
} else {
|
||||
while (len--) {
|
||||
*dstp++ = *srcp++;
|
||||
}
|
||||
}
|
||||
return dst;
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -205,12 +205,7 @@ SDL_CalculateBlit(SDL_Surface * surface)
|
|||
|
||||
/* Choose a standard blit function */
|
||||
if (map->identity && !(map->info.flags & ~SDL_COPY_RLE_DESIRED)) {
|
||||
/* Handle overlapping blits on the same surface */
|
||||
if (surface == dst) {
|
||||
blit = SDL_BlitCopyOverlap;
|
||||
} else {
|
||||
blit = SDL_BlitCopy;
|
||||
}
|
||||
} else if (surface->format->BitsPerPixel < 8) {
|
||||
blit = SDL_CalculateBlit0(surface);
|
||||
} else if (surface->format->BytesPerPixel == 1) {
|
||||
|
|
|
@ -96,6 +96,7 @@ SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
|
|||
void
|
||||
SDL_BlitCopy(SDL_BlitInfo * info)
|
||||
{
|
||||
SDL_bool overlap;
|
||||
Uint8 *src, *dst;
|
||||
int w, h;
|
||||
int srcskip, dstskip;
|
||||
|
@ -107,6 +108,21 @@ SDL_BlitCopy(SDL_BlitInfo * info)
|
|||
srcskip = info->src_pitch;
|
||||
dstskip = info->dst_pitch;
|
||||
|
||||
/* Properly handle overlapping blits */
|
||||
if (src < dst) {
|
||||
overlap = (dst < (src + h*srcskip));
|
||||
} else {
|
||||
overlap = (src < (dst + h*dstskip));
|
||||
}
|
||||
if (overlap) {
|
||||
while (h--) {
|
||||
SDL_memmove(dst, src, w);
|
||||
src += srcskip;
|
||||
dst += dstskip;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
#ifdef __SSE__
|
||||
if (SDL_HasSSE() &&
|
||||
!((uintptr_t) src & 15) && !(srcskip & 15) &&
|
||||
|
@ -141,29 +157,4 @@ SDL_BlitCopy(SDL_BlitInfo * info)
|
|||
}
|
||||
}
|
||||
|
||||
void
|
||||
SDL_BlitCopyOverlap(SDL_BlitInfo * info)
|
||||
{
|
||||
Uint8 *src, *dst;
|
||||
int w, h;
|
||||
int skip;
|
||||
|
||||
w = info->dst_w * info->dst_fmt->BytesPerPixel;
|
||||
h = info->dst_h;
|
||||
src = info->src;
|
||||
dst = info->dst;
|
||||
skip = info->src_pitch;
|
||||
if ((dst < src) || (dst >= (src + h * skip))) {
|
||||
SDL_BlitCopy(info);
|
||||
} else {
|
||||
src += ((h - 1) * skip);
|
||||
dst += ((h - 1) * skip);
|
||||
while (h--) {
|
||||
SDL_revcpy(dst, src, w);
|
||||
src -= skip;
|
||||
dst -= skip;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* vi: set ts=4 sw=4 expandtab: */
|
||||
|
|
|
@ -21,6 +21,5 @@
|
|||
*/
|
||||
|
||||
void SDL_BlitCopy(SDL_BlitInfo * info);
|
||||
void SDL_BlitCopyOverlap(SDL_BlitInfo * info);
|
||||
|
||||
/* vi: set ts=4 sw=4 expandtab: */
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue