Fixed bug #1090 (SDL_BlitCopyOverlap() assumes memcpy() operates in order)
Even if we're blitting between two different surfaces, their pixels might still overlap because of SDL_CreateRGBSurfaceFrom(), so always use SDL_BlitCopy() and check for overlap in that function. When handling overlapping surfaces, don't assume that memcpy() iterates forward; instead, use memmove() correctly and provide a fallback implementation of SDL_memmove() that handles the different cases. Fixed a bug with SDL_memset() not completely filling lengths that aren't a multiple of 4. Optimized SDL_memcpy() a bit using the same technique as SDL_memset().
This commit is contained in:
parent
dc23c42ea9
commit
d2b922f555
5 changed files with 76 additions and 111 deletions
|
@ -352,8 +352,8 @@ do { \
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* We can count on memcpy existing on Mac OS X and being well-tuned. */
|
/* We can count on memcpy existing on Mac OS X and being well-tuned. */
|
||||||
#if defined(__MACH__) && defined(__APPLE__)
|
#if defined(__MACOSX__)
|
||||||
#define SDL_memcpy(dst, src, len) memcpy(dst, src, len)
|
#define SDL_memcpy memcpy
|
||||||
#elif defined(__GNUC__) && defined(i386)
|
#elif defined(__GNUC__) && defined(i386)
|
||||||
#define SDL_memcpy(dst, src, len) \
|
#define SDL_memcpy(dst, src, len) \
|
||||||
do { \
|
do { \
|
||||||
|
@ -385,8 +385,8 @@ extern DECLSPEC void *SDLCALL SDL_memcpy(void *dst, const void *src,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* We can count on memcpy existing on Mac OS X and being well-tuned. */
|
/* We can count on memcpy existing on Mac OS X and being well-tuned. */
|
||||||
#if defined(__MACH__) && defined(__APPLE__)
|
#if defined(__MACOSX__)
|
||||||
#define SDL_memcpy4(dst, src, len) memcpy(dst, src, (len)*4)
|
#define SDL_memcpy4(dst, src, len) SDL_memcpy((dst), (src), (len) << 2)
|
||||||
#elif defined(__GNUC__) && defined(i386)
|
#elif defined(__GNUC__) && defined(i386)
|
||||||
#define SDL_memcpy4(dst, src, len) \
|
#define SDL_memcpy4(dst, src, len) \
|
||||||
do { \
|
do { \
|
||||||
|
@ -400,54 +400,14 @@ do { \
|
||||||
} while(0)
|
} while(0)
|
||||||
#endif
|
#endif
|
||||||
#ifndef SDL_memcpy4
|
#ifndef SDL_memcpy4
|
||||||
#define SDL_memcpy4(dst, src, len) SDL_memcpy(dst, src, (len) << 2)
|
#define SDL_memcpy4(dst, src, len) SDL_memcpy((dst), (src), (len) << 2)
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__GNUC__) && defined(i386)
|
|
||||||
#define SDL_revcpy(dst, src, len) \
|
|
||||||
do { \
|
|
||||||
int u0, u1, u2; \
|
|
||||||
char *dstp = SDL_static_cast(char *, dst); \
|
|
||||||
char *srcp = SDL_static_cast(char *, src); \
|
|
||||||
int n = (len); \
|
|
||||||
if ( n >= 4 ) { \
|
|
||||||
__asm__ __volatile__ ( \
|
|
||||||
"std\n\t" \
|
|
||||||
"rep ; movsl\n\t" \
|
|
||||||
"cld\n\t" \
|
|
||||||
: "=&c" (u0), "=&D" (u1), "=&S" (u2) \
|
|
||||||
: "0" (n >> 2), \
|
|
||||||
"1" (dstp+(n-4)), "2" (srcp+(n-4)) \
|
|
||||||
: "memory" ); \
|
|
||||||
} \
|
|
||||||
switch (n & 3) { \
|
|
||||||
case 3: dstp[2] = srcp[2]; \
|
|
||||||
case 2: dstp[1] = srcp[1]; \
|
|
||||||
case 1: dstp[0] = srcp[0]; \
|
|
||||||
break; \
|
|
||||||
default: \
|
|
||||||
break; \
|
|
||||||
} \
|
|
||||||
} while(0)
|
|
||||||
#endif
|
|
||||||
#ifndef SDL_revcpy
|
|
||||||
extern DECLSPEC void *SDLCALL SDL_revcpy(void *dst, const void *src,
|
|
||||||
size_t len);
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_MEMMOVE
|
#ifdef HAVE_MEMMOVE
|
||||||
#define SDL_memmove memmove
|
#define SDL_memmove memmove
|
||||||
#elif defined(HAVE_BCOPY)
|
|
||||||
#define SDL_memmove(d, s, n) bcopy((s), (d), (n))
|
|
||||||
#else
|
#else
|
||||||
#define SDL_memmove(dst, src, len) \
|
extern DECLSPEC void *SDLCALL SDL_memmove(void *dst, const void *src,
|
||||||
do { \
|
size_t len);
|
||||||
if ( dst < src ) { \
|
|
||||||
SDL_memcpy(dst, src, len); \
|
|
||||||
} else { \
|
|
||||||
SDL_revcpy(dst, src, len); \
|
|
||||||
} \
|
|
||||||
} while(0)
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef HAVE_MEMCMP
|
#ifdef HAVE_MEMCMP
|
||||||
|
|
|
@ -265,31 +265,27 @@ void *
|
||||||
SDL_memset(void *dst, int c, size_t len)
|
SDL_memset(void *dst, int c, size_t len)
|
||||||
{
|
{
|
||||||
size_t left = (len % 4);
|
size_t left = (len % 4);
|
||||||
if (len >= 4) {
|
Uint32 *dstp4;
|
||||||
Uint32 value = 0;
|
Uint8 *dstp1;
|
||||||
Uint32 *dstp = (Uint32 *) dst;
|
Uint32 value4 = (c | (c << 8) | (c << 16) | (c << 24));
|
||||||
int i;
|
Uint8 value1 = (Uint8) c;
|
||||||
for (i = 0; i < 4; ++i) {
|
|
||||||
value <<= 8;
|
dstp4 = (Uint32 *) dst;
|
||||||
value |= c;
|
len /= 4;
|
||||||
}
|
while (len--) {
|
||||||
len /= 4;
|
*dstp4++ = value4;
|
||||||
while (len--) {
|
|
||||||
*dstp++ = value;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if (left > 0) {
|
|
||||||
Uint8 value = (Uint8) c;
|
dstp1 = (Uint8 *) dstp4;
|
||||||
Uint8 *dstp = (Uint8 *) dst;
|
switch (left) {
|
||||||
switch (left) {
|
case 3:
|
||||||
case 3:
|
*dstp1++ = value1;
|
||||||
*dstp++ = value;
|
case 2:
|
||||||
case 2:
|
*dstp1++ = value1;
|
||||||
*dstp++ = value;
|
case 1:
|
||||||
case 1:
|
*dstp1++ = value1;
|
||||||
*dstp++ = value;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -298,25 +294,49 @@ SDL_memset(void *dst, int c, size_t len)
|
||||||
void *
|
void *
|
||||||
SDL_memcpy(void *dst, const void *src, size_t len)
|
SDL_memcpy(void *dst, const void *src, size_t len)
|
||||||
{
|
{
|
||||||
char *srcp = (char *) src;
|
size_t left = (len % 4);
|
||||||
char *dstp = (char *) dst;
|
Uint32 *srcp4, *dstp4;
|
||||||
|
Uint8 *srcp1, *dstp1;
|
||||||
|
|
||||||
|
srcp4 = (Uint32 *) src;
|
||||||
|
dstp4 = (Uint32 *) dst;
|
||||||
|
len /= 4;
|
||||||
while (len--) {
|
while (len--) {
|
||||||
*dstp++ = *srcp++;
|
*dstp4++ = *srcp4++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
srcp1 = (Uint8 *) srcp4;
|
||||||
|
dstp1 = (Uint8 *) dstp4;
|
||||||
|
switch (left) {
|
||||||
|
case 3:
|
||||||
|
*dstp1++ = *srcp1++;
|
||||||
|
case 2:
|
||||||
|
*dstp1++ = *srcp1++;
|
||||||
|
case 1:
|
||||||
|
*dstp1++ = *srcp1++;
|
||||||
|
}
|
||||||
|
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef SDL_revcpy
|
#ifndef SDL_memmove
|
||||||
void *
|
void *
|
||||||
SDL_revcpy(void *dst, const void *src, size_t len)
|
SDL_memmove(void *dst, const void *src, size_t len)
|
||||||
{
|
{
|
||||||
char *srcp = (char *) src;
|
char *srcp = (char *) src;
|
||||||
char *dstp = (char *) dst;
|
char *dstp = (char *) dst;
|
||||||
srcp += len - 1;
|
|
||||||
dstp += len - 1;
|
if (src < dst) {
|
||||||
while (len--) {
|
srcp += len - 1;
|
||||||
*dstp-- = *srcp--;
|
dstp += len - 1;
|
||||||
|
while (len--) {
|
||||||
|
*dstp-- = *srcp--;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
while (len--) {
|
||||||
|
*dstp++ = *srcp++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return dst;
|
return dst;
|
||||||
}
|
}
|
||||||
|
|
|
@ -205,12 +205,7 @@ SDL_CalculateBlit(SDL_Surface * surface)
|
||||||
|
|
||||||
/* Choose a standard blit function */
|
/* Choose a standard blit function */
|
||||||
if (map->identity && !(map->info.flags & ~SDL_COPY_RLE_DESIRED)) {
|
if (map->identity && !(map->info.flags & ~SDL_COPY_RLE_DESIRED)) {
|
||||||
/* Handle overlapping blits on the same surface */
|
blit = SDL_BlitCopy;
|
||||||
if (surface == dst) {
|
|
||||||
blit = SDL_BlitCopyOverlap;
|
|
||||||
} else {
|
|
||||||
blit = SDL_BlitCopy;
|
|
||||||
}
|
|
||||||
} else if (surface->format->BitsPerPixel < 8) {
|
} else if (surface->format->BitsPerPixel < 8) {
|
||||||
blit = SDL_CalculateBlit0(surface);
|
blit = SDL_CalculateBlit0(surface);
|
||||||
} else if (surface->format->BytesPerPixel == 1) {
|
} else if (surface->format->BytesPerPixel == 1) {
|
||||||
|
|
|
@ -96,6 +96,7 @@ SDL_memcpyMMX(Uint8 * dst, const Uint8 * src, int len)
|
||||||
void
|
void
|
||||||
SDL_BlitCopy(SDL_BlitInfo * info)
|
SDL_BlitCopy(SDL_BlitInfo * info)
|
||||||
{
|
{
|
||||||
|
SDL_bool overlap;
|
||||||
Uint8 *src, *dst;
|
Uint8 *src, *dst;
|
||||||
int w, h;
|
int w, h;
|
||||||
int srcskip, dstskip;
|
int srcskip, dstskip;
|
||||||
|
@ -107,6 +108,21 @@ SDL_BlitCopy(SDL_BlitInfo * info)
|
||||||
srcskip = info->src_pitch;
|
srcskip = info->src_pitch;
|
||||||
dstskip = info->dst_pitch;
|
dstskip = info->dst_pitch;
|
||||||
|
|
||||||
|
/* Properly handle overlapping blits */
|
||||||
|
if (src < dst) {
|
||||||
|
overlap = (dst < (src + h*srcskip));
|
||||||
|
} else {
|
||||||
|
overlap = (src < (dst + h*dstskip));
|
||||||
|
}
|
||||||
|
if (overlap) {
|
||||||
|
while (h--) {
|
||||||
|
SDL_memmove(dst, src, w);
|
||||||
|
src += srcskip;
|
||||||
|
dst += dstskip;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __SSE__
|
#ifdef __SSE__
|
||||||
if (SDL_HasSSE() &&
|
if (SDL_HasSSE() &&
|
||||||
!((uintptr_t) src & 15) && !(srcskip & 15) &&
|
!((uintptr_t) src & 15) && !(srcskip & 15) &&
|
||||||
|
@ -141,29 +157,4 @@ SDL_BlitCopy(SDL_BlitInfo * info)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
|
||||||
SDL_BlitCopyOverlap(SDL_BlitInfo * info)
|
|
||||||
{
|
|
||||||
Uint8 *src, *dst;
|
|
||||||
int w, h;
|
|
||||||
int skip;
|
|
||||||
|
|
||||||
w = info->dst_w * info->dst_fmt->BytesPerPixel;
|
|
||||||
h = info->dst_h;
|
|
||||||
src = info->src;
|
|
||||||
dst = info->dst;
|
|
||||||
skip = info->src_pitch;
|
|
||||||
if ((dst < src) || (dst >= (src + h * skip))) {
|
|
||||||
SDL_BlitCopy(info);
|
|
||||||
} else {
|
|
||||||
src += ((h - 1) * skip);
|
|
||||||
dst += ((h - 1) * skip);
|
|
||||||
while (h--) {
|
|
||||||
SDL_revcpy(dst, src, w);
|
|
||||||
src -= skip;
|
|
||||||
dst -= skip;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* vi: set ts=4 sw=4 expandtab: */
|
/* vi: set ts=4 sw=4 expandtab: */
|
||||||
|
|
|
@ -21,6 +21,5 @@
|
||||||
*/
|
*/
|
||||||
|
|
||||||
void SDL_BlitCopy(SDL_BlitInfo * info);
|
void SDL_BlitCopy(SDL_BlitInfo * info);
|
||||||
void SDL_BlitCopyOverlap(SDL_BlitInfo * info);
|
|
||||||
|
|
||||||
/* vi: set ts=4 sw=4 expandtab: */
|
/* vi: set ts=4 sw=4 expandtab: */
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue