Fixed severe bottleneck in the XML Parser code.

Applied Max's patch for character drawing.
Added new FP square root function.

svn-id: r34354
This commit is contained in:
Vicent Marti 2008-09-05 14:48:47 +00:00
parent 020be5bc21
commit e0592c7d25
6 changed files with 194 additions and 265 deletions

View file

@ -36,7 +36,71 @@
#define VECTOR_RENDERER_FAST_TRIANGLES
/** HELPER MACROS for BRESENHAM's circle drawing algorithm **/
/**
 * Square-root lookup table with 257 entries (indices 0..256).
 *
 * Entry i holds approximately 256 * sqrt(i), i.e. sqrt(i) with 8 fractional
 * bits: entry 1 is 0x0100, entry 4 is 0x0200, entry 256 is 0x1000.
 * Despite the "inv" in the name, the values are plain square roots, not
 * inverse square roots.
 */
const uint16 inv_sqrt_tbl[] = {
0x0000, 0x0100, 0x016A, 0x01BB, 0x0200, 0x023C, 0x0273, 0x02A5,
0x02D4, 0x0300, 0x0329, 0x0351, 0x0376, 0x039B, 0x03BD, 0x03DF,
0x0400, 0x041F, 0x043E, 0x045B, 0x0478, 0x0495, 0x04B0, 0x04CB,
0x04E6, 0x0500, 0x0519, 0x0532, 0x054A, 0x0562, 0x057A, 0x0591,
0x05A8, 0x05BE, 0x05D4, 0x05EA, 0x0600, 0x0615, 0x062A, 0x063E,
0x0653, 0x0667, 0x067B, 0x068E, 0x06A2, 0x06B5, 0x06C8, 0x06DB,
0x06ED, 0x0700, 0x0712, 0x0724, 0x0736, 0x0747, 0x0759, 0x076A,
0x077B, 0x078C, 0x079D, 0x07AE, 0x07BE, 0x07CF, 0x07DF, 0x07EF,
0x0800, 0x080F, 0x081F, 0x082F, 0x083F, 0x084E, 0x085D, 0x086D,
0x087C, 0x088B, 0x089A, 0x08A9, 0x08B7, 0x08C6, 0x08D4, 0x08E3,
0x08F1, 0x0900, 0x090E, 0x091C, 0x092A, 0x0938, 0x0946, 0x0953,
0x0961, 0x096F, 0x097C, 0x098A, 0x0997, 0x09A4, 0x09B2, 0x09BF,
0x09CC, 0x09D9, 0x09E6, 0x09F3, 0x0A00, 0x0A0C, 0x0A19, 0x0A26,
0x0A32, 0x0A3F, 0x0A4B, 0x0A58, 0x0A64, 0x0A70, 0x0A7C, 0x0A89,
0x0A95, 0x0AA1, 0x0AAD, 0x0AB9, 0x0AC5, 0x0AD1, 0x0ADC, 0x0AE8,
0x0AF4, 0x0B00, 0x0B0B, 0x0B17, 0x0B22, 0x0B2E, 0x0B39, 0x0B44,
0x0B50, 0x0B5B, 0x0B66, 0x0B72, 0x0B7D, 0x0B88, 0x0B93, 0x0B9E,
0x0BA9, 0x0BB4, 0x0BBF, 0x0BCA, 0x0BD5, 0x0BDF, 0x0BEA, 0x0BF5,
0x0C00, 0x0C0A, 0x0C15, 0x0C1F, 0x0C2A, 0x0C34, 0x0C3F, 0x0C49,
0x0C54, 0x0C5E, 0x0C68, 0x0C73, 0x0C7D, 0x0C87, 0x0C91, 0x0C9C,
0x0CA6, 0x0CB0, 0x0CBA, 0x0CC4, 0x0CCE, 0x0CD8, 0x0CE2, 0x0CEC,
0x0CF6, 0x0D00, 0x0D09, 0x0D13, 0x0D1D, 0x0D27, 0x0D30, 0x0D3A,
0x0D44, 0x0D4D, 0x0D57, 0x0D61, 0x0D6A, 0x0D74, 0x0D7D, 0x0D87,
0x0D90, 0x0D99, 0x0DA3, 0x0DAC, 0x0DB6, 0x0DBF, 0x0DC8, 0x0DD1,
0x0DDB, 0x0DE4, 0x0DED, 0x0DF6, 0x0E00, 0x0E09, 0x0E12, 0x0E1B,
0x0E24, 0x0E2D, 0x0E36, 0x0E3F, 0x0E48, 0x0E51, 0x0E5A, 0x0E63,
0x0E6C, 0x0E74, 0x0E7D, 0x0E86, 0x0E8F, 0x0E98, 0x0EA0, 0x0EA9,
0x0EB2, 0x0EBB, 0x0EC3, 0x0ECC, 0x0ED5, 0x0EDD, 0x0EE6, 0x0EEE,
0x0EF7, 0x0F00, 0x0F08, 0x0F11, 0x0F19, 0x0F21, 0x0F2A, 0x0F32,
0x0F3B, 0x0F43, 0x0F4C, 0x0F54, 0x0F5C, 0x0F65, 0x0F6D, 0x0F75,
0x0F7D, 0x0F86, 0x0F8E, 0x0F96, 0x0F9E, 0x0FA7, 0x0FAF, 0x0FB7,
0x0FBF, 0x0FC7, 0x0FCF, 0x0FD7, 0x0FDF, 0x0FE7, 0x0FEF, 0x0FF7,
0x1000
};
/**
 * Fast, table-driven fixed-point square root.
 *
 * Returns approximately 256 * sqrt(x): the input is normalised so that its
 * top 7 or 8 significant bits index inv_sqrt_tbl, and the table value is then
 * scaled back up by half of the (even) normalisation shift. Bits below the
 * table index are discarded, so the result is an approximation.
 *
 * @param x  Value to take the square root of; any 32-bit value is accepted.
 * @return   Approximately 256 * sqrt(x).
 */
inline uint32 fp_sqroot(uint32 x) {
	// Values that fit the table are looked up directly. This matches what the
	// normalisation below yields for 64..255, and avoids a negative shift
	// count for x < 64 as well as the undefined result of BSR for x == 0.
	if (x < 256)
		return inv_sqrt_tbl[x];

	int bit;

#if defined(__arm__)
	__asm__ ("clz %0, %1\nrsb %0, %0, #31\n" : "=r"(bit) : "r" (x));
#elif defined(__i386__)
	__asm__("bsrl %1, %0" : "=r" (bit) : "r" (x));
#else
	// Portable fallback: scan bit pairs from the top (31:30, 29:28, ...) and
	// stop at the first pair containing a set bit; 'bit' is the lower bit of
	// that pair. The loop terminates because x >= 256 here.
	uint32 mask = 0xC0000000;
	bit = 30;
	while (!(x & mask)) {
		mask >>= 2;
		bit -= 2;
	}
#endif

	// Make the shift even and leave 7 or 8 significant bits as table index.
	bit -= 6 + (bit & 1);
	return inv_sqrt_tbl[x >> bit] << (bit >> 1);
}
/**
 * Square-root helper used by the circle drawing macros.
 *
 * Inputs up to 255 are read straight from inv_sqrt_tbl; larger inputs go
 * through fp_sqroot(). The low 8 bits of the result are then inverted.
 */
inline uint32 circleSqrt(int x) {
	uint32 root;

	if (x > 255)
		root = fp_sqroot(x);
	else
		root = inv_sqrt_tbl[x];

	return root ^ 0xFF;
}
/** HELPER MACROS for BRESENHAM's circle drawing algorithm **/
#define __BE_ALGORITHM() { \
if (f >= 0) { \
y--; \
@ -111,7 +175,7 @@
blendPixelPtr(ptr4 + (y) + (px), color, a); \
}
#define __WU_ALGORITHM() { \
/*#define __WU_ALGORITHM() { \
oldT = T; \
T = fp_sqroot(rsq - ((y * y) << 16)) ^ 0xFFFF; \
py += p; \
@ -120,6 +184,16 @@
} \
a2 = (T >> 8); \
a1 = ~a2; \
} */
// Optimized Wu's algorithm: one step of the circle loop.
//
// Expects y, rsq, p, x, px, py, T, oldT, a1 and a2 to be declared in the
// enclosing scope. Each expansion:
//   - advances py by p,
//   - saves the previous T in oldT and recomputes T = circleSqrt(rsq - y*y),
//   - stores T in a2 and its bitwise complement in a1,
//   - when T dropped below its previous value, decrements x and moves px
//     back by p.
// NOTE(review): a1/a2 are presumably the blend alphas for the two pixels
// drawn per step -- confirm against the call sites.
#define __WU_ALGORITHM() {\
py += p; \
oldT = T; \
T = circleSqrt(rsq - (y * y)); \
a2 = T; \
a1 = ~T; \
if (T < oldT) { x--; px -= p; } \
}
@ -138,30 +212,6 @@ VectorRenderer *createRenderer(int mode) {
return 0;
}
}
/**
 * Fixed point SQUARE ROOT
 *
 * Bit-by-bit integer square root. Every round shifts the next two bits of
 * the input into the running remainder and decides one bit of the root.
 * 24 rounds are executed on a 32-bit input, so the result carries 8 bits
 * beyond the integer root: it equals floor(256 * sqrt(x)).
 *
 * @param x  Value to take the square root of.
 * @return   floor(256 * sqrt(x)).
 */
inline uint32 fp_sqroot(uint32 x) {
	uint32 root = 0;
	uint32 remHI = 0;
	uint32 remLO = x;

	for (int round = 0; round < 24; ++round) {
		// Bring the next two input bits into the remainder.
		remHI = (remHI << 2) | (remLO >> 30);
		remLO <<= 2;

		// Try to set the next root bit: (2 * root + 1) is the amount that
		// would have to be subtracted from the remainder.
		root <<= 1;
		const uint32 testDIV = (root << 1) + 1;
		if (remHI >= testDIV) {
			remHI -= testDIV;
			root++;
		}
	}

	return root;
}
template <typename PixelType, typename PixelFormat>
void VectorRendererSpec<PixelType, PixelFormat>::
@ -1512,7 +1562,7 @@ drawRoundedSquareAlg(int x1, int y1, int r, int w, int h, PixelType color, Vecto
x = r; y = 0; T = 0;
px = p * x; py = 0;
while (x > y++) {
while (x > 1 + y++) {
__WU_ALGORITHM();
colorFill(ptr_tl - x - py, ptr_tr + x - py, color);