Turned two variables of the HQ2x/HQ3x ASM implementation into global variables, so that they can be adjusted for 555 vs. 565 mode (555 mode is still a bit buggy, due to the interpolation code these scalers use)
svn-id: r36046
This commit is contained in:
parent
e5feb689df
commit
16e7a7cd30
4 changed files with 128 additions and 74 deletions
|
@ -54,10 +54,15 @@ extern "C" {
|
||||||
#if !defined(_WIN32) && !defined(MACOSX) && !defined(__OS2__)
|
#if !defined(_WIN32) && !defined(MACOSX) && !defined(__OS2__)
|
||||||
#define RGBtoYUV _RGBtoYUV
|
#define RGBtoYUV _RGBtoYUV
|
||||||
#define LUT16to32 _LUT16to32
|
#define LUT16to32 _LUT16to32
|
||||||
|
#define hqx_highbits _hqx_highbits
|
||||||
|
#define hqx_lowbits _hqx_lowbits
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
uint32 hqx_highbits = 0xF7DEF7DE;
|
||||||
|
uint32 hqx_lowbits = 0x0821;
|
||||||
|
|
||||||
// FIXME/TODO: The following two tables suck up 512 KB. This is bad.
|
// FIXME/TODO: The following two tables suck up 512 KB. This is bad.
|
||||||
// In addition we never free them...
|
// In addition we never free them...
|
||||||
//
|
//
|
||||||
|
@ -114,11 +119,25 @@ void InitLUT(Graphics::PixelFormat format) {
|
||||||
|
|
||||||
void InitScalers(uint32 BitFormat) {
|
void InitScalers(uint32 BitFormat) {
|
||||||
gBitFormat = BitFormat;
|
gBitFormat = BitFormat;
|
||||||
|
|
||||||
#ifndef DISABLE_HQ_SCALERS
|
#ifndef DISABLE_HQ_SCALERS
|
||||||
if (gBitFormat == 555)
|
#undef highBits;
|
||||||
|
#undef lowBits;
|
||||||
|
|
||||||
|
if (gBitFormat == 555) {
|
||||||
InitLUT(Graphics::createPixelFormat<555>());
|
InitLUT(Graphics::createPixelFormat<555>());
|
||||||
if (gBitFormat == 565)
|
#ifdef USE_NASM
|
||||||
|
hqx_highbits = Graphics::ColorMasks<555>::highBits;
|
||||||
|
hqx_lowbits = Graphics::ColorMasks<555>::lowBits & 0xFFFF;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
if (gBitFormat == 565) {
|
||||||
InitLUT(Graphics::createPixelFormat<565>());
|
InitLUT(Graphics::createPixelFormat<565>());
|
||||||
|
#ifdef USE_NASM
|
||||||
|
hqx_highbits = Graphics::ColorMasks<565>::highBits;
|
||||||
|
hqx_lowbits = Graphics::ColorMasks<565>::lowBits & 0xFFFF;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,8 @@ GLOBAL _hq2x_16
|
||||||
|
|
||||||
EXTERN _LUT16to32
|
EXTERN _LUT16to32
|
||||||
EXTERN _RGBtoYUV
|
EXTERN _RGBtoYUV
|
||||||
|
EXTERN _hqx_highbits
|
||||||
|
EXTERN _hqx_lowbits
|
||||||
|
|
||||||
SECTION .bss
|
SECTION .bss
|
||||||
linesleft resd 1
|
linesleft resd 1
|
||||||
|
@ -47,10 +49,6 @@ const5 dd 0x00050005,0x00000005
|
||||||
const6 dd 0x00060006,0x00000006
|
const6 dd 0x00060006,0x00000006
|
||||||
const14 dd 0x000E000E,0x0000000E
|
const14 dd 0x000E000E,0x0000000E
|
||||||
threshold dd 0x00300706,0x00000000
|
threshold dd 0x00300706,0x00000000
|
||||||
; FIXME: zerlowbits assumes 565 mode.
|
|
||||||
; Also, in the code, the constant 0x0821 is used which also assumes 565 mode
|
|
||||||
highbits dd 0xF7DEF7DE
|
|
||||||
lowbits dd 0x0821
|
|
||||||
moduloSrc dd 0
|
moduloSrc dd 0
|
||||||
moduloDst dd 0
|
moduloDst dd 0
|
||||||
|
|
||||||
|
@ -135,17 +133,14 @@ SECTION .text
|
||||||
%macro Interp1 3
|
%macro Interp1 3
|
||||||
mov edx,%2
|
mov edx,%2
|
||||||
mov ecx,%3
|
mov ecx,%3
|
||||||
cmp edx,ecx
|
and edx,[_hqx_highbits]
|
||||||
je %%fin
|
and ecx,[_hqx_highbits]
|
||||||
and edx,[highbits]
|
|
||||||
and ecx,[highbits]
|
|
||||||
add ecx,edx
|
add ecx,edx
|
||||||
shr ecx,1
|
shr ecx,1
|
||||||
add ecx,[lowbits]
|
add ecx,[_hqx_lowbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add edx,ecx
|
add edx,ecx
|
||||||
shr edx,1
|
shr edx,1
|
||||||
%%fin:
|
|
||||||
mov %1,dx
|
mov %1,dx
|
||||||
%endmacro
|
%endmacro
|
||||||
|
|
||||||
|
@ -154,19 +149,15 @@ SECTION .text
|
||||||
%macro Interp2 4
|
%macro Interp2 4
|
||||||
mov edx,%3
|
mov edx,%3
|
||||||
mov ecx,%4
|
mov ecx,%4
|
||||||
cmp edx,ecx
|
and edx,[_hqx_highbits]
|
||||||
je %%fin1
|
and ecx,[_hqx_highbits]
|
||||||
and edx,[highbits]
|
|
||||||
and ecx,[highbits]
|
|
||||||
add ecx,edx
|
add ecx,edx
|
||||||
shr ecx,1
|
shr ecx,1
|
||||||
add ecx,[lowbits]
|
add ecx,[_hqx_lowbits]
|
||||||
%%fin1:
|
%%fin1:
|
||||||
mov edx,%2
|
mov edx,%2
|
||||||
cmp edx,ecx
|
and ecx,[_hqx_highbits]
|
||||||
je %%fin2
|
and edx,[_hqx_highbits]
|
||||||
and ecx,[highbits]
|
|
||||||
and edx,[highbits]
|
|
||||||
add edx,ecx
|
add edx,ecx
|
||||||
shr edx,1
|
shr edx,1
|
||||||
%%fin2:
|
%%fin2:
|
||||||
|
@ -178,10 +169,8 @@ SECTION .text
|
||||||
%macro Interp5 3
|
%macro Interp5 3
|
||||||
mov edx,%2
|
mov edx,%2
|
||||||
mov ecx,%3
|
mov ecx,%3
|
||||||
cmp edx,ecx
|
and edx,[_hqx_highbits]
|
||||||
je %%fin
|
and ecx,[_hqx_highbits]
|
||||||
and edx,[highbits]
|
|
||||||
and ecx,[highbits]
|
|
||||||
add edx,ecx
|
add edx,ecx
|
||||||
shr edx,1
|
shr edx,1
|
||||||
%%fin:
|
%%fin:
|
||||||
|
@ -1729,12 +1718,12 @@ _hq2x_16:
|
||||||
shl eax,16
|
shl eax,16
|
||||||
or eax,edx
|
or eax,edx
|
||||||
mov ecx,[w2]
|
mov ecx,[w2]
|
||||||
and edx,[highbits]
|
and edx,[_hqx_highbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add ecx,edx
|
add ecx,edx
|
||||||
shr ecx,1
|
shr ecx,1
|
||||||
add ecx,[lowbits]
|
add ecx,[_hqx_lowbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add edx,ecx
|
add edx,ecx
|
||||||
shr edx,1
|
shr edx,1
|
||||||
mov ecx,edx
|
mov ecx,edx
|
||||||
|
@ -1746,12 +1735,12 @@ _hq2x_16:
|
||||||
..@cross2:
|
..@cross2:
|
||||||
shl eax,16
|
shl eax,16
|
||||||
mov ecx,[w4]
|
mov ecx,[w4]
|
||||||
and edx,[highbits]
|
and edx,[_hqx_highbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add ecx,edx
|
add ecx,edx
|
||||||
shr ecx,1
|
shr ecx,1
|
||||||
add ecx,[lowbits]
|
add ecx,[_hqx_lowbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add edx,ecx
|
add edx,ecx
|
||||||
shr edx,1
|
shr edx,1
|
||||||
or eax,edx
|
or eax,edx
|
||||||
|
@ -1760,12 +1749,12 @@ _hq2x_16:
|
||||||
jmp .loopx_end
|
jmp .loopx_end
|
||||||
..@cross4:
|
..@cross4:
|
||||||
mov ecx,[w6]
|
mov ecx,[w6]
|
||||||
and edx,[highbits]
|
and edx,[_hqx_highbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add ecx,edx
|
add ecx,edx
|
||||||
shr ecx,1
|
shr ecx,1
|
||||||
add ecx,[lowbits]
|
add ecx,[_hqx_lowbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add edx,ecx
|
add edx,ecx
|
||||||
shr edx,1
|
shr edx,1
|
||||||
shl edx,16
|
shl edx,16
|
||||||
|
@ -1778,12 +1767,12 @@ _hq2x_16:
|
||||||
shl eax,16
|
shl eax,16
|
||||||
or eax,edx
|
or eax,edx
|
||||||
mov ecx,[w8]
|
mov ecx,[w8]
|
||||||
and edx,[highbits]
|
and edx,[_hqx_highbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add ecx,edx
|
add ecx,edx
|
||||||
shr ecx,1
|
shr ecx,1
|
||||||
add ecx,[lowbits]
|
add ecx,[_hqx_lowbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add edx,ecx
|
add edx,ecx
|
||||||
shr edx,1
|
shr edx,1
|
||||||
mov ecx,edx
|
mov ecx,edx
|
||||||
|
|
|
@ -22,6 +22,8 @@ GLOBAL _hq3x_16
|
||||||
|
|
||||||
EXTERN _LUT16to32
|
EXTERN _LUT16to32
|
||||||
EXTERN _RGBtoYUV
|
EXTERN _RGBtoYUV
|
||||||
|
EXTERN _hqx_highbits
|
||||||
|
EXTERN _hqx_lowbits
|
||||||
|
|
||||||
SECTION .bss
|
SECTION .bss
|
||||||
linesleft resd 1
|
linesleft resd 1
|
||||||
|
@ -44,10 +46,6 @@ SECTION .data
|
||||||
reg_blank dd 0,0
|
reg_blank dd 0,0
|
||||||
const7 dd 0x00070007,0x00000007
|
const7 dd 0x00070007,0x00000007
|
||||||
threshold dd 0x00300706,0x00000000
|
threshold dd 0x00300706,0x00000000
|
||||||
; FIXME: zerlowbits assumes 565 mode.
|
|
||||||
; Also, in the code, the constant 0x0821 is used which also assumes 565 mode
|
|
||||||
highbits dd 0xF7DEF7DE
|
|
||||||
lowbits dd 0x0821
|
|
||||||
moduloSrc dd 0
|
moduloSrc dd 0
|
||||||
moduloDst dd 0
|
moduloDst dd 0
|
||||||
|
|
||||||
|
@ -134,12 +132,12 @@ SECTION .text
|
||||||
mov ecx,%3
|
mov ecx,%3
|
||||||
cmp edx,ecx
|
cmp edx,ecx
|
||||||
je %%fin
|
je %%fin
|
||||||
and edx,[highbits]
|
and edx,[_hqx_highbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add ecx,edx
|
add ecx,edx
|
||||||
shr ecx,1
|
shr ecx,1
|
||||||
add ecx,[lowbits]
|
add ecx,[_hqx_lowbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add edx,ecx
|
add edx,ecx
|
||||||
shr edx,1
|
shr edx,1
|
||||||
%%fin:
|
%%fin:
|
||||||
|
@ -153,17 +151,17 @@ SECTION .text
|
||||||
mov ecx,%4
|
mov ecx,%4
|
||||||
cmp edx,ecx
|
cmp edx,ecx
|
||||||
je %%fin1
|
je %%fin1
|
||||||
and edx,[highbits]
|
and edx,[_hqx_highbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add ecx,edx
|
add ecx,edx
|
||||||
shr ecx,1
|
shr ecx,1
|
||||||
add ecx,[lowbits]
|
add ecx,[_hqx_lowbits]
|
||||||
%%fin1:
|
%%fin1:
|
||||||
mov edx,%2
|
mov edx,%2
|
||||||
cmp edx,ecx
|
cmp edx,ecx
|
||||||
je %%fin2
|
je %%fin2
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
and edx,[highbits]
|
and edx,[_hqx_highbits]
|
||||||
add edx,ecx
|
add edx,ecx
|
||||||
shr edx,1
|
shr edx,1
|
||||||
%%fin2:
|
%%fin2:
|
||||||
|
@ -224,8 +222,8 @@ SECTION .text
|
||||||
mov ecx,%3
|
mov ecx,%3
|
||||||
cmp edx,ecx
|
cmp edx,ecx
|
||||||
je %%fin
|
je %%fin
|
||||||
and edx,[highbits]
|
and edx,[_hqx_highbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add edx,ecx
|
add edx,ecx
|
||||||
shr edx,1
|
shr edx,1
|
||||||
%%fin:
|
%%fin:
|
||||||
|
@ -2303,12 +2301,12 @@ _hq3x_16:
|
||||||
shl eax,16
|
shl eax,16
|
||||||
or eax,edx
|
or eax,edx
|
||||||
mov ecx,[w2]
|
mov ecx,[w2]
|
||||||
and edx,[highbits]
|
and edx,[_hqx_highbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add ecx,edx
|
add ecx,edx
|
||||||
shr ecx,1
|
shr ecx,1
|
||||||
add ecx,[lowbits]
|
add ecx,[_hqx_lowbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add edx,ecx
|
add edx,ecx
|
||||||
shr edx,1
|
shr edx,1
|
||||||
mov [edi],dx
|
mov [edi],dx
|
||||||
|
@ -2324,12 +2322,12 @@ _hq3x_16:
|
||||||
shl eax,16
|
shl eax,16
|
||||||
or eax,edx
|
or eax,edx
|
||||||
mov ecx,[w4]
|
mov ecx,[w4]
|
||||||
and edx,[highbits]
|
and edx,[_hqx_highbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add ecx,edx
|
add ecx,edx
|
||||||
shr ecx,1
|
shr ecx,1
|
||||||
add ecx,[lowbits]
|
add ecx,[_hqx_lowbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add edx,ecx
|
add edx,ecx
|
||||||
shr edx,1
|
shr edx,1
|
||||||
mov [edi],dx
|
mov [edi],dx
|
||||||
|
@ -2344,12 +2342,12 @@ _hq3x_16:
|
||||||
shl eax,16
|
shl eax,16
|
||||||
or eax,edx
|
or eax,edx
|
||||||
mov ecx,[w6]
|
mov ecx,[w6]
|
||||||
and edx,[highbits]
|
and edx,[_hqx_highbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add ecx,edx
|
add ecx,edx
|
||||||
shr ecx,1
|
shr ecx,1
|
||||||
add ecx,[lowbits]
|
add ecx,[_hqx_lowbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add edx,ecx
|
add edx,ecx
|
||||||
shr edx,1
|
shr edx,1
|
||||||
mov [edi],eax
|
mov [edi],eax
|
||||||
|
@ -2364,12 +2362,12 @@ _hq3x_16:
|
||||||
shl eax,16
|
shl eax,16
|
||||||
or eax,edx
|
or eax,edx
|
||||||
mov ecx,[w8]
|
mov ecx,[w8]
|
||||||
and edx,[highbits]
|
and edx,[_hqx_highbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add ecx,edx
|
add ecx,edx
|
||||||
shr ecx,1
|
shr ecx,1
|
||||||
add ecx,[lowbits]
|
add ecx,[_hqx_lowbits]
|
||||||
and ecx,[highbits]
|
and ecx,[_hqx_highbits]
|
||||||
add edx,ecx
|
add edx,ecx
|
||||||
shr edx,1
|
shr edx,1
|
||||||
mov [edi],eax
|
mov [edi],eax
|
||||||
|
|
|
@ -76,7 +76,6 @@ static inline uint32 interpolate32_1_1_1_1(uint32 A, uint32 B, uint32 C, uint32
|
||||||
return x + y;
|
return x + y;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interpolate two 16 bit pixels with the weights specified in the template
|
* Interpolate two 16 bit pixels with the weights specified in the template
|
||||||
* parameters. Used by the hq scaler family.
|
* parameters. Used by the hq scaler family.
|
||||||
|
@ -100,6 +99,55 @@ static inline uint16 interpolate16_3(uint16 p1, uint16 p2, uint16 p3) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Blend two packed pixels with the weights 3:1, i.e. (3*c1 + c2) / 4.
 *
 * Before the division (>> 2), `lowbits` is subtracted from the weighted sum.
 * `lowbits` is built from the doubled c1 masked with (lowBits << 1) plus the
 * bits of c1 and c2 selected by qlowBits, finally masked with qlowBits again.
 *
 * NOTE(review): lowBits and qlowBits are not defined in this view. Presumably
 * they select the lowest one and the lowest two bits of every color field of
 * the pixel format chosen by bitFormat, so that the subtraction clears the
 * bits which the shift would otherwise move into the neighboring field --
 * confirm against their definition. bitFormat itself is not referenced
 * directly in the body.
 *
 * @param c1	pixel with weight 3
 * @param c2	pixel with weight 1
 * @return the blended pixel
 */
template<int bitFormat>
|
||||||
|
static inline unsigned interpolate16_3_1(unsigned c1, unsigned c2) {
|
||||||
|
const unsigned lowbits=(((c1<<1)&(lowBits<<1))+(c1&qlowBits)+(c2&qlowBits))&qlowBits;
|
||||||
|
return ((c1*3+c2) - lowbits) >> 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Blend three packed pixels with the weights 2:1:1, i.e.
 * (2*c1 + c2 + c3) / 4.
 *
 * c1 is doubled in place first. `lowbits` then collects the doubled c1 masked
 * with (lowBits << 1) plus the bits of c2 and c3 selected by qlowBits, masked
 * with qlowBits again; it is subtracted from the weighted sum before the
 * division (>> 2).
 *
 * NOTE(review): lowBits and qlowBits are not defined in this view. Presumably
 * they select the lowest one and the lowest two bits of every color field of
 * the pixel format chosen by bitFormat -- confirm against their definition.
 * bitFormat itself is not referenced directly in the body.
 *
 * @param c1	pixel with weight 2
 * @param c2	pixel with weight 1
 * @param c3	pixel with weight 1
 * @return the blended pixel
 */
template<int bitFormat>
|
||||||
|
static inline unsigned interpolate16_2_1_1(unsigned c1, unsigned c2, unsigned c3) {
|
||||||
|
c1<<=1;
|
||||||
|
const unsigned lowbits=((c1&(lowBits<<1))+(c2&qlowBits)+(c3&qlowBits))&qlowBits;
|
||||||
|
return ((c1+c2+c3) - lowbits) >> 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Blend two packed pixels with equal weights, i.e. (c1 + c2) / 2.
 *
 * Before halving, (c1 ^ c2) & lowBits is subtracted from the sum, i.e. the
 * lowBits positions at which the two inputs differ.
 *
 * NOTE(review): lowBits is not defined in this view. Presumably it selects the
 * lowest bit of every color field of the pixel format chosen by bitFormat, so
 * that an odd per-field sum cannot leak into the field below when shifting --
 * confirm against its definition. bitFormat itself is not referenced directly
 * in the body.
 *
 * @param c1	first pixel
 * @param c2	second pixel
 * @return the blended pixel
 */
template<int bitFormat>
|
||||||
|
static inline unsigned interpolate16_1_1(unsigned c1, unsigned c2) {
|
||||||
|
return ( c1+c2 - ((c1^c2)&lowBits) ) >> 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Blend three packed pixels with the weights 5:2:1, i.e.
 * (5*c1 + 2*c2 + c3) / 8.
 *
 * c2 is doubled in place first. `lowbits` is assembled from c1 << 2 masked
 * with (lowBits << 2) plus c1, the doubled c2 and c3 masked with literal
 * constants, and is subtracted from the weighted sum before the division
 * (>> 3).
 *
 * NOTE(review): the literals do not depend on bitFormat. 0x1CE7 has bits
 * 0-2, 5-7 and 10-12 set and 0x18C6 has bits 1-2, 6-7 and 11-12 set, which
 * lines up with 5 bit wide fields starting at bits 0, 5 and 10. Whether that
 * is correct for every supported pixel layout should be confirmed.
 * lowBits is not defined in this view; presumably it selects the lowest bit
 * of every color field -- confirm against its definition.
 *
 * @param c1	pixel with weight 5
 * @param c2	pixel with weight 2
 * @param c3	pixel with weight 1
 * @return the blended pixel
 */
template<int bitFormat>
|
||||||
|
static inline unsigned interpolate16_5_2_1(unsigned c1, unsigned c2, unsigned c3) {
|
||||||
|
c2<<=1;
|
||||||
|
const unsigned lowbits=( ((c1<<2)&(lowBits<<2))+(c1&0x1CE7)+(c2&0x18C6)+(c3&0x1CE7) ) & 0x1CE7;
|
||||||
|
return ((c1*5+c2+c3) - lowbits) >> 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Blend three packed pixels with the weights 6:1:1, i.e.
 * (6*c1 + c2 + c3) / 8.
 *
 * `lowbits` is assembled from the c1 contribution (the doubled c1 masked with
 * (lowBits << 1) plus c1 masked with qlowBits, the sum shifted left by one)
 * plus c2 and c3 masked with the literal 0x1CE7; it is subtracted from the
 * weighted sum before the division (>> 3).
 *
 * NOTE(review): the literal 0x1CE7 (bits 0-2, 5-7 and 10-12) does not depend
 * on bitFormat and lines up with 5 bit wide fields starting at bits 0, 5 and
 * 10. Whether that is correct for every supported pixel layout should be
 * confirmed. lowBits and qlowBits are not defined in this view; presumably
 * they select the lowest one and the lowest two bits of every color field --
 * confirm against their definition.
 *
 * @param c1	pixel with weight 6
 * @param c2	pixel with weight 1
 * @param c3	pixel with weight 1
 * @return the blended pixel
 */
template<int bitFormat>
|
||||||
|
static inline unsigned interpolate16_6_1_1(unsigned c1, unsigned c2, unsigned c3) {
|
||||||
|
const unsigned lowbits=(((((c1<<1)&(lowBits<<1))+(c1&qlowBits))<<1)+(c2&0x1CE7)+(c3&0x1CE7))&0x1CE7;
|
||||||
|
return ((c1*6+c2+c3) - lowbits) >> 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Blend three packed pixels with the weights 2:3:3, i.e.
 * (2*c1 + 3*c2 + 3*c3) / 8.
 *
 * c1 is doubled in place first. The red/blue channels and the green channel
 * are then accumulated in two separate sums (`rb` and `g`). The weights add
 * up to 8 (1 << 3), so each sum is masked with its channel mask shifted left
 * by 3 and the combined value is shifted right by 3 again. Both channels must
 * use the same shift amount as the final >> 3; otherwise the masked bits do
 * not land back inside the channel's own field.
 *
 * NOTE(review): redblueMask and greenMask are not defined in this view.
 * Presumably they select the red+blue fields and the green field of the pixel
 * format chosen by bitFormat -- confirm against their definition. bitFormat
 * itself is not referenced directly in the body.
 *
 * @param c1	pixel with weight 2
 * @param c2	pixel with weight 3
 * @param c3	pixel with weight 3
 * @return the blended pixel
 */
template<int bitFormat>
|
||||||
|
static inline unsigned interpolate16_2_3_3(unsigned c1, unsigned c2, unsigned c3) {
|
||||||
|
c1<<=1;
|
||||||
|
const unsigned rb=(c1&(redblueMask<<1))+((c2&redblueMask)+(c3&redblueMask))*3;
|
||||||
|
const unsigned g=(c1&(greenMask<<1))+((c2&greenMask)+(c3&greenMask))*3;
|
||||||
|
return ((rb&(redblueMask<<3))|(g&(greenMask<<3)))>>3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Blend three packed pixels with the weights 14:1:1, i.e.
 * (14*c1 + c2 + c3) / 16.
 *
 * The red/blue channels and the green channel are accumulated in two separate
 * sums (`rb` and `g`). The weights add up to 16 (1 << 4), so each sum is
 * masked with its channel mask shifted left by 4 and the combined value is
 * shifted right by 4 again.
 *
 * NOTE(review): redblueMask and greenMask are not defined in this view.
 * Presumably they select the red+blue fields and the green field of the pixel
 * format chosen by bitFormat -- confirm against their definition. bitFormat
 * itself is not referenced directly in the body.
 *
 * @param c1	pixel with weight 14
 * @param c2	pixel with weight 1
 * @param c3	pixel with weight 1
 * @return the blended pixel
 */
template<int bitFormat>
|
||||||
|
static inline unsigned interpolate16_14_1_1(unsigned c1, unsigned c2, unsigned c3) {
|
||||||
|
const unsigned rb=(c1&redblueMask)*14+(c2&redblueMask)+(c3&redblueMask);
|
||||||
|
const unsigned g=(c1&greenMask)*14+(c2&greenMask)+(c3&greenMask);
|
||||||
|
return ((rb&(redblueMask<<4))|(g&(greenMask<<4)))>>4;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Compare two YUV values (encoded 8-8-8) and check if they differ by more than
|
* Compare two YUV values (encoded 8-8-8) and check if they differ by more than
|
||||||
* a certain hard coded threshold. Used by the hq scaler family.
|
* a certain hard coded threshold. Used by the hq scaler family.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue