Updated SDL's YUV support, many thanks to Adrien Descamps
New functions get and set the YUV colorspace conversion mode: SDL_SetYUVConversionMode(), SDL_GetYUVConversionMode(), and SDL_GetYUVConversionModeForResolution(). SDL_ConvertPixels() converts between all supported RGB and YUV formats, with SSE acceleration for converting from planar YUV formats (YV12, NV12, etc.) to common RGB/RGBA formats. Added a new test program, testyuv, to verify the correctness and speed of the YUV conversion functionality.
This commit is contained in:
parent
e7cc03e0bd
commit
145d2469ae
60 changed files with 8368 additions and 4310 deletions
|
@ -1,409 +0,0 @@
|
|||
/*
|
||||
Simple DirectMedia Layer
|
||||
Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
#include "../SDL_internal.h"
|
||||
|
||||
#include "SDL_yuv_mmx_c.h"
|
||||
|
||||
#ifdef USE_MMX_ASSEMBLY
|
||||
|
||||
#include "SDL_stdinc.h"
|
||||
|
||||
#include "mmx.h"
|
||||
|
||||
/* *INDENT-OFF* */
|
||||
|
||||
/* Packed constants used as memory operands by the MMX converters below.
   The converters shift products right by 6 (psraw $6), so the coefficient
   words are fixed-point values scaled by 64. */
static mmx_t MMX_0080w = { .ud = {0x00800080, 0x00800080} }; /* 128 in every word; subtracted from the chroma samples */
|
||||
static mmx_t MMX_00FFw = { .ud = {0x00ff00ff, 0x00ff00ff} }; /* mask keeping the low byte of every word */
|
||||
static mmx_t MMX_FF00w = { .ud = {0xff00ff00, 0xff00ff00} }; /* mask keeping the high byte of every word */
|
||||
|
||||
static mmx_t MMX_Ycoeff = { .uw = {0x004a, 0x004a, 0x004a, 0x004a} }; /* 74 = 1.15625*64, luma scale (565 routine) */
|
||||
|
||||
static mmx_t MMX_UbluRGB = { .uw = {0x0072, 0x0072, 0x0072, 0x0072} }; /* 114 = 1.78125*64, Cb -> blue */
|
||||
static mmx_t MMX_VredRGB = { .uw = {0x0059, 0x0059, 0x0059, 0x0059} }; /* 89 = 1.390625*64, Cr -> red */
|
||||
static mmx_t MMX_UgrnRGB = { .uw = {0xffea, 0xffea, 0xffea, 0xffea} }; /* -22 = -0.34375*64, Cb -> green */
|
||||
static mmx_t MMX_VgrnRGB = { .uw = {0xffd2, 0xffd2, 0xffd2, 0xffd2} }; /* -46 = -0.71875*64, Cr -> green */
|
||||
|
||||
static mmx_t MMX_Ublu5x5 = { .uw = {0x0081, 0x0081, 0x0081, 0x0081} }; /* 129 = 2.015625*64, Cb -> blue */
|
||||
static mmx_t MMX_Vred5x5 = { .uw = {0x0066, 0x0066, 0x0066, 0x0066} }; /* 102 = 1.59375*64, Cr -> red */
|
||||
static mmx_t MMX_Ugrn565 = { .uw = {0xffe8, 0xffe8, 0xffe8, 0xffe8} }; /* -24 = -0.375*64, Cb -> green */
|
||||
static mmx_t MMX_Vgrn565 = { .uw = {0xffcd, 0xffcd, 0xffcd, 0xffcd} }; /* -51 = -0.796875*64, Cr -> green */
|
||||
|
||||
static mmx_t MMX_red565 = { .uw = {0xf800, 0xf800, 0xf800, 0xf800} }; /* top 5 bits of each word (applied to red, and to blue before its shift) */
|
||||
static mmx_t MMX_grn565 = { .uw = {0x07e0, 0x07e0, 0x07e0, 0x07e0} }; /* bits 10..5 of each word: the 565 green field */
|
||||
|
||||
/**
|
||||
This MMX assembler is my first assembler/MMX program ever.
|
||||
Thus it may be buggy.
|
||||
Send patches to:
|
||||
mvogt@rhrk.uni-kl.de
|
||||
|
||||
After it worked fine I have "obfuscated" the code a bit to have
|
||||
more parallelism in the MMX units. This means I moved
|
||||
initialisation around and delayed other instructions.
|
||||
Performance measurement did not show that this brought any advantage
|
||||
but in theory it _should_ be faster this way.
|
||||
|
||||
The overall performance gain over the C-based dither was 30%-40%.
|
||||
The MMX routine calculates 256bit=8RGB values in each cycle
|
||||
(4 for row1 & 4 for row2)
|
||||
|
||||
The red/green/blue coefficients are taken from the mpeg_play
|
||||
player. They look nice, but I don't know if you can have
|
||||
better values, to avoid integer rounding errors.
|
||||
|
||||
|
||||
IMPORTANT:
|
||||
==========
|
||||
|
||||
It is a requirement that the cr/cb/lum are 8 byte aligned and
|
||||
the out are 16byte aligned or you will/may get segfaults
|
||||
|
||||
*/
|
||||
|
||||
/*
 * Convert planar Y/Cr/Cb data to 32-bit pixels (0x00RRGGBB per dword) with
 * MMX, producing two output rows per pass. Each chroma sample is applied to
 * a 2x2 group of luma samples.
 *
 * colortab, rgb_2_pix: not referenced by this routine.
 * lum:  luma plane; the outer loop ends once the luma pointer reaches
 *       lum + cols*rows.
 * cr, cb: chroma planes; two samples of each are consumed per iteration.
 * out:  destination; row2 starts cols+mod pixels after row1.
 * rows, cols: luma dimensions used for the loop bounds.
 * mod:  extra destination pixels per row; recomputed below as the byte
 *       increment applied to row1/row2 after each pair of rows.
 *
 * Each inner iteration takes 4 luma samples from the current row and 4 from
 * the row cols bytes below it, and stores 16 bytes to each output row.
 *
 * NOTE(review): the asm advances its "r" input operands, rewrites x through
 * an "m" input, stores to memory and uses mm0-mm7, yet declares no outputs
 * or clobbers -- confirm this is acceptable for the compilers in use.
 * NOTE(review): pointers are stepped with 32-bit addl/leal/cmpl, so this
 * presumably targets 32-bit x86 only -- confirm.
 */
void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
|
||||
unsigned char *lum, unsigned char *cr,
|
||||
unsigned char *cb, unsigned char *out,
|
||||
int rows, int cols, int mod )
|
||||
{
|
||||
Uint32 *row1;
|
||||
Uint32 *row2;
|
||||
|
||||
unsigned char* y = lum +cols*rows; /* Pointer to the end */
|
||||
int x = 0;
|
||||
row1 = (Uint32 *)out; /* 32 bit target */
|
||||
row2 = (Uint32 *)out+cols+mod; /* start of second row */
|
||||
mod = (mod+cols+mod)*4; /* increment for row1 in byte */
|
||||
|
||||
__asm__ __volatile__ (
|
||||
".align 8\n"
|
||||
"1:\n"
|
||||
|
||||
/* create Cr (result in mm1) */
|
||||
"movd (%0),%%mm1\n" /* 0 0 0 0 v3 v2 v1 v0 */
|
||||
"pxor %%mm7,%%mm7\n" /* 00 00 00 00 00 00 00 00 */
|
||||
"movd (%2), %%mm2\n" /* 0 0 0 0 l3 l2 l1 l0 */
|
||||
"punpcklbw %%mm7,%%mm1\n" /* 0 v3 0 v2 00 v1 00 v0 */
|
||||
"punpckldq %%mm1,%%mm1\n" /* 00 v1 00 v0 00 v1 00 v0 */
|
||||
"psubw %9,%%mm1\n" /* mm1-128:r1 r1 r0 r0 r1 r1 r0 r0 */
|
||||
|
||||
/* create Cr_g (result in mm0) */
|
||||
"movq %%mm1,%%mm0\n" /* r1 r1 r0 r0 r1 r1 r0 r0 */
|
||||
"pmullw %10,%%mm0\n" /* Cr*-46dec=-0.71875*64 (MMX_VgrnRGB) */
|
||||
"pmullw %11,%%mm1\n" /* Cr*89dec=1.390625*64 (MMX_VredRGB) */
|
||||
"psraw $6, %%mm0\n" /* Cr_g=Cr_g/64 */
|
||||
"psraw $6, %%mm1\n" /* red=red/64 */
|
||||
|
||||
/* create L1 L2 (result in mm2,mm4) */
|
||||
/* L2=lum+cols */
|
||||
"movq (%2,%4),%%mm3\n" /* 0 0 0 0 L3 L2 L1 L0 */
|
||||
"punpckldq %%mm3,%%mm2\n" /* L3 L2 L1 L0 l3 l2 l1 l0 */
|
||||
"movq %%mm2,%%mm4\n" /* L3 L2 L1 L0 l3 l2 l1 l0 */
|
||||
"pand %12,%%mm2\n" /* L3 0 L1 0 l3 0 l1 0 */
|
||||
"pand %13,%%mm4\n" /* 0 L2 0 L0 0 l2 0 l0 */
|
||||
"psrlw $8,%%mm2\n" /* 0 L3 0 L1 0 l3 0 l1 */
|
||||
|
||||
/* create R (result in mm6) */
|
||||
"movq %%mm2,%%mm5\n" /* 0 L3 0 L1 0 l3 0 l1 */
|
||||
"movq %%mm4,%%mm6\n" /* 0 L2 0 L0 0 l2 0 l0 */
|
||||
"paddsw %%mm1, %%mm5\n" /* lum1+red:x R3 x R1 x r3 x r1 */
|
||||
"paddsw %%mm1, %%mm6\n" /* lum1+red:x R2 x R0 x r2 x r0 */
|
||||
"packuswb %%mm5,%%mm5\n" /* R3 R1 r3 r1 R3 R1 r3 r1 */
|
||||
"packuswb %%mm6,%%mm6\n" /* R2 R0 r2 r0 R2 R0 r2 r0 */
|
||||
"pxor %%mm7,%%mm7\n" /* 00 00 00 00 00 00 00 00 */
|
||||
"punpcklbw %%mm5,%%mm6\n" /* R3 R2 R1 R0 r3 r2 r1 r0 */
|
||||
|
||||
/* create Cb (result in mm1) */
|
||||
"movd (%1), %%mm1\n" /* 0 0 0 0 u3 u2 u1 u0 */
|
||||
"punpcklbw %%mm7,%%mm1\n" /* 0 u3 0 u2 00 u1 00 u0 */
|
||||
"punpckldq %%mm1,%%mm1\n" /* 00 u1 00 u0 00 u1 00 u0 */
|
||||
"psubw %9,%%mm1\n" /* mm1-128:u1 u1 u0 u0 u1 u1 u0 u0 */
|
||||
|
||||
/* create Cb_g (result in mm5) */
|
||||
"movq %%mm1,%%mm5\n" /* u1 u1 u0 u0 u1 u1 u0 u0 */
|
||||
"pmullw %14,%%mm5\n" /* Cb*-22dec=-0.34375*64 (MMX_UgrnRGB) */
|
||||
"pmullw %15,%%mm1\n" /* blue*114dec=1.78125*64 */
|
||||
"psraw $6, %%mm5\n" /* Cb_g=Cb_g/64 */
|
||||
"psraw $6, %%mm1\n" /* blue=blue/64 */
|
||||
|
||||
/* create G (result in mm7) */
|
||||
"movq %%mm2,%%mm3\n" /* 0 L3 0 L1 0 l3 0 l1 */
|
||||
"movq %%mm4,%%mm7\n" /* 0 L2 0 L0 0 l2 0 l0 */
|
||||
"paddsw %%mm5, %%mm3\n" /* lum1+Cb_g:x G3t x G1t x g3t x g1t */
|
||||
"paddsw %%mm5, %%mm7\n" /* lum1+Cb_g:x G2t x G0t x g2t x g0t */
|
||||
"paddsw %%mm0, %%mm3\n" /* lum1+Cr_g:x G3 x G1 x g3 x g1 */
|
||||
"paddsw %%mm0, %%mm7\n" /* lum1+Cr_g:x G2 x G0 x g2 x g0 */
|
||||
"packuswb %%mm3,%%mm3\n" /* G3 G1 g3 g1 G3 G1 g3 g1 */
|
||||
"packuswb %%mm7,%%mm7\n" /* G2 G0 g2 g0 G2 G0 g2 g0 */
|
||||
"punpcklbw %%mm3,%%mm7\n" /* G3 G2 G1 G0 g3 g2 g1 g0 */
|
||||
|
||||
/* create B (result in mm5) */
|
||||
"movq %%mm2,%%mm3\n" /* 0 L3 0 L1 0 l3 0 l1 */
|
||||
"movq %%mm4,%%mm5\n" /* 0 L2 0 L0 0 l2 0 l0 */
|
||||
"paddsw %%mm1, %%mm3\n" /* lum1+blue:x B3 x B1 x b3 x b1 */
|
||||
"paddsw %%mm1, %%mm5\n" /* lum1+blue:x B2 x B0 x b2 x b0 */
|
||||
"packuswb %%mm3,%%mm3\n" /* B3 B1 b3 b1 B3 B1 b3 b1 */
|
||||
"packuswb %%mm5,%%mm5\n" /* B2 B0 b2 b0 B2 B0 b2 b0 */
|
||||
"punpcklbw %%mm3,%%mm5\n" /* B3 B2 B1 B0 b3 b2 b1 b0 */
|
||||
|
||||
/* fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb) */
|
||||
|
||||
"pxor %%mm2,%%mm2\n" /* 0 0 0 0 0 0 0 0 */
|
||||
"pxor %%mm4,%%mm4\n" /* 0 0 0 0 0 0 0 0 */
|
||||
"movq %%mm6,%%mm1\n" /* R3 R2 R1 R0 r3 r2 r1 r0 */
|
||||
"movq %%mm5,%%mm3\n" /* B3 B2 B1 B0 b3 b2 b1 b0 */
|
||||
|
||||
/* process lower lum */
|
||||
"punpcklbw %%mm4,%%mm1\n" /* 0 r3 0 r2 0 r1 0 r0 */
|
||||
"punpcklbw %%mm4,%%mm3\n" /* 0 b3 0 b2 0 b1 0 b0 */
|
||||
"movq %%mm1,%%mm2\n" /* 0 r3 0 r2 0 r1 0 r0 */
|
||||
"movq %%mm3,%%mm0\n" /* 0 b3 0 b2 0 b1 0 b0 */
|
||||
"punpcklwd %%mm1,%%mm3\n" /* 0 r1 0 b1 0 r0 0 b0 */
|
||||
"punpckhwd %%mm2,%%mm0\n" /* 0 r3 0 b3 0 r2 0 b2 */
|
||||
|
||||
"pxor %%mm2,%%mm2\n" /* 0 0 0 0 0 0 0 0 */
|
||||
"movq %%mm7,%%mm1\n" /* G3 G2 G1 G0 g3 g2 g1 g0 */
|
||||
"punpcklbw %%mm1,%%mm2\n" /* g3 0 g2 0 g1 0 g0 0 */
|
||||
"punpcklwd %%mm4,%%mm2\n" /* 0 0 g1 0 0 0 g0 0 */
|
||||
"por %%mm3, %%mm2\n" /* 0 r1 g1 b1 0 r0 g0 b0 */
|
||||
"movq %%mm2,(%3)\n" /* wrote out ! row1 */
|
||||
|
||||
"pxor %%mm2,%%mm2\n" /* 0 0 0 0 0 0 0 0 */
|
||||
"punpcklbw %%mm1,%%mm4\n" /* g3 0 g2 0 g1 0 g0 0 */
|
||||
"punpckhwd %%mm2,%%mm4\n" /* 0 0 g3 0 0 0 g2 0 */
|
||||
"por %%mm0, %%mm4\n" /* 0 r3 g3 b3 0 r2 g2 b2 */
|
||||
"movq %%mm4,8(%3)\n" /* wrote out ! row1 */
|
||||
|
||||
/* fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb) */
|
||||
/* this can be done "destructive" */
|
||||
"pxor %%mm2,%%mm2\n" /* 0 0 0 0 0 0 0 0 */
|
||||
"punpckhbw %%mm2,%%mm6\n" /* 0 R3 0 R2 0 R1 0 R0 */
|
||||
"punpckhbw %%mm1,%%mm5\n" /* G3 B3 G2 B2 G1 B1 G0 B0 */
|
||||
"movq %%mm5,%%mm1\n" /* G3 B3 G2 B2 G1 B1 G0 B0 */
|
||||
"punpcklwd %%mm6,%%mm1\n" /* 0 R1 G1 B1 0 R0 G0 B0 */
|
||||
"movq %%mm1,(%5)\n" /* wrote out ! row2 */
|
||||
"punpckhwd %%mm6,%%mm5\n" /* 0 R3 G3 B3 0 R2 G2 B2 */
|
||||
"movq %%mm5,8(%5)\n" /* wrote out ! row2 */
|
||||
|
||||
"addl $4,%2\n" /* lum+4 */
|
||||
"leal 16(%3),%3\n" /* row1+16 */
|
||||
"leal 16(%5),%5\n" /* row2+16 */
|
||||
"addl $2,%0\n" /* cr+2 */
|
||||
"addl $2,%1\n" /* cb+2 */
|
||||
|
||||
"addl $4,%6\n" /* x+4 */
|
||||
"cmpl %4,%6\n"
|
||||
|
||||
"jl 1b\n"
|
||||
"addl %4,%2\n" /* lum += cols */
|
||||
"addl %8,%3\n" /* row1+= mod */
|
||||
"addl %8,%5\n" /* row2+= mod */
|
||||
"movl $0,%6\n" /* x=0 */
|
||||
"cmpl %7,%2\n"
|
||||
"jl 1b\n"
|
||||
|
||||
"emms\n" /* reset MMX registers. */
|
||||
:
|
||||
: "r" (cr), "r"(cb),"r"(lum),
|
||||
"r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
|
||||
"m"(MMX_0080w),"m"(MMX_VgrnRGB),"m"(MMX_VredRGB),
|
||||
"m"(MMX_FF00w),"m"(MMX_00FFw),"m"(MMX_UgrnRGB),
|
||||
"m"(MMX_UbluRGB)
|
||||
);
|
||||
}
|
||||
|
||||
/*
 * Convert planar Y/Cr/Cb data to 16-bit 5-6-5 pixels with MMX, producing
 * two output rows per pass.
 *
 * colortab, rgb_2_pix: not referenced by this routine.
 * lum:  luma plane; the outer loop ends once the luma pointer reaches
 *       lum + cols*rows.
 * cr, cb: chroma planes; the pointers advance 4 bytes per iteration.
 * out:  destination; row2 starts cols+mod pixels after row1.
 * rows, cols: luma dimensions used for the loop bounds.
 * mod:  extra destination pixels per row; recomputed below as the byte
 *       increment applied to row1/row2 after each pair of rows.
 *
 * Each inner iteration loads 8 luma bytes from the current row and 8 from
 * the row cols bytes below it, and stores 16 bytes to each output row.
 * Even and odd luma samples are scaled by MMX_Ycoeff and processed in
 * separate registers, then interleaved again with punpcklwd/punpckhwd.
 * Packing: red is masked with MMX_red565, green is shifted left by 3 and
 * masked with MMX_grn565, blue is masked with MMX_red565 and shifted right
 * by 11.
 *
 * NOTE(review): the asm advances its "r" input operands, rewrites x through
 * an "m" input, stores to memory and uses mm0-mm7, yet declares no outputs
 * or clobbers -- confirm this is acceptable for the compilers in use.
 * NOTE(review): pointers are stepped with 32-bit addl/leal/cmpl, so this
 * presumably targets 32-bit x86 only -- confirm.
 */
void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
|
||||
unsigned char *lum, unsigned char *cr,
|
||||
unsigned char *cb, unsigned char *out,
|
||||
int rows, int cols, int mod )
|
||||
{
|
||||
Uint16 *row1;
|
||||
Uint16 *row2;
|
||||
|
||||
unsigned char* y = lum +cols*rows; /* Pointer to the end */
|
||||
int x = 0;
|
||||
row1 = (Uint16 *)out; /* 16 bit target */
|
||||
row2 = (Uint16 *)out+cols+mod; /* start of second row */
|
||||
mod = (mod+cols+mod)*2; /* increment for row1 in byte */
|
||||
|
||||
__asm__ __volatile__(
|
||||
".align 8\n"
|
||||
"1:\n"
|
||||
|
||||
"movd (%1), %%mm0\n" /* 4 Cb 0 0 0 0 u3 u2 u1 u0 */
|
||||
"pxor %%mm7, %%mm7\n"
|
||||
"movd (%0), %%mm1\n" /* 4 Cr 0 0 0 0 v3 v2 v1 v0 */
|
||||
|
||||
"punpcklbw %%mm7, %%mm0\n" /* 4 W cb 0 u3 0 u2 0 u1 0 u0 */
|
||||
"punpcklbw %%mm7, %%mm1\n" /* 4 W cr 0 v3 0 v2 0 v1 0 v0 */
|
||||
"psubw %9, %%mm0\n"
|
||||
"psubw %9, %%mm1\n"
|
||||
"movq %%mm0, %%mm2\n" /* Cb 0 u3 0 u2 0 u1 0 u0 */
|
||||
"movq %%mm1, %%mm3\n" /* Cr */
|
||||
"pmullw %10, %%mm2\n" /* Cb2green 0 R3 0 R2 0 R1 0 R0 */
|
||||
"movq (%2), %%mm6\n" /* L1 l7 L6 L5 L4 L3 L2 L1 L0 */
|
||||
"pmullw %11, %%mm0\n" /* Cb2blue */
|
||||
"pand %12, %%mm6\n" /* L1 00 L6 00 L4 00 L2 00 L0 */
|
||||
"pmullw %13, %%mm3\n" /* Cr2green */
|
||||
"movq (%2), %%mm7\n" /* L2 */
|
||||
"pmullw %14, %%mm1\n" /* Cr2red */
|
||||
"psrlw $8, %%mm7\n" /* L2 00 L7 00 L5 00 L3 00 L1 */
|
||||
"pmullw %15, %%mm6\n" /* lum1 */
|
||||
"paddw %%mm3, %%mm2\n" /* Cb2green + Cr2green == green */
|
||||
"pmullw %15, %%mm7\n" /* lum2 */
|
||||
|
||||
"movq %%mm6, %%mm4\n" /* lum1 */
|
||||
"paddw %%mm0, %%mm6\n" /* lum1 +blue 00 B6 00 B4 00 B2 00 B0 */
|
||||
"movq %%mm4, %%mm5\n" /* lum1 */
|
||||
"paddw %%mm1, %%mm4\n" /* lum1 +red 00 R6 00 R4 00 R2 00 R0 */
|
||||
"paddw %%mm2, %%mm5\n" /* lum1 +green 00 G6 00 G4 00 G2 00 G0 */
|
||||
"psraw $6, %%mm4\n" /* R1 0 .. 64 */
|
||||
"movq %%mm7, %%mm3\n" /* lum2 00 L7 00 L5 00 L3 00 L1 */
|
||||
"psraw $6, %%mm5\n" /* G1 - .. + */
|
||||
"paddw %%mm0, %%mm7\n" /* Lum2 +blue 00 B7 00 B5 00 B3 00 B1 */
|
||||
"psraw $6, %%mm6\n" /* B1 0 .. 64 */
|
||||
"packuswb %%mm4, %%mm4\n" /* R1 R1 */
|
||||
"packuswb %%mm5, %%mm5\n" /* G1 G1 */
|
||||
"packuswb %%mm6, %%mm6\n" /* B1 B1 */
|
||||
"punpcklbw %%mm4, %%mm4\n"
|
||||
"punpcklbw %%mm5, %%mm5\n"
|
||||
|
||||
"pand %16, %%mm4\n"
|
||||
"psllw $3, %%mm5\n" /* GREEN 1 */
|
||||
"punpcklbw %%mm6, %%mm6\n"
|
||||
"pand %17, %%mm5\n"
|
||||
"pand %16, %%mm6\n"
|
||||
"por %%mm5, %%mm4\n" /* */
|
||||
"psrlw $11, %%mm6\n" /* BLUE 1 */
|
||||
"movq %%mm3, %%mm5\n" /* lum2 */
|
||||
"paddw %%mm1, %%mm3\n" /* lum2 +red 00 R7 00 R5 00 R3 00 R1 */
|
||||
"paddw %%mm2, %%mm5\n" /* lum2 +green 00 G7 00 G5 00 G3 00 G1 */
|
||||
"psraw $6, %%mm3\n" /* R2 */
|
||||
"por %%mm6, %%mm4\n" /* MM4 */
|
||||
"psraw $6, %%mm5\n" /* G2 */
|
||||
"movq (%2, %4), %%mm6\n" /* L3 load lum2 */
|
||||
"psraw $6, %%mm7\n"
|
||||
"packuswb %%mm3, %%mm3\n"
|
||||
"packuswb %%mm5, %%mm5\n"
|
||||
"packuswb %%mm7, %%mm7\n"
|
||||
"pand %12, %%mm6\n" /* L3 */
|
||||
"punpcklbw %%mm3, %%mm3\n"
|
||||
"punpcklbw %%mm5, %%mm5\n"
|
||||
"pmullw %15, %%mm6\n" /* lum3 */
|
||||
"punpcklbw %%mm7, %%mm7\n"
|
||||
"psllw $3, %%mm5\n" /* GREEN 2 */
|
||||
"pand %16, %%mm7\n"
|
||||
"pand %16, %%mm3\n"
|
||||
"psrlw $11, %%mm7\n" /* BLUE 2 */
|
||||
"pand %17, %%mm5\n"
|
||||
"por %%mm7, %%mm3\n"
|
||||
"movq (%2,%4), %%mm7\n" /* L4 load lum2 */
|
||||
"por %%mm5, %%mm3\n"
|
||||
"psrlw $8, %%mm7\n" /* L4 */
|
||||
"movq %%mm4, %%mm5\n"
|
||||
"punpcklwd %%mm3, %%mm4\n"
|
||||
"pmullw %15, %%mm7\n" /* lum4 */
|
||||
"punpckhwd %%mm3, %%mm5\n"
|
||||
|
||||
"movq %%mm4, (%3)\n" /* write row1 */
|
||||
"movq %%mm5, 8(%3)\n" /* write row1 */
|
||||
|
||||
"movq %%mm6, %%mm4\n" /* Lum3 */
|
||||
"paddw %%mm0, %%mm6\n" /* Lum3 +blue */
|
||||
|
||||
"movq %%mm4, %%mm5\n" /* Lum3 */
|
||||
"paddw %%mm1, %%mm4\n" /* Lum3 +red */
|
||||
"paddw %%mm2, %%mm5\n" /* Lum3 +green */
|
||||
"psraw $6, %%mm4\n"
|
||||
"movq %%mm7, %%mm3\n" /* Lum4 */
|
||||
"psraw $6, %%mm5\n"
|
||||
"paddw %%mm0, %%mm7\n" /* Lum4 +blue */
|
||||
"psraw $6, %%mm6\n" /* Lum3 +blue */
|
||||
"movq %%mm3, %%mm0\n" /* Lum4 */
|
||||
"packuswb %%mm4, %%mm4\n"
|
||||
"paddw %%mm1, %%mm3\n" /* Lum4 +red */
|
||||
"packuswb %%mm5, %%mm5\n"
|
||||
"paddw %%mm2, %%mm0\n" /* Lum4 +green */
|
||||
"packuswb %%mm6, %%mm6\n"
|
||||
"punpcklbw %%mm4, %%mm4\n"
|
||||
"punpcklbw %%mm5, %%mm5\n"
|
||||
"punpcklbw %%mm6, %%mm6\n"
|
||||
"psllw $3, %%mm5\n" /* GREEN 3 */
|
||||
"pand %16, %%mm4\n"
|
||||
"psraw $6, %%mm3\n" /* psr 6 */
|
||||
"psraw $6, %%mm0\n"
|
||||
"pand %16, %%mm6\n" /* BLUE */
|
||||
"pand %17, %%mm5\n"
|
||||
"psrlw $11, %%mm6\n" /* BLUE 3 */
|
||||
"por %%mm5, %%mm4\n"
|
||||
"psraw $6, %%mm7\n"
|
||||
"por %%mm6, %%mm4\n"
|
||||
"packuswb %%mm3, %%mm3\n"
|
||||
"packuswb %%mm0, %%mm0\n"
|
||||
"packuswb %%mm7, %%mm7\n"
|
||||
"punpcklbw %%mm3, %%mm3\n"
|
||||
"punpcklbw %%mm0, %%mm0\n"
|
||||
"punpcklbw %%mm7, %%mm7\n"
|
||||
"pand %16, %%mm3\n"
|
||||
"pand %16, %%mm7\n" /* BLUE */
|
||||
"psllw $3, %%mm0\n" /* GREEN 4 */
|
||||
"psrlw $11, %%mm7\n"
|
||||
"pand %17, %%mm0\n"
|
||||
"por %%mm7, %%mm3\n"
|
||||
"por %%mm0, %%mm3\n"
|
||||
|
||||
"movq %%mm4, %%mm5\n"
|
||||
|
||||
"punpcklwd %%mm3, %%mm4\n"
|
||||
"punpckhwd %%mm3, %%mm5\n"
|
||||
|
||||
"movq %%mm4, (%5)\n"
|
||||
"movq %%mm5, 8(%5)\n"
|
||||
|
||||
"addl $8, %6\n"
|
||||
"addl $8, %2\n"
|
||||
"addl $4, %0\n"
|
||||
"addl $4, %1\n"
|
||||
"cmpl %4, %6\n"
|
||||
"leal 16(%3), %3\n"
|
||||
"leal 16(%5),%5\n" /* row2+16 */
|
||||
|
||||
"jl 1b\n"
|
||||
"addl %4, %2\n" /* lum += cols */
|
||||
"addl %8, %3\n" /* row1+= mod */
|
||||
"addl %8, %5\n" /* row2+= mod */
|
||||
"movl $0, %6\n" /* x=0 */
|
||||
"cmpl %7, %2\n"
|
||||
"jl 1b\n"
|
||||
"emms\n"
|
||||
:
|
||||
: "r" (cr), "r"(cb),"r"(lum),
|
||||
"r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
|
||||
"m"(MMX_0080w),"m"(MMX_Ugrn565),"m"(MMX_Ublu5x5),
|
||||
"m"(MMX_00FFw),"m"(MMX_Vgrn565),"m"(MMX_Vred5x5),
|
||||
"m"(MMX_Ycoeff),"m"(MMX_red565),"m"(MMX_grn565)
|
||||
);
|
||||
}
|
||||
|
||||
/* *INDENT-ON* */
|
||||
|
||||
#endif /* USE_MMX_ASSEMBLY */
|
||||
|
||||
/* vi: set ts=4 sw=4 expandtab: */
|
File diff suppressed because it is too large
Load diff
|
@ -30,16 +30,6 @@ struct SDL_SW_YUVTexture
|
|||
Uint32 target_format;
|
||||
int w, h;
|
||||
Uint8 *pixels;
|
||||
int *colortab;
|
||||
Uint32 *rgb_2_pix;
|
||||
void (*Display1X) (int *colortab, Uint32 * rgb_2_pix,
|
||||
unsigned char *lum, unsigned char *cr,
|
||||
unsigned char *cb, unsigned char *out,
|
||||
int rows, int cols, int mod);
|
||||
void (*Display2X) (int *colortab, Uint32 * rgb_2_pix,
|
||||
unsigned char *lum, unsigned char *cr,
|
||||
unsigned char *cb, unsigned char *out,
|
||||
int rows, int cols, int mod);
|
||||
|
||||
/* These are just so we don't have to allocate them separately */
|
||||
Uint16 pitches[3];
|
||||
|
|
|
@ -39,85 +39,7 @@
|
|||
#include <d3d9.h>
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef ASSEMBLE_SHADER
|
||||
#pragma comment(lib, "d3dx9.lib")
|
||||
|
||||
/**************************************************************************
|
||||
* ID3DXBuffer:
|
||||
* ------------
|
||||
* The buffer object is used by D3DX to return arbitrary size data.
|
||||
*
|
||||
* GetBufferPointer -
|
||||
* Returns a pointer to the beginning of the buffer.
|
||||
*
|
||||
* GetBufferSize -
|
||||
* Returns the size of the buffer, in bytes.
|
||||
**************************************************************************/
|
||||
|
||||
typedef interface ID3DXBuffer ID3DXBuffer;
|
||||
typedef interface ID3DXBuffer *LPD3DXBUFFER;
|
||||
|
||||
/* {8BA5FB08-5195-40e2-AC58-0D989C3A0102} */
|
||||
DEFINE_GUID(IID_ID3DXBuffer,
|
||||
0x8ba5fb08, 0x5195, 0x40e2, 0xac, 0x58, 0xd, 0x98, 0x9c, 0x3a, 0x1, 0x2);
|
||||
|
||||
#undef INTERFACE
|
||||
#define INTERFACE ID3DXBuffer
|
||||
|
||||
typedef interface ID3DXBuffer {
|
||||
const struct ID3DXBufferVtbl FAR* lpVtbl;
|
||||
} ID3DXBuffer;
|
||||
typedef const struct ID3DXBufferVtbl ID3DXBufferVtbl;
|
||||
const struct ID3DXBufferVtbl
|
||||
{
|
||||
/* IUnknown */
|
||||
STDMETHOD(QueryInterface)(THIS_ REFIID iid, LPVOID *ppv) PURE;
|
||||
STDMETHOD_(ULONG, AddRef)(THIS) PURE;
|
||||
STDMETHOD_(ULONG, Release)(THIS) PURE;
|
||||
|
||||
/* ID3DXBuffer */
|
||||
STDMETHOD_(LPVOID, GetBufferPointer)(THIS) PURE;
|
||||
STDMETHOD_(DWORD, GetBufferSize)(THIS) PURE;
|
||||
};
|
||||
|
||||
HRESULT WINAPI
|
||||
D3DXAssembleShader(
|
||||
LPCSTR pSrcData,
|
||||
UINT SrcDataLen,
|
||||
CONST LPVOID* pDefines,
|
||||
LPVOID pInclude,
|
||||
DWORD Flags,
|
||||
LPD3DXBUFFER* ppShader,
|
||||
LPD3DXBUFFER* ppErrorMsgs);
|
||||
|
||||
/* Debug helper: writes the shader words to the debugger output via
   OutputDebugStringA, formatted as a C array initializer named shader_data
   with six "0x%8.8x" values per line.
   shader_data: words to print.
   shader_size: size of shader_data in bytes (divided by sizeof(DWORD) to
   get the word count). */
static void PrintShaderData(LPDWORD shader_data, DWORD shader_size)
|
||||
{
|
||||
OutputDebugStringA("const DWORD shader_data[] = {\n\t");
|
||||
{
|
||||
SDL_bool newline = SDL_FALSE;
|
||||
unsigned i;
|
||||
for (i = 0; i < shader_size / sizeof(DWORD); ++i) {
|
||||
char dword[11]; /* "0x" + 8 hex digits + terminating NUL */
|
||||
if (i > 0) {
|
||||
if ((i%6) == 0) {
|
||||
newline = SDL_TRUE;
|
||||
}
|
||||
if (newline) {
|
||||
OutputDebugStringA(",\n ");
|
||||
newline = SDL_FALSE;
|
||||
} else {
|
||||
OutputDebugStringA(", ");
|
||||
}
|
||||
}
|
||||
SDL_snprintf(dword, sizeof(dword), "0x%8.8x", shader_data[i]);
|
||||
OutputDebugStringA(dword);
|
||||
}
|
||||
OutputDebugStringA("\n};\n");
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* ASSEMBLE_SHADER */
|
||||
#include "SDL_shaders_d3d.h"
|
||||
|
||||
|
||||
/* Direct3D renderer implementation */
|
||||
|
@ -188,7 +110,7 @@ typedef struct
|
|||
IDirect3DSurface9 *defaultRenderTarget;
|
||||
IDirect3DSurface9 *currentRenderTarget;
|
||||
void* d3dxDLL;
|
||||
LPDIRECT3DPIXELSHADER9 ps_yuv;
|
||||
LPDIRECT3DPIXELSHADER9 shaders[NUM_SHADERS];
|
||||
} D3D_RenderData;
|
||||
|
||||
typedef struct
|
||||
|
@ -197,6 +119,7 @@ typedef struct
|
|||
int w, h;
|
||||
DWORD usage;
|
||||
Uint32 format;
|
||||
D3DFORMAT d3dfmt;
|
||||
IDirect3DTexture9 *texture;
|
||||
IDirect3DTexture9 *staging;
|
||||
} D3D_TextureRep;
|
||||
|
@ -313,6 +236,8 @@ PixelFormatToD3DFMT(Uint32 format)
|
|||
return D3DFMT_A8R8G8B8;
|
||||
case SDL_PIXELFORMAT_YV12:
|
||||
case SDL_PIXELFORMAT_IYUV:
|
||||
case SDL_PIXELFORMAT_NV12:
|
||||
case SDL_PIXELFORMAT_NV21:
|
||||
return D3DFMT_L8;
|
||||
default:
|
||||
return D3DFMT_UNKNOWN;
|
||||
|
@ -661,137 +586,19 @@ D3D_CreateRenderer(SDL_Window * window, Uint32 flags)
|
|||
/* Set up parameters for rendering */
|
||||
D3D_InitRenderState(data);
|
||||
|
||||
if (caps.MaxSimultaneousTextures >= 3)
|
||||
{
|
||||
#ifdef ASSEMBLE_SHADER
|
||||
/* This shader was created by running the following HLSL through the fxc compiler
|
||||
and then tuning the generated assembly.
|
||||
|
||||
fxc /T fx_4_0 /O3 /Gfa /Fc yuv.fxc yuv.fx
|
||||
|
||||
--- yuv.fx ---
|
||||
Texture2D g_txY;
|
||||
Texture2D g_txU;
|
||||
Texture2D g_txV;
|
||||
|
||||
SamplerState samLinear
|
||||
{
|
||||
Filter = ANISOTROPIC;
|
||||
AddressU = Clamp;
|
||||
AddressV = Clamp;
|
||||
MaxAnisotropy = 1;
|
||||
};
|
||||
|
||||
struct VS_OUTPUT
|
||||
{
|
||||
float2 TextureUV : TEXCOORD0;
|
||||
};
|
||||
|
||||
struct PS_OUTPUT
|
||||
{
|
||||
float4 RGBAColor : SV_Target;
|
||||
};
|
||||
|
||||
PS_OUTPUT YUV420( VS_OUTPUT In )
|
||||
{
|
||||
const float3 offset = {-0.0627451017, -0.501960814, -0.501960814};
|
||||
const float3 Rcoeff = {1.164, 0.000, 1.596};
|
||||
const float3 Gcoeff = {1.164, -0.391, -0.813};
|
||||
const float3 Bcoeff = {1.164, 2.018, 0.000};
|
||||
|
||||
PS_OUTPUT Output;
|
||||
float2 TextureUV = In.TextureUV;
|
||||
|
||||
float3 yuv;
|
||||
yuv.x = g_txY.Sample( samLinear, TextureUV ).r;
|
||||
yuv.y = g_txU.Sample( samLinear, TextureUV ).r;
|
||||
yuv.z = g_txV.Sample( samLinear, TextureUV ).r;
|
||||
|
||||
yuv += offset;
|
||||
Output.RGBAColor.r = dot(yuv, Rcoeff);
|
||||
Output.RGBAColor.g = dot(yuv, Gcoeff);
|
||||
Output.RGBAColor.b = dot(yuv, Bcoeff);
|
||||
Output.RGBAColor.a = 1.0f;
|
||||
|
||||
return Output;
|
||||
}
|
||||
|
||||
technique10 RenderYUV420
|
||||
{
|
||||
pass P0
|
||||
{
|
||||
SetPixelShader( CompileShader( ps_4_0_level_9_0, YUV420() ) );
|
||||
}
|
||||
}
|
||||
*/
|
||||
const char *shader_text =
|
||||
"ps_2_0\n"
|
||||
"def c0, -0.0627451017, -0.501960814, -0.501960814, 1\n"
|
||||
"def c1, 1.16400003, 0, 1.59599996, 0\n"
|
||||
"def c2, 1.16400003, -0.391000003, -0.813000023, 0\n"
|
||||
"def c3, 1.16400003, 2.01799989, 0, 0\n"
|
||||
"dcl t0.xy\n"
|
||||
"dcl v0.xyzw\n"
|
||||
"dcl_2d s0\n"
|
||||
"dcl_2d s1\n"
|
||||
"dcl_2d s2\n"
|
||||
"texld r0, t0, s0\n"
|
||||
"texld r1, t0, s1\n"
|
||||
"texld r2, t0, s2\n"
|
||||
"mov r0.y, r1.x\n"
|
||||
"mov r0.z, r2.x\n"
|
||||
"add r0.xyz, r0, c0\n"
|
||||
"dp3 r1.x, r0, c1\n"
|
||||
"dp3 r1.y, r0, c2\n"
|
||||
"dp2add r1.z, r0, c3, c3.z\n" /* Logically this is "dp3 r1.z, r0, c3" but the optimizer did its magic */
|
||||
"mov r1.w, c0.w\n"
|
||||
"mul r0, r1, v0\n" /* Not in the HLSL, multiply by vertex color */
|
||||
"mov oC0, r0\n"
|
||||
;
|
||||
LPD3DXBUFFER pCode;
|
||||
LPD3DXBUFFER pErrorMsgs;
|
||||
LPDWORD shader_data = NULL;
|
||||
DWORD shader_size = 0;
|
||||
result = D3DXAssembleShader(shader_text, SDL_strlen(shader_text), NULL, NULL, 0, &pCode, &pErrorMsgs);
|
||||
if (!FAILED(result)) {
|
||||
shader_data = (DWORD*)pCode->lpVtbl->GetBufferPointer(pCode);
|
||||
shader_size = pCode->lpVtbl->GetBufferSize(pCode);
|
||||
PrintShaderData(shader_data, shader_size);
|
||||
} else {
|
||||
const char *error = (const char *)pErrorMsgs->lpVtbl->GetBufferPointer(pErrorMsgs);
|
||||
SDL_SetError("Couldn't assemble shader: %s", error);
|
||||
}
|
||||
if (shader_data != NULL)
|
||||
#else
|
||||
const DWORD shader_data[] = {
|
||||
0xffff0200, 0x05000051, 0xa00f0000, 0xbd808081, 0xbf008081, 0xbf008081,
|
||||
0x3f800000, 0x05000051, 0xa00f0001, 0x3f94fdf4, 0x00000000, 0x3fcc49ba,
|
||||
0x00000000, 0x05000051, 0xa00f0002, 0x3f94fdf4, 0xbec83127, 0xbf5020c5,
|
||||
0x00000000, 0x05000051, 0xa00f0003, 0x3f94fdf4, 0x400126e9, 0x00000000,
|
||||
0x00000000, 0x0200001f, 0x80000000, 0xb0030000, 0x0200001f, 0x80000000,
|
||||
0x900f0000, 0x0200001f, 0x90000000, 0xa00f0800, 0x0200001f, 0x90000000,
|
||||
0xa00f0801, 0x0200001f, 0x90000000, 0xa00f0802, 0x03000042, 0x800f0000,
|
||||
0xb0e40000, 0xa0e40800, 0x03000042, 0x800f0001, 0xb0e40000, 0xa0e40801,
|
||||
0x03000042, 0x800f0002, 0xb0e40000, 0xa0e40802, 0x02000001, 0x80020000,
|
||||
0x80000001, 0x02000001, 0x80040000, 0x80000002, 0x03000002, 0x80070000,
|
||||
0x80e40000, 0xa0e40000, 0x03000008, 0x80010001, 0x80e40000, 0xa0e40001,
|
||||
0x03000008, 0x80020001, 0x80e40000, 0xa0e40002, 0x0400005a, 0x80040001,
|
||||
0x80e40000, 0xa0e40003, 0xa0aa0003, 0x02000001, 0x80080001, 0xa0ff0000,
|
||||
0x03000005, 0x800f0000, 0x80e40001, 0x90e40000, 0x02000001, 0x800f0800,
|
||||
0x80e40000, 0x0000ffff
|
||||
};
|
||||
#endif
|
||||
{
|
||||
result = IDirect3DDevice9_CreatePixelShader(data->device, shader_data, &data->ps_yuv);
|
||||
if (!FAILED(result)) {
|
||||
renderer->info.texture_formats[renderer->info.num_texture_formats++] = SDL_PIXELFORMAT_YV12;
|
||||
renderer->info.texture_formats[renderer->info.num_texture_formats++] = SDL_PIXELFORMAT_IYUV;
|
||||
} else {
|
||||
if (caps.MaxSimultaneousTextures >= 3) {
|
||||
int i;
|
||||
for (i = 0; i < SDL_arraysize(data->shaders); ++i) {
|
||||
result = D3D9_CreatePixelShader(data->device, (D3D9_Shader)i, &data->shaders[i]);
|
||||
if (FAILED(result)) {
|
||||
D3D_SetError("CreatePixelShader()", result);
|
||||
}
|
||||
}
|
||||
if (data->shaders[SHADER_YUV_JPEG] && data->shaders[SHADER_YUV_BT601] && data->shaders[SHADER_YUV_BT709]) {
|
||||
renderer->info.texture_formats[renderer->info.num_texture_formats++] = SDL_PIXELFORMAT_YV12;
|
||||
renderer->info.texture_formats[renderer->info.num_texture_formats++] = SDL_PIXELFORMAT_IYUV;
|
||||
}
|
||||
}
|
||||
|
||||
return renderer;
|
||||
}
|
||||
|
||||
|
@ -870,7 +677,7 @@ GetScaleQuality(void)
|
|||
}
|
||||
|
||||
static int
|
||||
D3D_CreateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, DWORD usage, Uint32 format, int w, int h)
|
||||
D3D_CreateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, DWORD usage, Uint32 format, D3DFORMAT d3dfmt, int w, int h)
|
||||
{
|
||||
HRESULT result;
|
||||
|
||||
|
@ -879,6 +686,7 @@ D3D_CreateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, DWORD us
|
|||
texture->h = h;
|
||||
texture->usage = usage;
|
||||
texture->format = format;
|
||||
texture->d3dfmt = d3dfmt;
|
||||
|
||||
result = IDirect3DDevice9_CreateTexture(device, w, h, 1, usage,
|
||||
PixelFormatToD3DFMT(format),
|
||||
|
@ -897,8 +705,7 @@ D3D_CreateStagingTexture(IDirect3DDevice9 *device, D3D_TextureRep *texture)
|
|||
|
||||
if (texture->staging == NULL) {
|
||||
result = IDirect3DDevice9_CreateTexture(device, texture->w, texture->h, 1, 0,
|
||||
PixelFormatToD3DFMT(texture->format),
|
||||
D3DPOOL_SYSTEMMEM, &texture->staging, NULL);
|
||||
texture->d3dfmt, D3DPOOL_SYSTEMMEM, &texture->staging, NULL);
|
||||
if (FAILED(result)) {
|
||||
return D3D_SetError("CreateTexture(D3DPOOL_SYSTEMMEM)", result);
|
||||
}
|
||||
|
@ -934,7 +741,7 @@ D3D_BindTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, DWORD samp
|
|||
}
|
||||
|
||||
static int
|
||||
D3D_RecreateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, Uint32 format, int w, int h)
|
||||
D3D_RecreateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture)
|
||||
{
|
||||
if (texture->texture) {
|
||||
IDirect3DTexture9_Release(texture->texture);
|
||||
|
@ -948,7 +755,7 @@ D3D_RecreateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, Uint32
|
|||
}
|
||||
|
||||
static int
|
||||
D3D_UpdateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, Uint32 format, int x, int y, int w, int h, const void *pixels, int pitch)
|
||||
D3D_UpdateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, int x, int y, int w, int h, const void *pixels, int pitch)
|
||||
{
|
||||
RECT d3drect;
|
||||
D3DLOCKED_RECT locked;
|
||||
|
@ -972,8 +779,8 @@ D3D_UpdateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, Uint32 f
|
|||
}
|
||||
|
||||
src = (const Uint8 *)pixels;
|
||||
dst = locked.pBits;
|
||||
length = w * SDL_BYTESPERPIXEL(format);
|
||||
dst = (Uint8 *)locked.pBits;
|
||||
length = w * SDL_BYTESPERPIXEL(texture->format);
|
||||
if (length == pitch && length == locked.Pitch) {
|
||||
SDL_memcpy(dst, src, length*h);
|
||||
} else {
|
||||
|
@ -1032,7 +839,7 @@ D3D_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
|
|||
usage = 0;
|
||||
}
|
||||
|
||||
if (D3D_CreateTextureRep(data->device, &texturedata->texture, usage, texture->format, texture->w, texture->h) < 0) {
|
||||
if (D3D_CreateTextureRep(data->device, &texturedata->texture, usage, texture->format, PixelFormatToD3DFMT(texture->format), texture->w, texture->h) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -1040,11 +847,11 @@ D3D_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
|
|||
texture->format == SDL_PIXELFORMAT_IYUV) {
|
||||
texturedata->yuv = SDL_TRUE;
|
||||
|
||||
if (D3D_CreateTextureRep(data->device, &texturedata->utexture, usage, texture->format, texture->w / 2, texture->h / 2) < 0) {
|
||||
if (D3D_CreateTextureRep(data->device, &texturedata->utexture, usage, texture->format, PixelFormatToD3DFMT(texture->format), (texture->w + 1) / 2, (texture->h + 1) / 2) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (D3D_CreateTextureRep(data->device, &texturedata->vtexture, usage, texture->format, texture->w / 2, texture->h / 2) < 0) {
|
||||
if (D3D_CreateTextureRep(data->device, &texturedata->vtexture, usage, texture->format, PixelFormatToD3DFMT(texture->format), (texture->w + 1) / 2, (texture->h + 1) / 2) < 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
@ -1061,16 +868,16 @@ D3D_RecreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
|
|||
return 0;
|
||||
}
|
||||
|
||||
if (D3D_RecreateTextureRep(data->device, &texturedata->texture, texture->format, texture->w, texture->h) < 0) {
|
||||
if (D3D_RecreateTextureRep(data->device, &texturedata->texture) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (texturedata->yuv) {
|
||||
if (D3D_RecreateTextureRep(data->device, &texturedata->utexture, texture->format, texture->w / 2, texture->h / 2) < 0) {
|
||||
if (D3D_RecreateTextureRep(data->device, &texturedata->utexture) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (D3D_RecreateTextureRep(data->device, &texturedata->vtexture, texture->format, texture->w / 2, texture->h / 2) < 0) {
|
||||
if (D3D_RecreateTextureRep(data->device, &texturedata->vtexture) < 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
@ -1089,7 +896,7 @@ D3D_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (D3D_UpdateTextureRep(data->device, &texturedata->texture, texture->format, rect->x, rect->y, rect->w, rect->h, pixels, pitch) < 0) {
|
||||
if (D3D_UpdateTextureRep(data->device, &texturedata->texture, rect->x, rect->y, rect->w, rect->h, pixels, pitch) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -1097,13 +904,13 @@ D3D_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
|
|||
/* Skip to the correct offset into the next texture */
|
||||
pixels = (const void*)((const Uint8*)pixels + rect->h * pitch);
|
||||
|
||||
if (D3D_UpdateTextureRep(data->device, texture->format == SDL_PIXELFORMAT_YV12 ? &texturedata->vtexture : &texturedata->utexture, texture->format, rect->x / 2, rect->y / 2, rect->w / 2, rect->h / 2, pixels, pitch / 2) < 0) {
|
||||
if (D3D_UpdateTextureRep(data->device, texture->format == SDL_PIXELFORMAT_YV12 ? &texturedata->vtexture : &texturedata->utexture, rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2, pixels, (pitch + 1) / 2) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Skip to the correct offset into the next texture */
|
||||
pixels = (const void*)((const Uint8*)pixels + (rect->h * pitch)/4);
|
||||
if (D3D_UpdateTextureRep(data->device, texture->format == SDL_PIXELFORMAT_YV12 ? &texturedata->utexture : &texturedata->vtexture, texture->format, rect->x / 2, rect->y / 2, rect->w / 2, rect->h / 2, pixels, pitch / 2) < 0) {
|
||||
pixels = (const void*)((const Uint8*)pixels + ((rect->h + 1) / 2) * ((pitch + 1) / 2));
|
||||
if (D3D_UpdateTextureRep(data->device, texture->format == SDL_PIXELFORMAT_YV12 ? &texturedata->utexture : &texturedata->vtexture, rect->x / 2, (rect->y + 1) / 2, (rect->w + 1) / 2, (rect->h + 1) / 2, pixels, (pitch + 1) / 2) < 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
@ -1125,13 +932,13 @@ D3D_UpdateTextureYUV(SDL_Renderer * renderer, SDL_Texture * texture,
|
|||
return -1;
|
||||
}
|
||||
|
||||
if (D3D_UpdateTextureRep(data->device, &texturedata->texture, texture->format, rect->x, rect->y, rect->w, rect->h, Yplane, Ypitch) < 0) {
|
||||
if (D3D_UpdateTextureRep(data->device, &texturedata->texture, rect->x, rect->y, rect->w, rect->h, Yplane, Ypitch) < 0) {
|
||||
return -1;
|
||||
}
|
||||
if (D3D_UpdateTextureRep(data->device, &texturedata->utexture, texture->format, rect->x / 2, rect->y / 2, rect->w / 2, rect->h / 2, Uplane, Upitch) < 0) {
|
||||
if (D3D_UpdateTextureRep(data->device, &texturedata->utexture, rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2, Uplane, Upitch) < 0) {
|
||||
return -1;
|
||||
}
|
||||
if (D3D_UpdateTextureRep(data->device, &texturedata->vtexture, texture->format, rect->x / 2, rect->y / 2, rect->w / 2, rect->h / 2, Vplane, Vpitch) < 0) {
|
||||
if (D3D_UpdateTextureRep(data->device, &texturedata->vtexture, rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2, Vplane, Vpitch) < 0) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
|
@ -1609,13 +1416,60 @@ D3D_UpdateTextureScaleMode(D3D_RenderData *data, D3D_TextureData *texturedata, u
|
|||
}
|
||||
}
|
||||
|
||||
static int
|
||||
D3D_RenderSetupTextureState(SDL_Renderer * renderer, SDL_Texture * texture, LPDIRECT3DPIXELSHADER9 *shader)
|
||||
{
|
||||
D3D_RenderData *data = (D3D_RenderData *) renderer->driverdata;
|
||||
D3D_TextureData *texturedata;
|
||||
|
||||
*shader = NULL;
|
||||
|
||||
texturedata = (D3D_TextureData *)texture->driverdata;
|
||||
if (!texturedata) {
|
||||
SDL_SetError("Texture is not currently available");
|
||||
return -1;
|
||||
}
|
||||
|
||||
D3D_UpdateTextureScaleMode(data, texturedata, 0);
|
||||
|
||||
if (D3D_BindTextureRep(data->device, &texturedata->texture, 0) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (texturedata->yuv) {
|
||||
switch (SDL_GetYUVConversionModeForResolution(texture->w, texture->h)) {
|
||||
case SDL_YUV_CONVERSION_JPEG:
|
||||
*shader = data->shaders[SHADER_YUV_JPEG];
|
||||
break;
|
||||
case SDL_YUV_CONVERSION_BT601:
|
||||
*shader = data->shaders[SHADER_YUV_BT601];
|
||||
break;
|
||||
case SDL_YUV_CONVERSION_BT709:
|
||||
*shader = data->shaders[SHADER_YUV_BT709];
|
||||
break;
|
||||
default:
|
||||
return SDL_SetError("Unsupported YUV conversion mode");
|
||||
}
|
||||
|
||||
D3D_UpdateTextureScaleMode(data, texturedata, 1);
|
||||
D3D_UpdateTextureScaleMode(data, texturedata, 2);
|
||||
|
||||
if (D3D_BindTextureRep(data->device, &texturedata->utexture, 1) < 0) {
|
||||
return -1;
|
||||
}
|
||||
if (D3D_BindTextureRep(data->device, &texturedata->vtexture, 2) < 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
D3D_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
|
||||
const SDL_Rect * srcrect, const SDL_FRect * dstrect)
|
||||
{
|
||||
D3D_RenderData *data = (D3D_RenderData *) renderer->driverdata;
|
||||
D3D_TextureData *texturedata;
|
||||
LPDIRECT3DPIXELSHADER9 shader = NULL;
|
||||
LPDIRECT3DPIXELSHADER9 shader;
|
||||
float minx, miny, maxx, maxy;
|
||||
float minu, maxu, minv, maxv;
|
||||
DWORD color;
|
||||
|
@ -1626,12 +1480,6 @@ D3D_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
|
|||
return -1;
|
||||
}
|
||||
|
||||
texturedata = (D3D_TextureData *)texture->driverdata;
|
||||
if (!texturedata) {
|
||||
SDL_SetError("Texture is not currently available");
|
||||
return -1;
|
||||
}
|
||||
|
||||
minx = dstrect->x - 0.5f;
|
||||
miny = dstrect->y - 0.5f;
|
||||
maxx = dstrect->x + dstrect->w - 0.5f;
|
||||
|
@ -1674,45 +1522,25 @@ D3D_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
|
|||
|
||||
D3D_SetBlendMode(data, texture->blendMode);
|
||||
|
||||
D3D_UpdateTextureScaleMode(data, texturedata, 0);
|
||||
|
||||
if (D3D_BindTextureRep(data->device, &texturedata->texture, 0) < 0) {
|
||||
if (D3D_RenderSetupTextureState(renderer, texture, &shader) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (texturedata->yuv) {
|
||||
shader = data->ps_yuv;
|
||||
|
||||
D3D_UpdateTextureScaleMode(data, texturedata, 1);
|
||||
D3D_UpdateTextureScaleMode(data, texturedata, 2);
|
||||
|
||||
if (D3D_BindTextureRep(data->device, &texturedata->utexture, 1) < 0) {
|
||||
return -1;
|
||||
}
|
||||
if (D3D_BindTextureRep(data->device, &texturedata->vtexture, 2) < 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (shader) {
|
||||
result = IDirect3DDevice9_SetPixelShader(data->device, shader);
|
||||
if (FAILED(result)) {
|
||||
return D3D_SetError("SetShader()", result);
|
||||
}
|
||||
}
|
||||
result =
|
||||
IDirect3DDevice9_DrawPrimitiveUP(data->device, D3DPT_TRIANGLEFAN, 2,
|
||||
vertices, sizeof(*vertices));
|
||||
result = IDirect3DDevice9_DrawPrimitiveUP(data->device, D3DPT_TRIANGLEFAN, 2,
|
||||
vertices, sizeof(*vertices));
|
||||
if (FAILED(result)) {
|
||||
return D3D_SetError("DrawPrimitiveUP()", result);
|
||||
D3D_SetError("DrawPrimitiveUP()", result);
|
||||
}
|
||||
if (shader) {
|
||||
result = IDirect3DDevice9_SetPixelShader(data->device, NULL);
|
||||
if (FAILED(result)) {
|
||||
return D3D_SetError("SetShader()", result);
|
||||
}
|
||||
IDirect3DDevice9_SetPixelShader(data->device, NULL);
|
||||
}
|
||||
return 0;
|
||||
return FAILED(result) ? -1 : 0;
|
||||
}
|
||||
|
||||
|
||||
|
@ -1722,7 +1550,6 @@ D3D_RenderCopyEx(SDL_Renderer * renderer, SDL_Texture * texture,
|
|||
const double angle, const SDL_FPoint * center, const SDL_RendererFlip flip)
|
||||
{
|
||||
D3D_RenderData *data = (D3D_RenderData *) renderer->driverdata;
|
||||
D3D_TextureData *texturedata;
|
||||
LPDIRECT3DPIXELSHADER9 shader = NULL;
|
||||
float minx, miny, maxx, maxy;
|
||||
float minu, maxu, minv, maxv;
|
||||
|
@ -1736,12 +1563,6 @@ D3D_RenderCopyEx(SDL_Renderer * renderer, SDL_Texture * texture,
|
|||
return -1;
|
||||
}
|
||||
|
||||
texturedata = (D3D_TextureData *)texture->driverdata;
|
||||
if (!texturedata) {
|
||||
SDL_SetError("Texture is not currently available");
|
||||
return -1;
|
||||
}
|
||||
|
||||
centerx = center->x;
|
||||
centery = center->y;
|
||||
|
||||
|
@ -1798,54 +1619,37 @@ D3D_RenderCopyEx(SDL_Renderer * renderer, SDL_Texture * texture,
|
|||
|
||||
D3D_SetBlendMode(data, texture->blendMode);
|
||||
|
||||
if (D3D_RenderSetupTextureState(renderer, texture, &shader) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Rotate and translate */
|
||||
modelMatrix = MatrixMultiply(
|
||||
MatrixRotationZ((float)(M_PI * (float) angle / 180.0f)),
|
||||
MatrixTranslation(dstrect->x + center->x - 0.5f, dstrect->y + center->y - 0.5f, 0));
|
||||
IDirect3DDevice9_SetTransform(data->device, D3DTS_VIEW, (D3DMATRIX*)&modelMatrix);
|
||||
|
||||
D3D_UpdateTextureScaleMode(data, texturedata, 0);
|
||||
|
||||
if (D3D_BindTextureRep(data->device, &texturedata->texture, 0) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (texturedata->yuv) {
|
||||
shader = data->ps_yuv;
|
||||
|
||||
D3D_UpdateTextureScaleMode(data, texturedata, 1);
|
||||
D3D_UpdateTextureScaleMode(data, texturedata, 2);
|
||||
|
||||
if (D3D_BindTextureRep(data->device, &texturedata->utexture, 1) < 0) {
|
||||
return -1;
|
||||
}
|
||||
if (D3D_BindTextureRep(data->device, &texturedata->vtexture, 2) < 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (shader) {
|
||||
result = IDirect3DDevice9_SetPixelShader(data->device, shader);
|
||||
if (FAILED(result)) {
|
||||
return D3D_SetError("SetShader()", result);
|
||||
D3D_SetError("SetShader()", result);
|
||||
goto done;
|
||||
}
|
||||
}
|
||||
result =
|
||||
IDirect3DDevice9_DrawPrimitiveUP(data->device, D3DPT_TRIANGLEFAN, 2,
|
||||
vertices, sizeof(*vertices));
|
||||
result = IDirect3DDevice9_DrawPrimitiveUP(data->device, D3DPT_TRIANGLEFAN, 2,
|
||||
vertices, sizeof(*vertices));
|
||||
if (FAILED(result)) {
|
||||
return D3D_SetError("DrawPrimitiveUP()", result);
|
||||
D3D_SetError("DrawPrimitiveUP()", result);
|
||||
}
|
||||
done:
|
||||
if (shader) {
|
||||
result = IDirect3DDevice9_SetPixelShader(data->device, NULL);
|
||||
if (FAILED(result)) {
|
||||
return D3D_SetError("SetShader()", result);
|
||||
}
|
||||
IDirect3DDevice9_SetPixelShader(data->device, NULL);
|
||||
}
|
||||
|
||||
modelMatrix = MatrixIdentity();
|
||||
IDirect3DDevice9_SetTransform(data->device, D3DTS_VIEW, (D3DMATRIX*)&modelMatrix);
|
||||
return 0;
|
||||
|
||||
return FAILED(result) ? -1 : 0;
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -1955,6 +1759,8 @@ D3D_DestroyRenderer(SDL_Renderer * renderer)
|
|||
D3D_RenderData *data = (D3D_RenderData *) renderer->driverdata;
|
||||
|
||||
if (data) {
|
||||
int i;
|
||||
|
||||
/* Release the render target */
|
||||
if (data->defaultRenderTarget) {
|
||||
IDirect3DSurface9_Release(data->defaultRenderTarget);
|
||||
|
@ -1964,11 +1770,15 @@ D3D_DestroyRenderer(SDL_Renderer * renderer)
|
|||
IDirect3DSurface9_Release(data->currentRenderTarget);
|
||||
data->currentRenderTarget = NULL;
|
||||
}
|
||||
if (data->ps_yuv) {
|
||||
IDirect3DPixelShader9_Release(data->ps_yuv);
|
||||
for (i = 0; i < SDL_arraysize(data->shaders); ++i) {
|
||||
if (data->shaders[i]) {
|
||||
IDirect3DPixelShader9_Release(data->shaders[i]);
|
||||
data->shaders[i] = NULL;
|
||||
}
|
||||
}
|
||||
if (data->device) {
|
||||
IDirect3DDevice9_Release(data->device);
|
||||
data->device = NULL;
|
||||
}
|
||||
if (data->d3d) {
|
||||
IDirect3D9_Release(data->d3d);
|
||||
|
|
274
src/render/direct3d/SDL_shaders_d3d.c
Normal file
274
src/render/direct3d/SDL_shaders_d3d.c
Normal file
|
@ -0,0 +1,274 @@
|
|||
/*
|
||||
Simple DirectMedia Layer
|
||||
Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
#include "../../SDL_internal.h"
|
||||
|
||||
#include "SDL_render.h"
|
||||
#include "SDL_system.h"
|
||||
|
||||
#if SDL_VIDEO_RENDER_D3D && !SDL_RENDER_DISABLED
|
||||
|
||||
#include "../../core/windows/SDL_windows.h"
|
||||
|
||||
#include <d3d9.h>
|
||||
|
||||
#include "SDL_shaders_d3d.h"
|
||||
|
||||
/* The shaders here were compiled with:
|
||||
|
||||
fxc /T ps_2_0 /Fo"<OUTPUT FILE>" "<INPUT FILE>"
|
||||
|
||||
Shader object code was converted to a list of DWORDs via the following
|
||||
*nix style command (available separately from Windows + MSVC):
|
||||
|
||||
hexdump -v -e '6/4 "0x%08.8x, " "\n"' <FILE>
|
||||
*/
|
||||
|
||||
/* --- D3D9_PixelShader_YUV_JPEG.hlsl ---
|
||||
Texture2D theTextureY : register(t0);
|
||||
Texture2D theTextureU : register(t1);
|
||||
Texture2D theTextureV : register(t2);
|
||||
SamplerState theSampler = sampler_state
|
||||
{
|
||||
addressU = Clamp;
|
||||
addressV = Clamp;
|
||||
mipfilter = NONE;
|
||||
minfilter = LINEAR;
|
||||
magfilter = LINEAR;
|
||||
};
|
||||
|
||||
struct PixelShaderInput
|
||||
{
|
||||
float4 pos : SV_POSITION;
|
||||
float2 tex : TEXCOORD0;
|
||||
float4 color : COLOR0;
|
||||
};
|
||||
|
||||
float4 main(PixelShaderInput input) : SV_TARGET
|
||||
{
|
||||
const float3 offset = {0.0, -0.501960814, -0.501960814};
|
||||
const float3 Rcoeff = {1.0000, 0.0000, 1.4020};
|
||||
const float3 Gcoeff = {1.0000, -0.3441, -0.7141};
|
||||
const float3 Bcoeff = {1.0000, 1.7720, 0.0000};
|
||||
|
||||
float4 Output;
|
||||
|
||||
float3 yuv;
|
||||
yuv.x = theTextureY.Sample(theSampler, input.tex).r;
|
||||
yuv.y = theTextureU.Sample(theSampler, input.tex).r;
|
||||
yuv.z = theTextureV.Sample(theSampler, input.tex).r;
|
||||
|
||||
yuv += offset;
|
||||
Output.r = dot(yuv, Rcoeff);
|
||||
Output.g = dot(yuv, Gcoeff);
|
||||
Output.b = dot(yuv, Bcoeff);
|
||||
Output.a = 1.0f;
|
||||
|
||||
return Output * input.color;
|
||||
}
|
||||
*/
|
||||
static const DWORD D3D9_PixelShader_YUV_JPEG[] = {
|
||||
0xffff0200, 0x0044fffe, 0x42415443, 0x0000001c, 0x000000d7, 0xffff0200,
|
||||
0x00000003, 0x0000001c, 0x00000100, 0x000000d0, 0x00000058, 0x00010003,
|
||||
0x00000001, 0x00000070, 0x00000000, 0x00000080, 0x00020003, 0x00000001,
|
||||
0x00000098, 0x00000000, 0x000000a8, 0x00000003, 0x00000001, 0x000000c0,
|
||||
0x00000000, 0x53656874, 0x6c706d61, 0x742b7265, 0x65546568, 0x72757478,
|
||||
0xab005565, 0x00070004, 0x00040001, 0x00000001, 0x00000000, 0x53656874,
|
||||
0x6c706d61, 0x742b7265, 0x65546568, 0x72757478, 0xab005665, 0x00070004,
|
||||
0x00040001, 0x00000001, 0x00000000, 0x53656874, 0x6c706d61, 0x742b7265,
|
||||
0x65546568, 0x72757478, 0xab005965, 0x00070004, 0x00040001, 0x00000001,
|
||||
0x00000000, 0x325f7370, 0x4d00305f, 0x6f726369, 0x74666f73, 0x29522820,
|
||||
0x534c4820, 0x6853204c, 0x72656461, 0x6d6f4320, 0x656c6970, 0x2e362072,
|
||||
0x36392e33, 0x312e3030, 0x34383336, 0xababab00, 0x05000051, 0xa00f0000,
|
||||
0x00000000, 0xbf008081, 0xbf008081, 0x3f800000, 0x05000051, 0xa00f0001,
|
||||
0x3f800000, 0x00000000, 0x3fb374bc, 0x00000000, 0x05000051, 0xa00f0002,
|
||||
0x3f800000, 0xbeb02de0, 0xbf36cf42, 0x00000000, 0x05000051, 0xa00f0003,
|
||||
0x3f800000, 0x3fe2d0e5, 0x00000000, 0x00000000, 0x0200001f, 0x80000000,
|
||||
0xb0030000, 0x0200001f, 0x80000000, 0x900f0000, 0x0200001f, 0x90000000,
|
||||
0xa00f0800, 0x0200001f, 0x90000000, 0xa00f0801, 0x0200001f, 0x90000000,
|
||||
0xa00f0802, 0x03000042, 0x800f0000, 0xb0e40000, 0xa0e40800, 0x03000042,
|
||||
0x800f0001, 0xb0e40000, 0xa0e40801, 0x03000042, 0x800f0002, 0xb0e40000,
|
||||
0xa0e40802, 0x02000001, 0x80020000, 0x80000001, 0x02000001, 0x80040000,
|
||||
0x80000002, 0x03000002, 0x80070000, 0x80e40000, 0xa0e40000, 0x03000008,
|
||||
0x80010001, 0x80e40000, 0xa0e40001, 0x03000008, 0x80020001, 0x80e40000,
|
||||
0xa0e40002, 0x0400005a, 0x80040001, 0x80e40000, 0xa0e40003, 0xa0aa0003,
|
||||
0x02000001, 0x80080001, 0xa0ff0000, 0x03000005, 0x800f0000, 0x80e40001,
|
||||
0x90e40000, 0x02000001, 0x800f0800, 0x80e40000, 0x0000ffff
|
||||
};
|
||||
|
||||
/* --- D3D9_PixelShader_YUV_BT601.hlsl ---
|
||||
Texture2D theTextureY : register(t0);
|
||||
Texture2D theTextureU : register(t1);
|
||||
Texture2D theTextureV : register(t2);
|
||||
SamplerState theSampler = sampler_state
|
||||
{
|
||||
addressU = Clamp;
|
||||
addressV = Clamp;
|
||||
mipfilter = NONE;
|
||||
minfilter = LINEAR;
|
||||
magfilter = LINEAR;
|
||||
};
|
||||
|
||||
struct PixelShaderInput
|
||||
{
|
||||
float4 pos : SV_POSITION;
|
||||
float2 tex : TEXCOORD0;
|
||||
float4 color : COLOR0;
|
||||
};
|
||||
|
||||
float4 main(PixelShaderInput input) : SV_TARGET
|
||||
{
|
||||
const float3 offset = {-0.0627451017, -0.501960814, -0.501960814};
|
||||
const float3 Rcoeff = {1.1644, 0.0000, 1.5960};
|
||||
const float3 Gcoeff = {1.1644, -0.3918, -0.8130};
|
||||
const float3 Bcoeff = {1.1644, 2.0172, 0.0000};
|
||||
|
||||
float4 Output;
|
||||
|
||||
float3 yuv;
|
||||
yuv.x = theTextureY.Sample(theSampler, input.tex).r;
|
||||
yuv.y = theTextureU.Sample(theSampler, input.tex).r;
|
||||
yuv.z = theTextureV.Sample(theSampler, input.tex).r;
|
||||
|
||||
yuv += offset;
|
||||
Output.r = dot(yuv, Rcoeff);
|
||||
Output.g = dot(yuv, Gcoeff);
|
||||
Output.b = dot(yuv, Bcoeff);
|
||||
Output.a = 1.0f;
|
||||
|
||||
return Output * input.color;
|
||||
}
|
||||
*/
|
||||
static const DWORD D3D9_PixelShader_YUV_BT601[] = {
|
||||
0xffff0200, 0x0044fffe, 0x42415443, 0x0000001c, 0x000000d7, 0xffff0200,
|
||||
0x00000003, 0x0000001c, 0x00000100, 0x000000d0, 0x00000058, 0x00010003,
|
||||
0x00000001, 0x00000070, 0x00000000, 0x00000080, 0x00020003, 0x00000001,
|
||||
0x00000098, 0x00000000, 0x000000a8, 0x00000003, 0x00000001, 0x000000c0,
|
||||
0x00000000, 0x53656874, 0x6c706d61, 0x742b7265, 0x65546568, 0x72757478,
|
||||
0xab005565, 0x00070004, 0x00040001, 0x00000001, 0x00000000, 0x53656874,
|
||||
0x6c706d61, 0x742b7265, 0x65546568, 0x72757478, 0xab005665, 0x00070004,
|
||||
0x00040001, 0x00000001, 0x00000000, 0x53656874, 0x6c706d61, 0x742b7265,
|
||||
0x65546568, 0x72757478, 0xab005965, 0x00070004, 0x00040001, 0x00000001,
|
||||
0x00000000, 0x325f7370, 0x4d00305f, 0x6f726369, 0x74666f73, 0x29522820,
|
||||
0x534c4820, 0x6853204c, 0x72656461, 0x6d6f4320, 0x656c6970, 0x2e362072,
|
||||
0x36392e33, 0x312e3030, 0x34383336, 0xababab00, 0x05000051, 0xa00f0000,
|
||||
0xbd808081, 0xbf008081, 0xbf008081, 0x3f800000, 0x05000051, 0xa00f0001,
|
||||
0x3f950b0f, 0x00000000, 0x3fcc49ba, 0x00000000, 0x05000051, 0xa00f0002,
|
||||
0x3f950b0f, 0xbec89a02, 0xbf5020c5, 0x00000000, 0x05000051, 0xa00f0003,
|
||||
0x3f950b0f, 0x400119ce, 0x00000000, 0x00000000, 0x0200001f, 0x80000000,
|
||||
0xb0030000, 0x0200001f, 0x80000000, 0x900f0000, 0x0200001f, 0x90000000,
|
||||
0xa00f0800, 0x0200001f, 0x90000000, 0xa00f0801, 0x0200001f, 0x90000000,
|
||||
0xa00f0802, 0x03000042, 0x800f0000, 0xb0e40000, 0xa0e40800, 0x03000042,
|
||||
0x800f0001, 0xb0e40000, 0xa0e40801, 0x03000042, 0x800f0002, 0xb0e40000,
|
||||
0xa0e40802, 0x02000001, 0x80020000, 0x80000001, 0x02000001, 0x80040000,
|
||||
0x80000002, 0x03000002, 0x80070000, 0x80e40000, 0xa0e40000, 0x03000008,
|
||||
0x80010001, 0x80e40000, 0xa0e40001, 0x03000008, 0x80020001, 0x80e40000,
|
||||
0xa0e40002, 0x0400005a, 0x80040001, 0x80e40000, 0xa0e40003, 0xa0aa0003,
|
||||
0x02000001, 0x80080001, 0xa0ff0000, 0x03000005, 0x800f0000, 0x80e40001,
|
||||
0x90e40000, 0x02000001, 0x800f0800, 0x80e40000, 0x0000ffff
|
||||
};
|
||||
|
||||
/* --- D3D9_PixelShader_YUV_BT709.hlsl ---
|
||||
Texture2D theTextureY : register(t0);
|
||||
Texture2D theTextureU : register(t1);
|
||||
Texture2D theTextureV : register(t2);
|
||||
SamplerState theSampler = sampler_state
|
||||
{
|
||||
addressU = Clamp;
|
||||
addressV = Clamp;
|
||||
mipfilter = NONE;
|
||||
minfilter = LINEAR;
|
||||
magfilter = LINEAR;
|
||||
};
|
||||
|
||||
struct PixelShaderInput
|
||||
{
|
||||
float4 pos : SV_POSITION;
|
||||
float2 tex : TEXCOORD0;
|
||||
float4 color : COLOR0;
|
||||
};
|
||||
|
||||
float4 main(PixelShaderInput input) : SV_TARGET
|
||||
{
|
||||
const float3 offset = {-0.0627451017, -0.501960814, -0.501960814};
|
||||
const float3 Rcoeff = {1.1644, 0.0000, 1.7927};
|
||||
const float3 Gcoeff = {1.1644, -0.2132, -0.5329};
|
||||
const float3 Bcoeff = {1.1644, 2.1124, 0.0000};
|
||||
|
||||
float4 Output;
|
||||
|
||||
float3 yuv;
|
||||
yuv.x = theTextureY.Sample(theSampler, input.tex).r;
|
||||
yuv.y = theTextureU.Sample(theSampler, input.tex).r;
|
||||
yuv.z = theTextureV.Sample(theSampler, input.tex).r;
|
||||
|
||||
yuv += offset;
|
||||
Output.r = dot(yuv, Rcoeff);
|
||||
Output.g = dot(yuv, Gcoeff);
|
||||
Output.b = dot(yuv, Bcoeff);
|
||||
Output.a = 1.0f;
|
||||
|
||||
return Output * input.color;
|
||||
}
|
||||
*/
|
||||
static const DWORD D3D9_PixelShader_YUV_BT709[] = {
|
||||
0xffff0200, 0x0044fffe, 0x42415443, 0x0000001c, 0x000000d7, 0xffff0200,
|
||||
0x00000003, 0x0000001c, 0x00000100, 0x000000d0, 0x00000058, 0x00010003,
|
||||
0x00000001, 0x00000070, 0x00000000, 0x00000080, 0x00020003, 0x00000001,
|
||||
0x00000098, 0x00000000, 0x000000a8, 0x00000003, 0x00000001, 0x000000c0,
|
||||
0x00000000, 0x53656874, 0x6c706d61, 0x742b7265, 0x65546568, 0x72757478,
|
||||
0xab005565, 0x00070004, 0x00040001, 0x00000001, 0x00000000, 0x53656874,
|
||||
0x6c706d61, 0x742b7265, 0x65546568, 0x72757478, 0xab005665, 0x00070004,
|
||||
0x00040001, 0x00000001, 0x00000000, 0x53656874, 0x6c706d61, 0x742b7265,
|
||||
0x65546568, 0x72757478, 0xab005965, 0x00070004, 0x00040001, 0x00000001,
|
||||
0x00000000, 0x325f7370, 0x4d00305f, 0x6f726369, 0x74666f73, 0x29522820,
|
||||
0x534c4820, 0x6853204c, 0x72656461, 0x6d6f4320, 0x656c6970, 0x2e362072,
|
||||
0x36392e33, 0x312e3030, 0x34383336, 0xababab00, 0x05000051, 0xa00f0000,
|
||||
0xbd808081, 0xbf008081, 0xbf008081, 0x3f800000, 0x05000051, 0xa00f0001,
|
||||
0x3f950b0f, 0x00000000, 0x3fe57732, 0x00000000, 0x05000051, 0xa00f0002,
|
||||
0x3f950b0f, 0xbe5a511a, 0xbf086c22, 0x00000000, 0x05000051, 0xa00f0003,
|
||||
0x3f950b0f, 0x40073190, 0x00000000, 0x00000000, 0x0200001f, 0x80000000,
|
||||
0xb0030000, 0x0200001f, 0x80000000, 0x900f0000, 0x0200001f, 0x90000000,
|
||||
0xa00f0800, 0x0200001f, 0x90000000, 0xa00f0801, 0x0200001f, 0x90000000,
|
||||
0xa00f0802, 0x03000042, 0x800f0000, 0xb0e40000, 0xa0e40800, 0x03000042,
|
||||
0x800f0001, 0xb0e40000, 0xa0e40801, 0x03000042, 0x800f0002, 0xb0e40000,
|
||||
0xa0e40802, 0x02000001, 0x80020000, 0x80000001, 0x02000001, 0x80040000,
|
||||
0x80000002, 0x03000002, 0x80070000, 0x80e40000, 0xa0e40000, 0x03000008,
|
||||
0x80010001, 0x80e40000, 0xa0e40001, 0x03000008, 0x80020001, 0x80e40000,
|
||||
0xa0e40002, 0x0400005a, 0x80040001, 0x80e40000, 0xa0e40003, 0xa0aa0003,
|
||||
0x02000001, 0x80080001, 0xa0ff0000, 0x03000005, 0x800f0000, 0x80e40001,
|
||||
0x90e40000, 0x02000001, 0x800f0800, 0x80e40000, 0x0000ffff
|
||||
};
|
||||
|
||||
|
||||
static const DWORD *D3D9_shaders[] = {
|
||||
D3D9_PixelShader_YUV_JPEG,
|
||||
D3D9_PixelShader_YUV_BT601,
|
||||
D3D9_PixelShader_YUV_BT709,
|
||||
};
|
||||
|
||||
HRESULT D3D9_CreatePixelShader(IDirect3DDevice9 *d3dDevice, D3D9_Shader shader, IDirect3DPixelShader9 **pixelShader)
|
||||
{
|
||||
return IDirect3DDevice9_CreatePixelShader(d3dDevice, D3D9_shaders[shader], pixelShader);
|
||||
}
|
||||
|
||||
#endif /* SDL_VIDEO_RENDER_D3D && !SDL_RENDER_DISABLED */
|
||||
|
||||
/* vi: set ts=4 sw=4 expandtab: */
|
|
@ -18,11 +18,17 @@
|
|||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
#include "../SDL_internal.h"
|
||||
#include "../../SDL_internal.h"
|
||||
|
||||
/* FIXME: This breaks on various versions of GCC and should be rewritten using intrinsics */
|
||||
#if 0 /* (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES && !defined(__clang__) */
|
||||
#define USE_MMX_ASSEMBLY 1
|
||||
#endif
|
||||
/* D3D9 shader implementation */
|
||||
|
||||
/*
 * Identifiers for the precompiled D3D9 pixel shaders.  The numeric value is
 * used directly as an array index by D3D9_CreatePixelShader(), so the order
 * here has to match the bytecode table in SDL_shaders_d3d.c.
 */
typedef enum {
    SHADER_YUV_JPEG  = 0,   /* selected for SDL_YUV_CONVERSION_JPEG */
    SHADER_YUV_BT601 = 1,   /* selected for SDL_YUV_CONVERSION_BT601 */
    SHADER_YUV_BT709 = 2,   /* selected for SDL_YUV_CONVERSION_BT709 */
    NUM_SHADERS      = 3    /* count of the entries above; not a shader */
} D3D9_Shader;
|
||||
|
||||
extern HRESULT D3D9_CreatePixelShader(IDirect3DDevice9 *d3dDevice, D3D9_Shader shader, IDirect3DPixelShader9 **pixelShader);
|
||||
|
||||
/* vi: set ts=4 sw=4 expandtab: */
|
851
src/render/direct3d11/SDL_render_d3d11.c
Normal file → Executable file
851
src/render/direct3d11/SDL_render_d3d11.c
Normal file → Executable file
File diff suppressed because it is too large
Load diff
1957
src/render/direct3d11/SDL_shaders_d3d11.c
Executable file
1957
src/render/direct3d11/SDL_shaders_d3d11.c
Executable file
File diff suppressed because it is too large
Load diff
43
src/render/direct3d11/SDL_shaders_d3d11.h
Executable file
43
src/render/direct3d11/SDL_shaders_d3d11.h
Executable file
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
Simple DirectMedia Layer
|
||||
Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
|
||||
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the authors be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
#include "../../SDL_internal.h"
|
||||
|
||||
/* D3D11 shader implementation */
|
||||
|
||||
/*
 * Identifiers accepted by D3D11_CreatePixelShader().
 *
 * NOTE(review): judging by the names, the list is one untextured shader,
 * one plain RGB shader, and then one shader per (pixel layout, colour
 * conversion) pair -- confirm against SDL_shaders_d3d11.c, which is where
 * the values are consumed.
 */
typedef enum {
    SHADER_SOLID      = 0,
    SHADER_RGB        = 1,

    /* YUV layout */
    SHADER_YUV_JPEG   = 2,
    SHADER_YUV_BT601  = 3,
    SHADER_YUV_BT709  = 4,

    /* NV12 layout */
    SHADER_NV12_JPEG  = 5,
    SHADER_NV12_BT601 = 6,
    SHADER_NV12_BT709 = 7,

    /* NV21 layout */
    SHADER_NV21_JPEG  = 8,
    SHADER_NV21_BT601 = 9,
    SHADER_NV21_BT709 = 10,

    NUM_SHADERS       = 11  /* count of the entries above; not a shader */
} D3D11_Shader;
|
||||
|
||||
extern int D3D11_CreateVertexShader(ID3D11Device1 *d3dDevice, ID3D11VertexShader **vertexShader, ID3D11InputLayout **inputLayout);
|
||||
extern int D3D11_CreatePixelShader(ID3D11Device1 *d3dDevice, D3D11_Shader shader, ID3D11PixelShader **pixelShader);
|
||||
|
||||
/* vi: set ts=4 sw=4 expandtab: */
|
642
src/render/mmx.h
642
src/render/mmx.h
|
@ -1,642 +0,0 @@
|
|||
/* mmx.h
|
||||
|
||||
MultiMedia eXtensions GCC interface library for IA32.
|
||||
|
||||
To use this library, simply include this header file
|
||||
and compile with GCC. You MUST have inlining enabled
|
||||
in order for mmx_ok() to work; this can be done by
|
||||
simply using -O on the GCC command line.
|
||||
|
||||
Compiling with -DMMX_TRACE will cause detailed trace
|
||||
output to be sent to stderr for each mmx operation.
|
||||
This adds lots of code, and obviously slows execution to
|
||||
a crawl, but can be very useful for debugging.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
|
||||
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
|
||||
LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
|
||||
AND FITNESS FOR ANY PARTICULAR PURPOSE.
|
||||
|
||||
1997-99 by H. Dietz and R. Fisher
|
||||
|
||||
Notes:
|
||||
It appears that the latest gas has the pand problem fixed, therefore
|
||||
I'll undefine BROKEN_PAND by default.
|
||||
*/
|
||||
|
||||
#ifndef _MMX_H
|
||||
#define _MMX_H
|
||||
|
||||
|
||||
/* Warning: at this writing, the version of GAS packaged
|
||||
with most Linux distributions does not handle the
|
||||
parallel AND operation mnemonic correctly. If the
|
||||
symbol BROKEN_PAND is defined, a slower alternative
|
||||
coding will be used. If execution of mmxtest results
|
||||
in an illegal instruction fault, define this symbol.
|
||||
*/
|
||||
#undef BROKEN_PAND
|
||||
|
||||
|
||||
/* The type of a value that fits in an MMX register
|
||||
(note that long long constant values MUST be suffixed
|
||||
by LL and unsigned long long values by ULL, lest
|
||||
they be truncated by the compiler)
|
||||
*/
|
||||
/*
 * One 64-bit quantity viewed as any of the element layouts below.
 * All members overlay the same 8 bytes; the union is forced onto an
 * 8-byte boundary.
 */
typedef union
{
    long long          q;       /* 1 x signed 64-bit quadword */
    unsigned long long uq;      /* 1 x unsigned 64-bit quadword */
    int                d[2];    /* 2 x signed 32-bit doublewords */
    unsigned int       ud[2];   /* 2 x unsigned 32-bit doublewords */
    short              w[4];    /* 4 x signed 16-bit words */
    unsigned short     uw[4];   /* 4 x unsigned 16-bit words */
    char               b[8];    /* 8 x 8-bit bytes */
    unsigned char      ub[8];   /* 8 x unsigned 8-bit bytes */
    float              s[2];    /* 2 x single-precision (32-bit) floats */
} __attribute__((aligned(8))) mmx_t;
|
||||
|
||||
|
||||
#if 0
|
||||
/* Function to test if multimedia instructions are supported...
|
||||
*/
|
||||
inline extern int
|
||||
mm_support(void)
|
||||
{
|
||||
/* Returns 1 if MMX instructions are supported,
|
||||
3 if Cyrix MMX and Extended MMX instructions are supported
|
||||
5 if AMD MMX and 3DNow! instructions are supported
|
||||
0 if hardware does not support any of these
|
||||
*/
|
||||
register int rval = 0;
|
||||
|
||||
__asm__ __volatile__(
|
||||
/* See if CPUID instruction is supported ... */
|
||||
/* ... Get copies of EFLAGS into eax and ecx */
|
||||
"pushf\n\t"
|
||||
"popl %%eax\n\t" "movl %%eax, %%ecx\n\t"
|
||||
/* ... Toggle the ID bit in one copy and store */
|
||||
/* to the EFLAGS reg */
|
||||
"xorl $0x200000, %%eax\n\t"
|
||||
"push %%eax\n\t" "popf\n\t"
|
||||
/* ... Get the (hopefully modified) EFLAGS */
|
||||
"pushf\n\t" "popl %%eax\n\t"
|
||||
/* ... Compare and test result */
|
||||
"xorl %%eax, %%ecx\n\t" "testl $0x200000, %%ecx\n\t" "jz NotSupported1\n\t" /* CPUID not supported */
|
||||
/* Get standard CPUID information, and
|
||||
go to a specific vendor section */
|
||||
"movl $0, %%eax\n\t" "cpuid\n\t"
|
||||
/* Check for Intel */
|
||||
"cmpl $0x756e6547, %%ebx\n\t"
|
||||
"jne TryAMD\n\t"
|
||||
"cmpl $0x49656e69, %%edx\n\t"
|
||||
"jne TryAMD\n\t"
|
||||
"cmpl $0x6c65746e, %%ecx\n"
|
||||
"jne TryAMD\n\t" "jmp Intel\n\t"
|
||||
/* Check for AMD */
|
||||
"\nTryAMD:\n\t"
|
||||
"cmpl $0x68747541, %%ebx\n\t"
|
||||
"jne TryCyrix\n\t"
|
||||
"cmpl $0x69746e65, %%edx\n\t"
|
||||
"jne TryCyrix\n\t"
|
||||
"cmpl $0x444d4163, %%ecx\n"
|
||||
"jne TryCyrix\n\t" "jmp AMD\n\t"
|
||||
/* Check for Cyrix */
|
||||
"\nTryCyrix:\n\t"
|
||||
"cmpl $0x69727943, %%ebx\n\t"
|
||||
"jne NotSupported2\n\t"
|
||||
"cmpl $0x736e4978, %%edx\n\t"
|
||||
"jne NotSupported3\n\t"
|
||||
"cmpl $0x64616574, %%ecx\n\t"
|
||||
"jne NotSupported4\n\t"
|
||||
/* Drop through to Cyrix... */
|
||||
/* Cyrix Section */
|
||||
/* See if extended CPUID level 80000001 is supported */
|
||||
/* The value of CPUID/80000001 for the 6x86MX is undefined
|
||||
according to the Cyrix CPU Detection Guide (Preliminary
|
||||
Rev. 1.01 table 1), so we'll check the value of eax for
|
||||
CPUID/0 to see if standard CPUID level 2 is supported.
|
||||
According to the table, the only CPU which supports level
|
||||
2 is also the only one which supports extended CPUID levels.
|
||||
*/
|
||||
"cmpl $0x2, %%eax\n\t" "jne MMXtest\n\t" /* Use standard CPUID instead */
|
||||
/* Extended CPUID supported (in theory), so get extended
|
||||
features */
|
||||
"movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%eax\n\t" /* Test for MMX */
|
||||
"jz NotSupported5\n\t" /* MMX not supported */
|
||||
"testl $0x01000000, %%eax\n\t" /* Test for Ext'd MMX */
|
||||
"jnz EMMXSupported\n\t" "movl $1, %0:\n\n\t" /* MMX Supported */
|
||||
"jmp Return\n\n" "EMMXSupported:\n\t" "movl $3, %0:\n\n\t" /* EMMX and MMX Supported */
|
||||
"jmp Return\n\t"
|
||||
/* AMD Section */
|
||||
"AMD:\n\t"
|
||||
/* See if extended CPUID is supported */
|
||||
"movl $0x80000000, %%eax\n\t" "cpuid\n\t" "cmpl $0x80000000, %%eax\n\t" "jl MMXtest\n\t" /* Use standard CPUID instead */
|
||||
/* Extended CPUID supported, so get extended features */
|
||||
"movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t" /* Test for MMX */
|
||||
"jz NotSupported6\n\t" /* MMX not supported */
|
||||
"testl $0x80000000, %%edx\n\t" /* Test for 3DNow! */
|
||||
"jnz ThreeDNowSupported\n\t" "movl $1, %0:\n\n\t" /* MMX Supported */
|
||||
"jmp Return\n\n" "ThreeDNowSupported:\n\t" "movl $5, %0:\n\n\t" /* 3DNow! and MMX Supported */
|
||||
"jmp Return\n\t"
|
||||
/* Intel Section */
|
||||
"Intel:\n\t"
|
||||
/* Check for MMX */
|
||||
"MMXtest:\n\t" "movl $1, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t" /* Test for MMX */
|
||||
"jz NotSupported7\n\t" /* MMX Not supported */
|
||||
"movl $1, %0:\n\n\t" /* MMX Supported */
|
||||
"jmp Return\n\t"
|
||||
/* Nothing supported */
|
||||
"\nNotSupported1:\n\t" "#movl $101, %0:\n\n\t" "\nNotSupported2:\n\t" "#movl $102, %0:\n\n\t" "\nNotSupported3:\n\t" "#movl $103, %0:\n\n\t" "\nNotSupported4:\n\t" "#movl $104, %0:\n\n\t" "\nNotSupported5:\n\t" "#movl $105, %0:\n\n\t" "\nNotSupported6:\n\t" "#movl $106, %0:\n\n\t" "\nNotSupported7:\n\t" "#movl $107, %0:\n\n\t" "movl $0, %0:\n\n\t" "Return:\n\t":"=a"(rval): /* no input */
|
||||
:"eax", "ebx", "ecx", "edx");
|
||||
|
||||
/* Return */
|
||||
return (rval);
|
||||
}
|
||||
|
||||
/* Function to test if mmx instructions are supported...
|
||||
*/
|
||||
inline extern int
|
||||
mmx_ok(void)
|
||||
{
|
||||
/* Returns 1 if MMX instructions are supported, 0 otherwise */
|
||||
return (mm_support() & 0x1);
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Helper functions for the instruction macros that follow...
|
||||
(note that memory-to-register, m2r, instructions are nearly
|
||||
as efficient as register-to-register, r2r, instructions;
|
||||
however, memory-to-memory instructions are really simulated
|
||||
as a convenience, and are only 1/3 as efficient)
|
||||
*/
|
||||
#ifdef MMX_TRACE
|
||||
|
||||
/* Include the stuff for printing a trace (the macros below use printf, so the trace goes to stdout)...
|
||||
*/
|
||||
|
||||
/* Traced immediate-to-register MMX operation.
   Prints the immediate and the register contents, executes "op imm, reg",
   then prints the register contents again.
     op  - instruction mnemonic (pasted into the asm text)
     imm - immediate operand; also stored in a scratch mmx_t for printing
     reg - MMX register name, e.g. mm0
   The register snapshots use "=m" so that movq is always given a memory
   destination for the scratch variable (the catch-all "X" constraint does
   not guarantee that). */
#define mmx_i2r(op, imm, reg) \
    { \
        mmx_t mmx_trace; \
        mmx_trace.uq = (imm); \
        printf(#op "_i2r(" #imm "=0x%08x%08x, ", \
               mmx_trace.d[1], mmx_trace.d[0]); \
        __asm__ __volatile__ ("movq %%" #reg ", %0" \
                              : "=m" (mmx_trace) \
                              : /* nothing */ ); \
        printf(#reg "=0x%08x%08x) => ", \
               mmx_trace.d[1], mmx_trace.d[0]); \
        __asm__ __volatile__ (#op " %0, %%" #reg \
                              : /* nothing */ \
                              : "X" (imm)); \
        __asm__ __volatile__ ("movq %%" #reg ", %0" \
                              : "=m" (mmx_trace) \
                              : /* nothing */ ); \
        printf(#reg "=0x%08x%08x\n", \
               mmx_trace.d[1], mmx_trace.d[0]); \
    }
|
||||
|
||||
/* Traced memory-to-register MMX operation.
   Prints the memory operand and the register contents, executes
   "op mem, reg", then prints the register contents again.
     op  - instruction mnemonic (pasted into the asm text)
     mem - mmx_t source operand
     reg - MMX register name, e.g. mm0
   The operand constraints are "m"/"=m", matching the untraced mmx_m2r, so
   a traced build assembles the same operand forms as a normal build. */
#define mmx_m2r(op, mem, reg) \
    { \
        mmx_t mmx_trace; \
        mmx_trace = (mem); \
        printf(#op "_m2r(" #mem "=0x%08x%08x, ", \
               mmx_trace.d[1], mmx_trace.d[0]); \
        __asm__ __volatile__ ("movq %%" #reg ", %0" \
                              : "=m" (mmx_trace) \
                              : /* nothing */ ); \
        printf(#reg "=0x%08x%08x) => ", \
               mmx_trace.d[1], mmx_trace.d[0]); \
        __asm__ __volatile__ (#op " %0, %%" #reg \
                              : /* nothing */ \
                              : "m" (mem)); \
        __asm__ __volatile__ ("movq %%" #reg ", %0" \
                              : "=m" (mmx_trace) \
                              : /* nothing */ ); \
        printf(#reg "=0x%08x%08x\n", \
               mmx_trace.d[1], mmx_trace.d[0]); \
    }
|
||||
|
||||
/* Traced register-to-memory MMX operation.
   Prints the register contents and the old value of mem, executes
   "op reg, mem", then prints the new value of mem.
     op  - instruction mnemonic (pasted into the asm text)
     reg - MMX register name, e.g. mm0
     mem - mmx_t destination operand
   The destination uses "=m", matching the untraced mmx_r2m, so a traced
   build assembles the same operand forms as a normal build. */
#define mmx_r2m(op, reg, mem) \
    { \
        mmx_t mmx_trace; \
        __asm__ __volatile__ ("movq %%" #reg ", %0" \
                              : "=m" (mmx_trace) \
                              : /* nothing */ ); \
        printf(#op "_r2m(" #reg "=0x%08x%08x, ", \
               mmx_trace.d[1], mmx_trace.d[0]); \
        mmx_trace = (mem); \
        printf(#mem "=0x%08x%08x) => ", \
               mmx_trace.d[1], mmx_trace.d[0]); \
        __asm__ __volatile__ (#op " %%" #reg ", %0" \
                              : "=m" (mem) \
                              : /* nothing */ ); \
        mmx_trace = (mem); \
        printf(#mem "=0x%08x%08x\n", \
               mmx_trace.d[1], mmx_trace.d[0]); \
    }
|
||||
|
||||
/* Traced register-to-register MMX operation.
   Prints both registers, executes "op regs, regd", then prints regd again.
     op   - instruction mnemonic (pasted into the asm text)
     regs - source MMX register name
     regd - destination MMX register name
   The snapshots use "=m" so that movq is always given a memory destination
   for the scratch variable.  The operation itself has no operands, i.e. it
   is basic asm, which is why its register names carry a single '%'. */
#define mmx_r2r(op, regs, regd) \
    { \
        mmx_t mmx_trace; \
        __asm__ __volatile__ ("movq %%" #regs ", %0" \
                              : "=m" (mmx_trace) \
                              : /* nothing */ ); \
        printf(#op "_r2r(" #regs "=0x%08x%08x, ", \
               mmx_trace.d[1], mmx_trace.d[0]); \
        __asm__ __volatile__ ("movq %%" #regd ", %0" \
                              : "=m" (mmx_trace) \
                              : /* nothing */ ); \
        printf(#regd "=0x%08x%08x) => ", \
               mmx_trace.d[1], mmx_trace.d[0]); \
        __asm__ __volatile__ (#op " %" #regs ", %" #regd); \
        __asm__ __volatile__ ("movq %%" #regd ", %0" \
                              : "=m" (mmx_trace) \
                              : /* nothing */ ); \
        printf(#regd "=0x%08x%08x\n", \
               mmx_trace.d[1], mmx_trace.d[0]); \
    }
|
||||
|
||||
/* Traced memory-to-memory MMX operation, emulated through mm0:
       mm0 = memd;  mm0 = mm0 <op> mems;  memd = mm0
   Prints mems and the old memd, runs the sequence, then prints the new
   memd.
     op   - instruction mnemonic (pasted into the asm text)
     mems - mmx_t source operand
     memd - mmx_t destination operand; read first, then overwritten
   memd is declared "+m" because the asm loads it before storing to it; an
   output-only constraint would let the compiler treat its previous value
   as dead.  mm0 is overwritten as scratch. */
#define mmx_m2m(op, mems, memd) \
    { \
        mmx_t mmx_trace; \
        mmx_trace = (mems); \
        printf(#op "_m2m(" #mems "=0x%08x%08x, ", \
               mmx_trace.d[1], mmx_trace.d[0]); \
        mmx_trace = (memd); \
        printf(#memd "=0x%08x%08x) => ", \
               mmx_trace.d[1], mmx_trace.d[0]); \
        __asm__ __volatile__ ("movq %0, %%mm0\n\t" \
                              #op " %1, %%mm0\n\t" \
                              "movq %%mm0, %0" \
                              : "+m" (memd) \
                              : "m" (mems)); \
        mmx_trace = (memd); \
        printf(#memd "=0x%08x%08x\n", \
               mmx_trace.d[1], mmx_trace.d[0]); \
    }
|
||||
|
||||
#else
|
||||
|
||||
/* These macros are a lot simpler without the tracing...
|
||||
*/
|
||||
|
||||
/* Untraced immediate-to-register operation: emits "op imm, reg".
     op  - instruction mnemonic, pasted into the asm text
     imm - passed as asm input operand %0 with the catch-all "X" constraint
     reg - MMX register name, pasted into the asm text after "%%"
   No outputs are declared; the register is modified behind the
   compiler's back. */
#define mmx_i2r(op, imm, reg) \
|
||||
__asm__ __volatile__ (#op " %0, %%" #reg \
|
||||
: /* nothing */ \
|
||||
: "X" (imm) )
|
||||
|
||||
/* Untraced memory-to-register operation: emits "op mem, reg".
     op  - instruction mnemonic, pasted into the asm text
     mem - source, passed as a memory input operand ("m")
     reg - MMX register name, pasted into the asm text after "%%"
   No outputs are declared; the register is modified behind the
   compiler's back. */
#define mmx_m2r(op, mem, reg) \
|
||||
__asm__ __volatile__ (#op " %0, %%" #reg \
|
||||
: /* nothing */ \
|
||||
: "m" (mem))
|
||||
|
||||
/* Untraced register-to-memory operation: emits "op reg, mem".
     op  - instruction mnemonic, pasted into the asm text
     reg - MMX register name, pasted into the asm text after "%%"
     mem - destination, passed as a write-only memory output ("=m") */
#define mmx_r2m(op, reg, mem) \
|
||||
__asm__ __volatile__ (#op " %%" #reg ", %0" \
|
||||
: "=m" (mem) \
|
||||
: /* nothing */ )
|
||||
|
||||
/* Untraced register-to-register operation: emits "op regs, regd".
   The asm statement has no operand lists, i.e. it is basic asm, which is
   why the register names are prefixed with a single '%' here rather than
   the "%%" used by the operand-taking macros. */
#define mmx_r2r(op, regs, regd) \
|
||||
__asm__ __volatile__ (#op " %" #regs ", %" #regd)
|
||||
|
||||
/* Untraced memory-to-memory MMX operation, emulated through mm0:
       mm0 = memd;  mm0 = mm0 <op> mems;  memd = mm0
     op   - instruction mnemonic (pasted into the asm text)
     mems - source operand (memory)
     memd - destination operand (memory); read first, then overwritten
   memd is declared "+m" because the asm loads it before storing to it; an
   output-only constraint would let the compiler treat its previous value
   as dead.  Both operands are forced to memory ("m"), as in mmx_m2r and
   mmx_r2m.  mm0 is overwritten as scratch. */
#define mmx_m2m(op, mems, memd) \
    __asm__ __volatile__ ("movq %0, %%mm0\n\t" \
                          #op " %1, %%mm0\n\t" \
                          "movq %%mm0, %0" \
                          : "+m" (memd) \
                          : "m" (mems))
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
/* 1x64 MOVe Quadword
|
||||
(this is both a load and a store...
|
||||
in fact, it is the only way to store)
|
||||
*/
|
||||
#define movq_m2r(var, reg) mmx_m2r(movq, var, reg)
|
||||
#define movq_r2m(reg, var) mmx_r2m(movq, reg, var)
|
||||
#define movq_r2r(regs, regd) mmx_r2r(movq, regs, regd)
|
||||
/* Copies vars to vard with two movq instructions staged through mm0
   (load vars into mm0, then store mm0 to vard).  mm0 is overwritten.
   Both operands use the catch-all "X" constraint. */
#define movq(vars, vard) \
|
||||
__asm__ __volatile__ ("movq %1, %%mm0\n\t" \
|
||||
"movq %%mm0, %0" \
|
||||
: "=X" (vard) \
|
||||
: "X" (vars))
|
||||
|
||||
|
||||
/* 1x32 MOVe Doubleword
|
||||
(like movq, this is both load and store...
|
||||
but is most useful for moving things between
|
||||
mmx registers and ordinary registers)
|
||||
*/
|
||||
#define movd_m2r(var, reg) mmx_m2r(movd, var, reg)
|
||||
#define movd_r2m(reg, var) mmx_r2m(movd, reg, var)
|
||||
#define movd_r2r(regs, regd) mmx_r2r(movd, regs, regd)
|
||||
/* Copies vars to vard with two movd instructions staged through mm0
   (load vars into mm0, then store mm0 to vard).  mm0 is overwritten.
   Both operands use the catch-all "X" constraint. */
#define movd(vars, vard) \
|
||||
__asm__ __volatile__ ("movd %1, %%mm0\n\t" \
|
||||
"movd %%mm0, %0" \
|
||||
: "=X" (vard) \
|
||||
: "X" (vars))
|
||||
|
||||
|
||||
/* 2x32, 4x16, and 8x8 Parallel ADDs
|
||||
*/
|
||||
#define paddd_m2r(var, reg) mmx_m2r(paddd, var, reg)
|
||||
#define paddd_r2r(regs, regd) mmx_r2r(paddd, regs, regd)
|
||||
#define paddd(vars, vard) mmx_m2m(paddd, vars, vard)
|
||||
|
||||
#define paddw_m2r(var, reg) mmx_m2r(paddw, var, reg)
|
||||
#define paddw_r2r(regs, regd) mmx_r2r(paddw, regs, regd)
|
||||
#define paddw(vars, vard) mmx_m2m(paddw, vars, vard)
|
||||
|
||||
#define paddb_m2r(var, reg) mmx_m2r(paddb, var, reg)
|
||||
#define paddb_r2r(regs, regd) mmx_r2r(paddb, regs, regd)
|
||||
#define paddb(vars, vard) mmx_m2m(paddb, vars, vard)
|
||||
|
||||
|
||||
/* 4x16 and 8x8 Parallel ADDs using Saturation arithmetic
|
||||
*/
|
||||
#define paddsw_m2r(var, reg) mmx_m2r(paddsw, var, reg)
|
||||
#define paddsw_r2r(regs, regd) mmx_r2r(paddsw, regs, regd)
|
||||
#define paddsw(vars, vard) mmx_m2m(paddsw, vars, vard)
|
||||
|
||||
#define paddsb_m2r(var, reg) mmx_m2r(paddsb, var, reg)
|
||||
#define paddsb_r2r(regs, regd) mmx_r2r(paddsb, regs, regd)
|
||||
#define paddsb(vars, vard) mmx_m2m(paddsb, vars, vard)
|
||||
|
||||
|
||||
/* 4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic
|
||||
*/
|
||||
#define paddusw_m2r(var, reg) mmx_m2r(paddusw, var, reg)
|
||||
#define paddusw_r2r(regs, regd) mmx_r2r(paddusw, regs, regd)
|
||||
#define paddusw(vars, vard) mmx_m2m(paddusw, vars, vard)
|
||||
|
||||
#define paddusb_m2r(var, reg) mmx_m2r(paddusb, var, reg)
|
||||
#define paddusb_r2r(regs, regd) mmx_r2r(paddusb, regs, regd)
|
||||
#define paddusb(vars, vard) mmx_m2m(paddusb, vars, vard)
|
||||
|
||||
|
||||
/* 2x32, 4x16, and 8x8 Parallel SUBs
|
||||
*/
|
||||
#define psubd_m2r(var, reg) mmx_m2r(psubd, var, reg)
|
||||
#define psubd_r2r(regs, regd) mmx_r2r(psubd, regs, regd)
|
||||
#define psubd(vars, vard) mmx_m2m(psubd, vars, vard)
|
||||
|
||||
#define psubw_m2r(var, reg) mmx_m2r(psubw, var, reg)
|
||||
#define psubw_r2r(regs, regd) mmx_r2r(psubw, regs, regd)
|
||||
#define psubw(vars, vard) mmx_m2m(psubw, vars, vard)
|
||||
|
||||
#define psubb_m2r(var, reg) mmx_m2r(psubb, var, reg)
|
||||
#define psubb_r2r(regs, regd) mmx_r2r(psubb, regs, regd)
|
||||
#define psubb(vars, vard) mmx_m2m(psubb, vars, vard)
|
||||
|
||||
|
||||
/* 4x16 and 8x8 Parallel SUBs using Saturation arithmetic
|
||||
*/
|
||||
#define psubsw_m2r(var, reg) mmx_m2r(psubsw, var, reg)
|
||||
#define psubsw_r2r(regs, regd) mmx_r2r(psubsw, regs, regd)
|
||||
#define psubsw(vars, vard) mmx_m2m(psubsw, vars, vard)
|
||||
|
||||
#define psubsb_m2r(var, reg) mmx_m2r(psubsb, var, reg)
|
||||
#define psubsb_r2r(regs, regd) mmx_r2r(psubsb, regs, regd)
|
||||
#define psubsb(vars, vard) mmx_m2m(psubsb, vars, vard)
|
||||
|
||||
|
||||
/* 4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
|
||||
*/
|
||||
#define psubusw_m2r(var, reg) mmx_m2r(psubusw, var, reg)
|
||||
#define psubusw_r2r(regs, regd) mmx_r2r(psubusw, regs, regd)
|
||||
#define psubusw(vars, vard) mmx_m2m(psubusw, vars, vard)
|
||||
|
||||
#define psubusb_m2r(var, reg) mmx_m2r(psubusb, var, reg)
|
||||
#define psubusb_r2r(regs, regd) mmx_r2r(psubusb, regs, regd)
|
||||
#define psubusb(vars, vard) mmx_m2m(psubusb, vars, vard)
|
||||
|
||||
|
||||
/* 4x16 Parallel MULs giving Low 4x16 portions of results
|
||||
*/
|
||||
#define pmullw_m2r(var, reg) mmx_m2r(pmullw, var, reg)
|
||||
#define pmullw_r2r(regs, regd) mmx_r2r(pmullw, regs, regd)
|
||||
#define pmullw(vars, vard) mmx_m2m(pmullw, vars, vard)
|
||||
|
||||
|
||||
/* 4x16 Parallel MULs giving High 4x16 portions of results
|
||||
*/
|
||||
#define pmulhw_m2r(var, reg) mmx_m2r(pmulhw, var, reg)
|
||||
#define pmulhw_r2r(regs, regd) mmx_r2r(pmulhw, regs, regd)
|
||||
#define pmulhw(vars, vard) mmx_m2m(pmulhw, vars, vard)
|
||||
|
||||
|
||||
/* 4x16->2x32 Parallel Mul-ADD
|
||||
(multiplies the four signed 16-bit fields into full 32-bit products,
|
||||
then adds each adjacent pair of products to make the final 2x32 result)
|
||||
*/
|
||||
#define pmaddwd_m2r(var, reg) mmx_m2r(pmaddwd, var, reg)
|
||||
#define pmaddwd_r2r(regs, regd) mmx_r2r(pmaddwd, regs, regd)
|
||||
#define pmaddwd(vars, vard) mmx_m2m(pmaddwd, vars, vard)
|
||||
|
||||
|
||||
/* 1x64 bitwise AND
|
||||
*/
|
||||
#ifdef BROKEN_PAND
/* Work-around build: pand is synthesised from two pandn instructions.
   pandn computes dest = ~dest & src, so
       pandn all-ones, reg   ->  reg = ~reg
       pandn var, reg        ->  reg = ~(~reg) & var = reg & var */
#define pand_m2r(var, reg) \
    { \
        mmx_m2r(pandn, (mmx_t) -1LL, reg); \
        mmx_m2r(pandn, var, reg); \
    }
/* Register-to-register form.  The ';' after mmx_r2r() is required: the
   untraced mmx_r2r expands to a bare asm expression, and without the
   terminator the closing brace is a syntax error. */
#define pand_r2r(regs, regd) \
    { \
        mmx_m2r(pandn, (mmx_t) -1LL, regd); \
        mmx_r2r(pandn, regs, regd); \
    }
/* Memory-to-memory form: loads vard into mm0, ANDs it with vars via the
   pandn pair, and stores mm0 back to vard.  mm0 is overwritten. */
#define pand(vars, vard) \
    { \
        movq_m2r(vard, mm0); \
        mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
        mmx_m2r(pandn, vars, mm0); \
        movq_r2m(mm0, vard); \
    }
#else
/* Normal build: use the pand instruction directly. */
#define pand_m2r(var, reg) mmx_m2r(pand, var, reg)
#define pand_r2r(regs, regd) mmx_r2r(pand, regs, regd)
#define pand(vars, vard) mmx_m2m(pand, vars, vard)
#endif
|
||||
|
||||
|
||||
/* 1x64 bitwise AND with Not the destination
|
||||
*/
|
||||
#define pandn_m2r(var, reg) mmx_m2r(pandn, var, reg)
|
||||
#define pandn_r2r(regs, regd) mmx_r2r(pandn, regs, regd)
|
||||
#define pandn(vars, vard) mmx_m2m(pandn, vars, vard)
|
||||
|
||||
|
||||
/* 1x64 bitwise OR
|
||||
*/
|
||||
#define por_m2r(var, reg) mmx_m2r(por, var, reg)
|
||||
#define por_r2r(regs, regd) mmx_r2r(por, regs, regd)
|
||||
#define por(vars, vard) mmx_m2m(por, vars, vard)
|
||||
|
||||
|
||||
/* 1x64 bitwise eXclusive OR
|
||||
*/
|
||||
#define pxor_m2r(var, reg) mmx_m2r(pxor, var, reg)
|
||||
#define pxor_r2r(regs, regd) mmx_r2r(pxor, regs, regd)
|
||||
#define pxor(vars, vard) mmx_m2m(pxor, vars, vard)
|
||||
|
||||
|
||||
/* 2x32, 4x16, and 8x8 Parallel CoMPare for EQuality
|
||||
(resulting fields are either 0 or -1)
|
||||
*/
|
||||
#define pcmpeqd_m2r(var, reg) mmx_m2r(pcmpeqd, var, reg)
|
||||
#define pcmpeqd_r2r(regs, regd) mmx_r2r(pcmpeqd, regs, regd)
|
||||
#define pcmpeqd(vars, vard) mmx_m2m(pcmpeqd, vars, vard)
|
||||
|
||||
#define pcmpeqw_m2r(var, reg) mmx_m2r(pcmpeqw, var, reg)
|
||||
#define pcmpeqw_r2r(regs, regd) mmx_r2r(pcmpeqw, regs, regd)
|
||||
#define pcmpeqw(vars, vard) mmx_m2m(pcmpeqw, vars, vard)
|
||||
|
||||
#define pcmpeqb_m2r(var, reg) mmx_m2r(pcmpeqb, var, reg)
|
||||
#define pcmpeqb_r2r(regs, regd) mmx_r2r(pcmpeqb, regs, regd)
|
||||
#define pcmpeqb(vars, vard) mmx_m2m(pcmpeqb, vars, vard)
|
||||
|
||||
|
||||
/* 2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than
|
||||
(resulting fields are either 0 or -1)
|
||||
*/
|
||||
#define pcmpgtd_m2r(var, reg) mmx_m2r(pcmpgtd, var, reg)
|
||||
#define pcmpgtd_r2r(regs, regd) mmx_r2r(pcmpgtd, regs, regd)
|
||||
#define pcmpgtd(vars, vard) mmx_m2m(pcmpgtd, vars, vard)
|
||||
|
||||
#define pcmpgtw_m2r(var, reg) mmx_m2r(pcmpgtw, var, reg)
|
||||
#define pcmpgtw_r2r(regs, regd) mmx_r2r(pcmpgtw, regs, regd)
|
||||
#define pcmpgtw(vars, vard) mmx_m2m(pcmpgtw, vars, vard)
|
||||
|
||||
#define pcmpgtb_m2r(var, reg) mmx_m2r(pcmpgtb, var, reg)
|
||||
#define pcmpgtb_r2r(regs, regd) mmx_r2r(pcmpgtb, regs, regd)
|
||||
#define pcmpgtb(vars, vard) mmx_m2m(pcmpgtb, vars, vard)
|
||||
|
||||
|
||||
/* 1x64, 2x32, and 4x16 Parallel Shift Left Logical
|
||||
*/
|
||||
#define psllq_i2r(imm, reg) mmx_i2r(psllq, imm, reg)
|
||||
#define psllq_m2r(var, reg) mmx_m2r(psllq, var, reg)
|
||||
#define psllq_r2r(regs, regd) mmx_r2r(psllq, regs, regd)
|
||||
#define psllq(vars, vard) mmx_m2m(psllq, vars, vard)
|
||||
|
||||
#define pslld_i2r(imm, reg) mmx_i2r(pslld, imm, reg)
|
||||
#define pslld_m2r(var, reg) mmx_m2r(pslld, var, reg)
|
||||
#define pslld_r2r(regs, regd) mmx_r2r(pslld, regs, regd)
|
||||
#define pslld(vars, vard) mmx_m2m(pslld, vars, vard)
|
||||
|
||||
#define psllw_i2r(imm, reg) mmx_i2r(psllw, imm, reg)
|
||||
#define psllw_m2r(var, reg) mmx_m2r(psllw, var, reg)
|
||||
#define psllw_r2r(regs, regd) mmx_r2r(psllw, regs, regd)
|
||||
#define psllw(vars, vard) mmx_m2m(psllw, vars, vard)
|
||||
|
||||
|
||||
/* 1x64, 2x32, and 4x16 Parallel Shift Right Logical
|
||||
*/
|
||||
#define psrlq_i2r(imm, reg) mmx_i2r(psrlq, imm, reg)
|
||||
#define psrlq_m2r(var, reg) mmx_m2r(psrlq, var, reg)
|
||||
#define psrlq_r2r(regs, regd) mmx_r2r(psrlq, regs, regd)
|
||||
#define psrlq(vars, vard) mmx_m2m(psrlq, vars, vard)
|
||||
|
||||
#define psrld_i2r(imm, reg) mmx_i2r(psrld, imm, reg)
|
||||
#define psrld_m2r(var, reg) mmx_m2r(psrld, var, reg)
|
||||
#define psrld_r2r(regs, regd) mmx_r2r(psrld, regs, regd)
|
||||
#define psrld(vars, vard) mmx_m2m(psrld, vars, vard)
|
||||
|
||||
#define psrlw_i2r(imm, reg) mmx_i2r(psrlw, imm, reg)
|
||||
#define psrlw_m2r(var, reg) mmx_m2r(psrlw, var, reg)
|
||||
#define psrlw_r2r(regs, regd) mmx_r2r(psrlw, regs, regd)
|
||||
#define psrlw(vars, vard) mmx_m2m(psrlw, vars, vard)
|
||||
|
||||
|
||||
/* 2x32 and 4x16 Parallel Shift Right Arithmetic
|
||||
*/
|
||||
#define psrad_i2r(imm, reg) mmx_i2r(psrad, imm, reg)
|
||||
#define psrad_m2r(var, reg) mmx_m2r(psrad, var, reg)
|
||||
#define psrad_r2r(regs, regd) mmx_r2r(psrad, regs, regd)
|
||||
#define psrad(vars, vard) mmx_m2m(psrad, vars, vard)
|
||||
|
||||
#define psraw_i2r(imm, reg) mmx_i2r(psraw, imm, reg)
|
||||
#define psraw_m2r(var, reg) mmx_m2r(psraw, var, reg)
|
||||
#define psraw_r2r(regs, regd) mmx_r2r(psraw, regs, regd)
|
||||
#define psraw(vars, vard) mmx_m2m(psraw, vars, vard)
|
||||
|
||||
|
||||
/* 2x32->4x16 and 4x16->8x8 PACK and Signed Saturate
|
||||
(packs source and dest fields into dest in that order)
|
||||
*/
|
||||
#define packssdw_m2r(var, reg) mmx_m2r(packssdw, var, reg)
|
||||
#define packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd)
|
||||
#define packssdw(vars, vard) mmx_m2m(packssdw, vars, vard)
|
||||
|
||||
#define packsswb_m2r(var, reg) mmx_m2r(packsswb, var, reg)
|
||||
#define packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd)
|
||||
#define packsswb(vars, vard) mmx_m2m(packsswb, vars, vard)
|
||||
|
||||
|
||||
/* 4x16->8x8 PACK and Unsigned Saturate
|
||||
(packs source and dest fields into dest in that order)
|
||||
*/
|
||||
#define packuswb_m2r(var, reg) mmx_m2r(packuswb, var, reg)
|
||||
#define packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd)
|
||||
#define packuswb(vars, vard) mmx_m2m(packuswb, vars, vard)
|
||||
|
||||
|
||||
/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low
|
||||
(interleaves low half of dest with low half of source
|
||||
as padding in each result field)
|
||||
*/
|
||||
#define punpckldq_m2r(var, reg) mmx_m2r(punpckldq, var, reg)
|
||||
#define punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd)
|
||||
#define punpckldq(vars, vard) mmx_m2m(punpckldq, vars, vard)
|
||||
|
||||
#define punpcklwd_m2r(var, reg) mmx_m2r(punpcklwd, var, reg)
|
||||
#define punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd)
|
||||
#define punpcklwd(vars, vard) mmx_m2m(punpcklwd, vars, vard)
|
||||
|
||||
#define punpcklbw_m2r(var, reg) mmx_m2r(punpcklbw, var, reg)
|
||||
#define punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd)
|
||||
#define punpcklbw(vars, vard) mmx_m2m(punpcklbw, vars, vard)
|
||||
|
||||
|
||||
/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High
|
||||
(interleaves high half of dest with high half of source
|
||||
as padding in each result field)
|
||||
*/
|
||||
#define punpckhdq_m2r(var, reg) mmx_m2r(punpckhdq, var, reg)
|
||||
#define punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd)
|
||||
#define punpckhdq(vars, vard) mmx_m2m(punpckhdq, vars, vard)
|
||||
|
||||
#define punpckhwd_m2r(var, reg) mmx_m2r(punpckhwd, var, reg)
|
||||
#define punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd)
|
||||
#define punpckhwd(vars, vard) mmx_m2m(punpckhwd, vars, vard)
|
||||
|
||||
#define punpckhbw_m2r(var, reg) mmx_m2r(punpckhbw, var, reg)
|
||||
#define punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd)
|
||||
#define punpckhbw(vars, vard) mmx_m2m(punpckhbw, vars, vard)
|
||||
|
||||
|
||||
/* Empty MMx State
|
||||
(used to clean-up when going from mmx to float use
|
||||
of the registers that are shared by both; note that
|
||||
there is no float-to-mmx operation needed, because
|
||||
only the float tag word info is corruptible)
|
||||
*/
|
||||
/* emms() issues the EMMS instruction.  With MMX_TRACE defined it first
   prints "emms()" via printf; otherwise it is the bare asm statement. */
#ifdef MMX_TRACE
|
||||
|
||||
#define emms() \
|
||||
{ \
|
||||
printf("emms()\n"); \
|
||||
__asm__ __volatile__ ("emms"); \
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define emms() __asm__ __volatile__ ("emms")
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
/* vi: set ts=4 sw=4 expandtab: */
|
|
@ -1349,13 +1349,37 @@ GL_SetupCopy(SDL_Renderer * renderer, SDL_Texture * texture)
|
|||
|
||||
GL_SetBlendMode(data, texture->blendMode);
|
||||
|
||||
if (texturedata->yuv) {
|
||||
GL_SetShader(data, SHADER_YUV);
|
||||
} else if (texturedata->nv12) {
|
||||
if (texture->format == SDL_PIXELFORMAT_NV12) {
|
||||
GL_SetShader(data, SHADER_NV12);
|
||||
} else {
|
||||
GL_SetShader(data, SHADER_NV21);
|
||||
if (texturedata->yuv || texturedata->nv12) {
|
||||
switch (SDL_GetYUVConversionModeForResolution(texture->w, texture->h)) {
|
||||
case SDL_YUV_CONVERSION_JPEG:
|
||||
if (texturedata->yuv) {
|
||||
GL_SetShader(data, SHADER_YUV_JPEG);
|
||||
} else if (texture->format == SDL_PIXELFORMAT_NV12) {
|
||||
GL_SetShader(data, SHADER_NV12_JPEG);
|
||||
} else {
|
||||
GL_SetShader(data, SHADER_NV21_JPEG);
|
||||
}
|
||||
break;
|
||||
case SDL_YUV_CONVERSION_BT601:
|
||||
if (texturedata->yuv) {
|
||||
GL_SetShader(data, SHADER_YUV_BT601);
|
||||
} else if (texture->format == SDL_PIXELFORMAT_NV12) {
|
||||
GL_SetShader(data, SHADER_NV12_BT601);
|
||||
} else {
|
||||
GL_SetShader(data, SHADER_NV21_BT601);
|
||||
}
|
||||
break;
|
||||
case SDL_YUV_CONVERSION_BT709:
|
||||
if (texturedata->yuv) {
|
||||
GL_SetShader(data, SHADER_YUV_BT709);
|
||||
} else if (texture->format == SDL_PIXELFORMAT_NV12) {
|
||||
GL_SetShader(data, SHADER_NV12_BT709);
|
||||
} else {
|
||||
GL_SetShader(data, SHADER_NV21_BT709);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
return SDL_SetError("Unsupported YUV conversion mode");
|
||||
}
|
||||
} else {
|
||||
GL_SetShader(data, SHADER_RGB);
|
||||
|
|
|
@ -62,6 +62,151 @@ struct GL_ShaderContext
|
|||
GL_ShaderData shaders[NUM_SHADERS];
|
||||
};
|
||||
|
||||
/* GLSL vertex shader source (string literal) for untextured drawing:
   transforms gl_Vertex by gl_ModelViewProjectionMatrix and forwards
   gl_Color to the fragment stage as the varying v_color. */
#define COLOR_VERTEX_SHADER \
|
||||
"varying vec4 v_color;\n" \
|
||||
"\n" \
|
||||
"void main()\n" \
|
||||
"{\n" \
|
||||
" gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n" \
|
||||
" v_color = gl_Color;\n" \
|
||||
"}" \
|
||||
|
||||
/* GLSL vertex shader source (string literal) for textured drawing:
   transforms gl_Vertex by gl_ModelViewProjectionMatrix, forwards gl_Color
   as v_color, and forwards the first two components of gl_MultiTexCoord0
   as v_texCoord. */
#define TEXTURE_VERTEX_SHADER \
|
||||
"varying vec4 v_color;\n" \
|
||||
"varying vec2 v_texCoord;\n" \
|
||||
"\n" \
|
||||
"void main()\n" \
|
||||
"{\n" \
|
||||
" gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n" \
|
||||
" v_color = gl_Color;\n" \
|
||||
" v_texCoord = vec2(gl_MultiTexCoord0);\n" \
|
||||
"}" \
|
||||
|
||||
/* GLSL constant declarations for the "JPEG" YUV->RGB conversion.
   Declares `offset`, `Rcoeff`, `Gcoeff` and `Bcoeff`, the names consumed by
   the *_SHADER_BODY fragments (yuv += offset; rgb.x = dot(yuv, Xcoeff)).
   The offset leaves Y unshifted and subtracts about 128/255 from U and V;
   the Y coefficient is 1 in every row. */
#define JPEG_SHADER_CONSTANTS \
|
||||
"// YUV offset \n" \
|
||||
"const vec3 offset = vec3(0, -0.501960814, -0.501960814);\n" \
|
||||
"\n" \
|
||||
"// RGB coefficients \n" \
|
||||
"const vec3 Rcoeff = vec3(1, 0.000, 1.402);\n" \
|
||||
"const vec3 Gcoeff = vec3(1, -0.3441, -0.7141);\n" \
|
||||
"const vec3 Bcoeff = vec3(1, 1.772, 0.000);\n" \
|
||||
|
||||
/* GLSL constant declarations for the BT.601 YUV->RGB conversion.
   Declares `offset`, `Rcoeff`, `Gcoeff` and `Bcoeff`, the names consumed by
   the *_SHADER_BODY fragments (yuv += offset; rgb.x = dot(yuv, Xcoeff)).
   The offset subtracts about 16/255 from Y and about 128/255 from U and V;
   the Y coefficient 1.1644 is about 255/219. */
#define BT601_SHADER_CONSTANTS \
|
||||
"// YUV offset \n" \
|
||||
"const vec3 offset = vec3(-0.0627451017, -0.501960814, -0.501960814);\n" \
|
||||
"\n" \
|
||||
"// RGB coefficients \n" \
|
||||
"const vec3 Rcoeff = vec3(1.1644, 0.000, 1.596);\n" \
|
||||
"const vec3 Gcoeff = vec3(1.1644, -0.3918, -0.813);\n" \
|
||||
"const vec3 Bcoeff = vec3(1.1644, 2.0172, 0.000);\n" \
|
||||
|
||||
/* GLSL constant declarations for the BT.709 YUV->RGB conversion.
   Declares `offset`, `Rcoeff`, `Gcoeff` and `Bcoeff`, the names consumed by
   the *_SHADER_BODY fragments (yuv += offset; rgb.x = dot(yuv, Xcoeff)).
   The offset and the Y coefficient are the same values as in
   BT601_SHADER_CONSTANTS; only the U/V coefficients differ. */
#define BT709_SHADER_CONSTANTS \
|
||||
"// YUV offset \n" \
|
||||
"const vec3 offset = vec3(-0.0627451017, -0.501960814, -0.501960814);\n" \
|
||||
"\n" \
|
||||
"// RGB coefficients \n" \
|
||||
"const vec3 Rcoeff = vec3(1.1644, 0.000, 1.7927);\n" \
|
||||
"const vec3 Gcoeff = vec3(1.1644, -0.2132, -0.5329);\n" \
|
||||
"const vec3 Bcoeff = vec3(1.1644, 2.1124, 0.000);\n" \
|
||||
|
||||
/* Opening declarations of the three-plane YUV fragment shader: the
   v_color / v_texCoord varyings and one sampler per plane (tex0 = Y,
   tex1 = U, tex2 = V).  Meant to be followed by a *_SHADER_CONSTANTS
   block and YUV_SHADER_BODY. */
#define YUV_SHADER_PROLOGUE \
|
||||
"varying vec4 v_color;\n" \
|
||||
"varying vec2 v_texCoord;\n" \
|
||||
"uniform sampler2D tex0; // Y \n" \
|
||||
"uniform sampler2D tex1; // U \n" \
|
||||
"uniform sampler2D tex2; // V \n" \
|
||||
"\n" \
|
||||
|
||||
/* main() of the three-plane YUV fragment shader.
   Samples Y from tex0 at v_texCoord, scales the coordinate by
   UVCoordScale and samples U from tex1 and V from tex2 (red channel of
   each), adds `offset`, and forms each RGB component as a dot product
   with Rcoeff / Gcoeff / Bcoeff.  The result is written with alpha 1.0
   and multiplied by v_color.
   `offset` and the coefficient vectors must be declared by a
   *_SHADER_CONSTANTS block placed before this body.  UVCoordScale is not
   declared in any of the shader macros here; presumably it is supplied
   when the shader source is compiled (confirm at the compile site). */
#define YUV_SHADER_BODY \
|
||||
"\n" \
|
||||
"void main()\n" \
|
||||
"{\n" \
|
||||
" vec2 tcoord;\n" \
|
||||
" vec3 yuv, rgb;\n" \
|
||||
"\n" \
|
||||
" // Get the Y value \n" \
|
||||
" tcoord = v_texCoord;\n" \
|
||||
" yuv.x = texture2D(tex0, tcoord).r;\n" \
|
||||
"\n" \
|
||||
" // Get the U and V values \n" \
|
||||
" tcoord *= UVCoordScale;\n" \
|
||||
" yuv.y = texture2D(tex1, tcoord).r;\n" \
|
||||
" yuv.z = texture2D(tex2, tcoord).r;\n" \
|
||||
"\n" \
|
||||
" // Do the color transform \n" \
|
||||
" yuv += offset;\n" \
|
||||
" rgb.r = dot(yuv, Rcoeff);\n" \
|
||||
" rgb.g = dot(yuv, Gcoeff);\n" \
|
||||
" rgb.b = dot(yuv, Bcoeff);\n" \
|
||||
"\n" \
|
||||
" // That was easy. :) \n" \
|
||||
" gl_FragColor = vec4(rgb, 1.0) * v_color;\n" \
|
||||
"}" \
|
||||
|
||||
/* Opening declarations of the NV12 fragment shader: the v_color /
   v_texCoord varyings and two samplers (tex0 = Y, tex1 = combined U/V).
   Meant to be followed by a *_SHADER_CONSTANTS block and
   NV12_SHADER_BODY. */
#define NV12_SHADER_PROLOGUE \
|
||||
"varying vec4 v_color;\n" \
|
||||
"varying vec2 v_texCoord;\n" \
|
||||
"uniform sampler2D tex0; // Y \n" \
|
||||
"uniform sampler2D tex1; // U/V \n" \
|
||||
"\n" \
|
||||
|
||||
/* main() of the NV12 fragment shader.
   Samples Y from tex0 at v_texCoord, scales the coordinate by
   UVCoordScale, then takes both chroma values from a single tex1 sample
   using the ".ra" swizzle: red -> yuv.y (U), alpha -> yuv.z (V).  The
   colour transform that follows (add `offset`, dot with Rcoeff / Gcoeff /
   Bcoeff, multiply by v_color) is the same as in YUV_SHADER_BODY.
   `offset` and the coefficient vectors must be declared by a
   *_SHADER_CONSTANTS block placed before this body.  UVCoordScale is not
   declared in any of the shader macros here; presumably it is supplied
   when the shader source is compiled (confirm at the compile site). */
#define NV12_SHADER_BODY \
|
||||
"\n" \
|
||||
"void main()\n" \
|
||||
"{\n" \
|
||||
" vec2 tcoord;\n" \
|
||||
" vec3 yuv, rgb;\n" \
|
||||
"\n" \
|
||||
" // Get the Y value \n" \
|
||||
" tcoord = v_texCoord;\n" \
|
||||
" yuv.x = texture2D(tex0, tcoord).r;\n" \
|
||||
"\n" \
|
||||
" // Get the U and V values \n" \
|
||||
" tcoord *= UVCoordScale;\n" \
|
||||
" yuv.yz = texture2D(tex1, tcoord).ra;\n" \
|
||||
"\n" \
|
||||
" // Do the color transform \n" \
|
||||
" yuv += offset;\n" \
|
||||
" rgb.r = dot(yuv, Rcoeff);\n" \
|
||||
" rgb.g = dot(yuv, Gcoeff);\n" \
|
||||
" rgb.b = dot(yuv, Bcoeff);\n" \
|
||||
"\n" \
|
||||
" // That was easy. :) \n" \
|
||||
" gl_FragColor = vec4(rgb, 1.0) * v_color;\n" \
|
||||
"}" \
|
||||
|
||||
/* Opening declarations of the NV21 fragment shader: the v_color /
   v_texCoord varyings and two samplers (tex0 = Y, tex1 = combined
   chroma).  The text is the same as NV12_SHADER_PROLOGUE; the NV12/NV21
   difference lives in the body's swizzle.  Meant to be followed by a
   *_SHADER_CONSTANTS block and NV21_SHADER_BODY. */
#define NV21_SHADER_PROLOGUE \
|
||||
"varying vec4 v_color;\n" \
|
||||
"varying vec2 v_texCoord;\n" \
|
||||
"uniform sampler2D tex0; // Y \n" \
|
||||
"uniform sampler2D tex1; // U/V \n" \
|
||||
"\n" \
|
||||
|
||||
/* main() of the NV21 fragment shader.
   Identical to NV12_SHADER_BODY except for the chroma swizzle: the tex1
   sample is read with ".ar", so alpha -> yuv.y (U) and red -> yuv.z (V),
   i.e. the two chroma channels are swapped relative to NV12.
   `offset` and the coefficient vectors must be declared by a
   *_SHADER_CONSTANTS block placed before this body.  UVCoordScale is not
   declared in any of the shader macros here; presumably it is supplied
   when the shader source is compiled (confirm at the compile site). */
#define NV21_SHADER_BODY \
|
||||
"\n" \
|
||||
"void main()\n" \
|
||||
"{\n" \
|
||||
" vec2 tcoord;\n" \
|
||||
" vec3 yuv, rgb;\n" \
|
||||
"\n" \
|
||||
" // Get the Y value \n" \
|
||||
" tcoord = v_texCoord;\n" \
|
||||
" yuv.x = texture2D(tex0, tcoord).r;\n" \
|
||||
"\n" \
|
||||
" // Get the U and V values \n" \
|
||||
" tcoord *= UVCoordScale;\n" \
|
||||
" yuv.yz = texture2D(tex1, tcoord).ar;\n" \
|
||||
"\n" \
|
||||
" // Do the color transform \n" \
|
||||
" yuv += offset;\n" \
|
||||
" rgb.r = dot(yuv, Rcoeff);\n" \
|
||||
" rgb.g = dot(yuv, Gcoeff);\n" \
|
||||
" rgb.b = dot(yuv, Bcoeff);\n" \
|
||||
"\n" \
|
||||
" // That was easy. :) \n" \
|
||||
" gl_FragColor = vec4(rgb, 1.0) * v_color;\n" \
|
||||
"}" \
|
||||
|
||||
/*
|
||||
* NOTE: Always use sampler2D, etc here. We'll #define them to the
|
||||
* texture_rectangle versions if we choose to use that extension.
|
||||
|
@ -74,13 +219,7 @@ static const char *shader_source[NUM_SHADERS][2] =
|
|||
/* SHADER_SOLID */
|
||||
{
|
||||
/* vertex shader */
|
||||
"varying vec4 v_color;\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n"
|
||||
" v_color = gl_Color;\n"
|
||||
"}",
|
||||
COLOR_VERTEX_SHADER,
|
||||
/* fragment shader */
|
||||
"varying vec4 v_color;\n"
|
||||
"\n"
|
||||
|
@ -93,15 +232,7 @@ static const char *shader_source[NUM_SHADERS][2] =
|
|||
/* SHADER_RGB */
|
||||
{
|
||||
/* vertex shader */
|
||||
"varying vec4 v_color;\n"
|
||||
"varying vec2 v_texCoord;\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n"
|
||||
" v_color = gl_Color;\n"
|
||||
" v_texCoord = vec2(gl_MultiTexCoord0);\n"
|
||||
"}",
|
||||
TEXTURE_VERTEX_SHADER,
|
||||
/* fragment shader */
|
||||
"varying vec4 v_color;\n"
|
||||
"varying vec2 v_texCoord;\n"
|
||||
|
@ -113,156 +244,86 @@ static const char *shader_source[NUM_SHADERS][2] =
|
|||
"}"
|
||||
},
|
||||
|
||||
/* SHADER_YUV */
|
||||
/* SHADER_YUV_JPEG */
|
||||
{
|
||||
/* vertex shader */
|
||||
"varying vec4 v_color;\n"
|
||||
"varying vec2 v_texCoord;\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n"
|
||||
" v_color = gl_Color;\n"
|
||||
" v_texCoord = vec2(gl_MultiTexCoord0);\n"
|
||||
"}",
|
||||
TEXTURE_VERTEX_SHADER,
|
||||
/* fragment shader */
|
||||
"varying vec4 v_color;\n"
|
||||
"varying vec2 v_texCoord;\n"
|
||||
"uniform sampler2D tex0; // Y \n"
|
||||
"uniform sampler2D tex1; // U \n"
|
||||
"uniform sampler2D tex2; // V \n"
|
||||
"\n"
|
||||
"// YUV offset \n"
|
||||
"const vec3 offset = vec3(-0.0627451017, -0.501960814, -0.501960814);\n"
|
||||
"\n"
|
||||
"// RGB coefficients \n"
|
||||
"const vec3 Rcoeff = vec3(1.164, 0.000, 1.596);\n"
|
||||
"const vec3 Gcoeff = vec3(1.164, -0.391, -0.813);\n"
|
||||
"const vec3 Bcoeff = vec3(1.164, 2.018, 0.000);\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" vec2 tcoord;\n"
|
||||
" vec3 yuv, rgb;\n"
|
||||
"\n"
|
||||
" // Get the Y value \n"
|
||||
" tcoord = v_texCoord;\n"
|
||||
" yuv.x = texture2D(tex0, tcoord).r;\n"
|
||||
"\n"
|
||||
" // Get the U and V values \n"
|
||||
" tcoord *= UVCoordScale;\n"
|
||||
" yuv.y = texture2D(tex1, tcoord).r;\n"
|
||||
" yuv.z = texture2D(tex2, tcoord).r;\n"
|
||||
"\n"
|
||||
" // Do the color transform \n"
|
||||
" yuv += offset;\n"
|
||||
" rgb.r = dot(yuv, Rcoeff);\n"
|
||||
" rgb.g = dot(yuv, Gcoeff);\n"
|
||||
" rgb.b = dot(yuv, Bcoeff);\n"
|
||||
"\n"
|
||||
" // That was easy. :) \n"
|
||||
" gl_FragColor = vec4(rgb, 1.0) * v_color;\n"
|
||||
"}"
|
||||
YUV_SHADER_PROLOGUE
|
||||
JPEG_SHADER_CONSTANTS
|
||||
YUV_SHADER_BODY
|
||||
},
|
||||
|
||||
/* SHADER_NV12 */
|
||||
/* SHADER_YUV_BT601 */
|
||||
{
|
||||
/* vertex shader */
|
||||
"varying vec4 v_color;\n"
|
||||
"varying vec2 v_texCoord;\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n"
|
||||
" v_color = gl_Color;\n"
|
||||
" v_texCoord = vec2(gl_MultiTexCoord0);\n"
|
||||
"}",
|
||||
TEXTURE_VERTEX_SHADER,
|
||||
/* fragment shader */
|
||||
"varying vec4 v_color;\n"
|
||||
"varying vec2 v_texCoord;\n"
|
||||
"uniform sampler2D tex0; // Y \n"
|
||||
"uniform sampler2D tex1; // U/V \n"
|
||||
"\n"
|
||||
"// YUV offset \n"
|
||||
"const vec3 offset = vec3(-0.0627451017, -0.501960814, -0.501960814);\n"
|
||||
"\n"
|
||||
"// RGB coefficients \n"
|
||||
"const vec3 Rcoeff = vec3(1.164, 0.000, 1.596);\n"
|
||||
"const vec3 Gcoeff = vec3(1.164, -0.391, -0.813);\n"
|
||||
"const vec3 Bcoeff = vec3(1.164, 2.018, 0.000);\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" vec2 tcoord;\n"
|
||||
" vec3 yuv, rgb;\n"
|
||||
"\n"
|
||||
" // Get the Y value \n"
|
||||
" tcoord = v_texCoord;\n"
|
||||
" yuv.x = texture2D(tex0, tcoord).r;\n"
|
||||
"\n"
|
||||
" // Get the U and V values \n"
|
||||
" tcoord *= UVCoordScale;\n"
|
||||
" yuv.yz = texture2D(tex1, tcoord).ra;\n"
|
||||
"\n"
|
||||
" // Do the color transform \n"
|
||||
" yuv += offset;\n"
|
||||
" rgb.r = dot(yuv, Rcoeff);\n"
|
||||
" rgb.g = dot(yuv, Gcoeff);\n"
|
||||
" rgb.b = dot(yuv, Bcoeff);\n"
|
||||
"\n"
|
||||
" // That was easy. :) \n"
|
||||
" gl_FragColor = vec4(rgb, 1.0) * v_color;\n"
|
||||
"}"
|
||||
YUV_SHADER_PROLOGUE
|
||||
BT601_SHADER_CONSTANTS
|
||||
YUV_SHADER_BODY
|
||||
},
|
||||
|
||||
/* SHADER_NV21 */
|
||||
/* SHADER_YUV_BT709 */
|
||||
{
|
||||
/* vertex shader */
|
||||
"varying vec4 v_color;\n"
|
||||
"varying vec2 v_texCoord;\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n"
|
||||
" v_color = gl_Color;\n"
|
||||
" v_texCoord = vec2(gl_MultiTexCoord0);\n"
|
||||
"}",
|
||||
TEXTURE_VERTEX_SHADER,
|
||||
/* fragment shader */
|
||||
"varying vec4 v_color;\n"
|
||||
"varying vec2 v_texCoord;\n"
|
||||
"uniform sampler2D tex0; // Y \n"
|
||||
"uniform sampler2D tex1; // U/V \n"
|
||||
"\n"
|
||||
"// YUV offset \n"
|
||||
"const vec3 offset = vec3(-0.0627451017, -0.501960814, -0.501960814);\n"
|
||||
"\n"
|
||||
"// RGB coefficients \n"
|
||||
"const vec3 Rcoeff = vec3(1.164, 0.000, 1.596);\n"
|
||||
"const vec3 Gcoeff = vec3(1.164, -0.391, -0.813);\n"
|
||||
"const vec3 Bcoeff = vec3(1.164, 2.018, 0.000);\n"
|
||||
"\n"
|
||||
"void main()\n"
|
||||
"{\n"
|
||||
" vec2 tcoord;\n"
|
||||
" vec3 yuv, rgb;\n"
|
||||
"\n"
|
||||
" // Get the Y value \n"
|
||||
" tcoord = v_texCoord;\n"
|
||||
" yuv.x = texture2D(tex0, tcoord).r;\n"
|
||||
"\n"
|
||||
" // Get the U and V values \n"
|
||||
" tcoord *= UVCoordScale;\n"
|
||||
" yuv.yz = texture2D(tex1, tcoord).ar;\n"
|
||||
"\n"
|
||||
" // Do the color transform \n"
|
||||
" yuv += offset;\n"
|
||||
" rgb.r = dot(yuv, Rcoeff);\n"
|
||||
" rgb.g = dot(yuv, Gcoeff);\n"
|
||||
" rgb.b = dot(yuv, Bcoeff);\n"
|
||||
"\n"
|
||||
" // That was easy. :) \n"
|
||||
" gl_FragColor = vec4(rgb, 1.0) * v_color;\n"
|
||||
"}"
|
||||
YUV_SHADER_PROLOGUE
|
||||
BT709_SHADER_CONSTANTS
|
||||
YUV_SHADER_BODY
|
||||
},
|
||||
/* SHADER_NV12_JPEG */
|
||||
{
|
||||
/* vertex shader */
|
||||
TEXTURE_VERTEX_SHADER,
|
||||
/* fragment shader */
|
||||
NV12_SHADER_PROLOGUE
|
||||
JPEG_SHADER_CONSTANTS
|
||||
NV12_SHADER_BODY
|
||||
},
|
||||
/* SHADER_NV12_BT601 */
|
||||
{
|
||||
/* vertex shader */
|
||||
TEXTURE_VERTEX_SHADER,
|
||||
/* fragment shader */
|
||||
NV12_SHADER_PROLOGUE
|
||||
BT601_SHADER_CONSTANTS
|
||||
NV12_SHADER_BODY
|
||||
},
|
||||
/* SHADER_NV12_BT709 */
|
||||
{
|
||||
/* vertex shader */
|
||||
TEXTURE_VERTEX_SHADER,
|
||||
/* fragment shader */
|
||||
NV12_SHADER_PROLOGUE
|
||||
BT709_SHADER_CONSTANTS
|
||||
NV12_SHADER_BODY
|
||||
},
|
||||
/* SHADER_NV21_JPEG */
|
||||
{
|
||||
/* vertex shader */
|
||||
TEXTURE_VERTEX_SHADER,
|
||||
/* fragment shader */
|
||||
NV21_SHADER_PROLOGUE
|
||||
JPEG_SHADER_CONSTANTS
|
||||
NV21_SHADER_BODY
|
||||
},
|
||||
/* SHADER_NV21_BT601 */
|
||||
{
|
||||
/* vertex shader */
|
||||
TEXTURE_VERTEX_SHADER,
|
||||
/* fragment shader */
|
||||
NV21_SHADER_PROLOGUE
|
||||
BT601_SHADER_CONSTANTS
|
||||
NV21_SHADER_BODY
|
||||
},
|
||||
/* SHADER_NV21_BT709 */
|
||||
{
|
||||
/* vertex shader */
|
||||
TEXTURE_VERTEX_SHADER,
|
||||
/* fragment shader */
|
||||
NV21_SHADER_PROLOGUE
|
||||
BT709_SHADER_CONSTANTS
|
||||
NV21_SHADER_BODY
|
||||
},
|
||||
};
|
||||
|
||||
|
|
|
@ -26,9 +26,15 @@ typedef enum {
|
|||
SHADER_NONE,
|
||||
SHADER_SOLID,
|
||||
SHADER_RGB,
|
||||
SHADER_YUV,
|
||||
SHADER_NV12,
|
||||
SHADER_NV21,
|
||||
SHADER_YUV_JPEG,
|
||||
SHADER_YUV_BT601,
|
||||
SHADER_YUV_BT709,
|
||||
SHADER_NV12_JPEG,
|
||||
SHADER_NV12_BT601,
|
||||
SHADER_NV12_BT709,
|
||||
SHADER_NV21_JPEG,
|
||||
SHADER_NV21_BT601,
|
||||
SHADER_NV21_BT709,
|
||||
NUM_SHADERS
|
||||
} GL_Shader;
|
||||
|
||||
|
|
|
@ -950,7 +950,7 @@ static void GLES2_EvictShader(SDL_Renderer *renderer, GLES2_ShaderCacheEntry *en
|
|||
static GLES2_ProgramCacheEntry *GLES2_CacheProgram(SDL_Renderer *renderer,
|
||||
GLES2_ShaderCacheEntry *vertex,
|
||||
GLES2_ShaderCacheEntry *fragment);
|
||||
static int GLES2_SelectProgram(SDL_Renderer *renderer, GLES2_ImageSource source);
|
||||
static int GLES2_SelectProgram(SDL_Renderer *renderer, GLES2_ImageSource source, int w, int h);
|
||||
|
||||
static GLES2_ProgramCacheEntry *
|
||||
GLES2_CacheProgram(SDL_Renderer *renderer, GLES2_ShaderCacheEntry *vertex,
|
||||
|
@ -1189,7 +1189,7 @@ GLES2_EvictShader(SDL_Renderer *renderer, GLES2_ShaderCacheEntry *entry)
|
|||
}
|
||||
|
||||
static int
|
||||
GLES2_SelectProgram(SDL_Renderer *renderer, GLES2_ImageSource source)
|
||||
GLES2_SelectProgram(SDL_Renderer *renderer, GLES2_ImageSource source, int w, int h)
|
||||
{
|
||||
GLES2_DriverContext *data = (GLES2_DriverContext *)renderer->driverdata;
|
||||
GLES2_ShaderCacheEntry *vertex = NULL;
|
||||
|
@ -1216,13 +1216,52 @@ GLES2_SelectProgram(SDL_Renderer *renderer, GLES2_ImageSource source)
|
|||
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_BGR_SRC;
|
||||
break;
|
||||
case GLES2_IMAGESOURCE_TEXTURE_YUV:
|
||||
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_YUV_SRC;
|
||||
switch (SDL_GetYUVConversionModeForResolution(w, h)) {
|
||||
case SDL_YUV_CONVERSION_JPEG:
|
||||
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_YUV_JPEG_SRC;
|
||||
break;
|
||||
case SDL_YUV_CONVERSION_BT601:
|
||||
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_YUV_BT601_SRC;
|
||||
break;
|
||||
case SDL_YUV_CONVERSION_BT709:
|
||||
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_YUV_BT709_SRC;
|
||||
break;
|
||||
default:
|
||||
SDL_SetError("Unsupported YUV conversion mode: %d\n", SDL_GetYUVConversionModeForResolution(w, h));
|
||||
goto fault;
|
||||
}
|
||||
break;
|
||||
case GLES2_IMAGESOURCE_TEXTURE_NV12:
|
||||
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV12_SRC;
|
||||
switch (SDL_GetYUVConversionModeForResolution(w, h)) {
|
||||
case SDL_YUV_CONVERSION_JPEG:
|
||||
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV12_JPEG_SRC;
|
||||
break;
|
||||
case SDL_YUV_CONVERSION_BT601:
|
||||
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV12_BT601_SRC;
|
||||
break;
|
||||
case SDL_YUV_CONVERSION_BT709:
|
||||
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV12_BT709_SRC;
|
||||
break;
|
||||
default:
|
||||
SDL_SetError("Unsupported YUV conversion mode: %d\n", SDL_GetYUVConversionModeForResolution(w, h));
|
||||
goto fault;
|
||||
}
|
||||
break;
|
||||
case GLES2_IMAGESOURCE_TEXTURE_NV21:
|
||||
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV21_SRC;
|
||||
switch (SDL_GetYUVConversionModeForResolution(w, h)) {
|
||||
case SDL_YUV_CONVERSION_JPEG:
|
||||
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV21_JPEG_SRC;
|
||||
break;
|
||||
case SDL_YUV_CONVERSION_BT601:
|
||||
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV21_BT601_SRC;
|
||||
break;
|
||||
case SDL_YUV_CONVERSION_BT709:
|
||||
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV21_BT709_SRC;
|
||||
break;
|
||||
default:
|
||||
SDL_SetError("Unsupported YUV conversion mode: %d\n", SDL_GetYUVConversionModeForResolution(w, h));
|
||||
goto fault;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
goto fault;
|
||||
|
@ -1445,7 +1484,7 @@ GLES2_SetDrawingState(SDL_Renderer * renderer)
|
|||
GLES2_SetTexCoords(data, SDL_FALSE);
|
||||
|
||||
/* Activate an appropriate shader and set the projection matrix */
|
||||
if (GLES2_SelectProgram(renderer, GLES2_IMAGESOURCE_SOLID) < 0) {
|
||||
if (GLES2_SelectProgram(renderer, GLES2_IMAGESOURCE_SOLID, 0, 0) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
@ -1707,7 +1746,7 @@ GLES2_SetupCopy(SDL_Renderer *renderer, SDL_Texture *texture)
|
|||
}
|
||||
}
|
||||
|
||||
if (GLES2_SelectProgram(renderer, sourceType) < 0) {
|
||||
if (GLES2_SelectProgram(renderer, sourceType, texture->w, texture->h) < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
|
|
|
@ -126,73 +126,154 @@ static const Uint8 GLES2_FragmentSrc_TextureBGRSrc_[] = " \
|
|||
} \
|
||||
";
|
||||
|
||||
#define JPEG_SHADER_CONSTANTS \
|
||||
"// YUV offset \n" \
|
||||
"const vec3 offset = vec3(0, -0.501960814, -0.501960814);\n" \
|
||||
"\n" \
|
||||
"// RGB coefficients \n" \
|
||||
"const mat3 matrix = mat3( 1, 1, 1,\n" \
|
||||
" 0, -0.3441, 1.772,\n" \
|
||||
" 1.402, -0.7141, 0);\n" \
|
||||
|
||||
#define BT601_SHADER_CONSTANTS \
|
||||
"// YUV offset \n" \
|
||||
"const vec3 offset = vec3(-0.0627451017, -0.501960814, -0.501960814);\n" \
|
||||
"\n" \
|
||||
"// RGB coefficients \n" \
|
||||
"const mat3 matrix = mat3( 1.1644, 1.1644, 1.1644,\n" \
|
||||
" 0, -0.3918, 2.0172,\n" \
|
||||
" 1.596, -0.813, 0);\n" \
|
||||
|
||||
#define BT709_SHADER_CONSTANTS \
|
||||
"// YUV offset \n" \
|
||||
"const vec3 offset = vec3(-0.0627451017, -0.501960814, -0.501960814);\n" \
|
||||
"\n" \
|
||||
"// RGB coefficients \n" \
|
||||
"const mat3 matrix = mat3( 1.1644, 1.1644, 1.1644,\n" \
|
||||
" 0, -0.2132, 2.1124,\n" \
|
||||
" 1.7927, -0.5329, 0);\n" \
|
||||
|
||||
|
||||
#define YUV_SHADER_PROLOGUE \
|
||||
"precision mediump float;\n" \
|
||||
"uniform sampler2D u_texture;\n" \
|
||||
"uniform sampler2D u_texture_u;\n" \
|
||||
"uniform sampler2D u_texture_v;\n" \
|
||||
"uniform vec4 u_modulation;\n" \
|
||||
"varying vec2 v_texCoord;\n" \
|
||||
"\n" \
|
||||
|
||||
#define YUV_SHADER_BODY \
|
||||
"\n" \
|
||||
"void main()\n" \
|
||||
"{\n" \
|
||||
" mediump vec3 yuv;\n" \
|
||||
" lowp vec3 rgb;\n" \
|
||||
"\n" \
|
||||
" // Get the YUV values \n" \
|
||||
" yuv.x = texture2D(u_texture, v_texCoord).r;\n" \
|
||||
" yuv.y = texture2D(u_texture_u, v_texCoord).r;\n" \
|
||||
" yuv.z = texture2D(u_texture_v, v_texCoord).r;\n" \
|
||||
"\n" \
|
||||
" // Do the color transform \n" \
|
||||
" yuv += offset;\n" \
|
||||
" rgb = matrix * yuv;\n" \
|
||||
"\n" \
|
||||
" // That was easy. :) \n" \
|
||||
" gl_FragColor = vec4(rgb, 1);\n" \
|
||||
" gl_FragColor *= u_modulation;\n" \
|
||||
"}" \
|
||||
|
||||
#define NV12_SHADER_BODY \
|
||||
"\n" \
|
||||
"void main()\n" \
|
||||
"{\n" \
|
||||
" mediump vec3 yuv;\n" \
|
||||
" lowp vec3 rgb;\n" \
|
||||
"\n" \
|
||||
" // Get the YUV values \n" \
|
||||
" yuv.x = texture2D(u_texture, v_texCoord).r;\n" \
|
||||
" yuv.yz = texture2D(u_texture_u, v_texCoord).ra;\n" \
|
||||
"\n" \
|
||||
" // Do the color transform \n" \
|
||||
" yuv += offset;\n" \
|
||||
" rgb = matrix * yuv;\n" \
|
||||
"\n" \
|
||||
" // That was easy. :) \n" \
|
||||
" gl_FragColor = vec4(rgb, 1);\n" \
|
||||
" gl_FragColor *= u_modulation;\n" \
|
||||
"}" \
|
||||
|
||||
#define NV21_SHADER_BODY \
|
||||
"\n" \
|
||||
"void main()\n" \
|
||||
"{\n" \
|
||||
" mediump vec3 yuv;\n" \
|
||||
" lowp vec3 rgb;\n" \
|
||||
"\n" \
|
||||
" // Get the YUV values \n" \
|
||||
" yuv.x = texture2D(u_texture, v_texCoord).r;\n" \
|
||||
" yuv.yz = texture2D(u_texture_u, v_texCoord).ar;\n" \
|
||||
"\n" \
|
||||
" // Do the color transform \n" \
|
||||
" yuv += offset;\n" \
|
||||
" rgb = matrix * yuv;\n" \
|
||||
"\n" \
|
||||
" // That was easy. :) \n" \
|
||||
" gl_FragColor = vec4(rgb, 1);\n" \
|
||||
" gl_FragColor *= u_modulation;\n" \
|
||||
"}" \
|
||||
|
||||
/* YUV to ABGR conversion */
|
||||
static const Uint8 GLES2_FragmentSrc_TextureYUVSrc_[] = " \
|
||||
precision mediump float; \
|
||||
uniform sampler2D u_texture; \
|
||||
uniform sampler2D u_texture_u; \
|
||||
uniform sampler2D u_texture_v; \
|
||||
uniform vec4 u_modulation; \
|
||||
varying vec2 v_texCoord; \
|
||||
\
|
||||
void main() \
|
||||
{ \
|
||||
mediump vec3 yuv; \
|
||||
lowp vec3 rgb; \
|
||||
yuv.x = texture2D(u_texture, v_texCoord).r; \
|
||||
yuv.y = texture2D(u_texture_u, v_texCoord).r - 0.5; \
|
||||
yuv.z = texture2D(u_texture_v, v_texCoord).r - 0.5; \
|
||||
rgb = mat3( 1, 1, 1, \
|
||||
0, -0.39465, 2.03211, \
|
||||
1.13983, -0.58060, 0) * yuv; \
|
||||
gl_FragColor = vec4(rgb, 1); \
|
||||
gl_FragColor *= u_modulation; \
|
||||
} \
|
||||
";
|
||||
static const Uint8 GLES2_FragmentSrc_TextureYUVJPEGSrc_[] = \
|
||||
YUV_SHADER_PROLOGUE \
|
||||
JPEG_SHADER_CONSTANTS \
|
||||
YUV_SHADER_BODY \
|
||||
;
|
||||
static const Uint8 GLES2_FragmentSrc_TextureYUVBT601Src_[] = \
|
||||
YUV_SHADER_PROLOGUE \
|
||||
BT601_SHADER_CONSTANTS \
|
||||
YUV_SHADER_BODY \
|
||||
;
|
||||
static const Uint8 GLES2_FragmentSrc_TextureYUVBT709Src_[] = \
|
||||
YUV_SHADER_PROLOGUE \
|
||||
BT709_SHADER_CONSTANTS \
|
||||
YUV_SHADER_BODY \
|
||||
;
|
||||
|
||||
/* NV12 to ABGR conversion */
|
||||
static const Uint8 GLES2_FragmentSrc_TextureNV12Src_[] = " \
|
||||
precision mediump float; \
|
||||
uniform sampler2D u_texture; \
|
||||
uniform sampler2D u_texture_u; \
|
||||
uniform vec4 u_modulation; \
|
||||
varying vec2 v_texCoord; \
|
||||
\
|
||||
void main() \
|
||||
{ \
|
||||
mediump vec3 yuv; \
|
||||
lowp vec3 rgb; \
|
||||
yuv.x = texture2D(u_texture, v_texCoord).r; \
|
||||
yuv.yz = texture2D(u_texture_u, v_texCoord).ra - 0.5; \
|
||||
rgb = mat3( 1, 1, 1, \
|
||||
0, -0.39465, 2.03211, \
|
||||
1.13983, -0.58060, 0) * yuv; \
|
||||
gl_FragColor = vec4(rgb, 1); \
|
||||
gl_FragColor *= u_modulation; \
|
||||
} \
|
||||
";
|
||||
static const Uint8 GLES2_FragmentSrc_TextureNV12JPEGSrc_[] = \
|
||||
YUV_SHADER_PROLOGUE \
|
||||
JPEG_SHADER_CONSTANTS \
|
||||
NV12_SHADER_BODY \
|
||||
;
|
||||
static const Uint8 GLES2_FragmentSrc_TextureNV12BT601Src_[] = \
|
||||
YUV_SHADER_PROLOGUE \
|
||||
BT601_SHADER_CONSTANTS \
|
||||
NV12_SHADER_BODY \
|
||||
;
|
||||
static const Uint8 GLES2_FragmentSrc_TextureNV12BT709Src_[] = \
|
||||
YUV_SHADER_PROLOGUE \
|
||||
BT709_SHADER_CONSTANTS \
|
||||
NV12_SHADER_BODY \
|
||||
;
|
||||
|
||||
/* NV21 to ABGR conversion */
|
||||
static const Uint8 GLES2_FragmentSrc_TextureNV21Src_[] = " \
|
||||
precision mediump float; \
|
||||
uniform sampler2D u_texture; \
|
||||
uniform sampler2D u_texture_u; \
|
||||
uniform vec4 u_modulation; \
|
||||
varying vec2 v_texCoord; \
|
||||
\
|
||||
void main() \
|
||||
{ \
|
||||
mediump vec3 yuv; \
|
||||
lowp vec3 rgb; \
|
||||
yuv.x = texture2D(u_texture, v_texCoord).r; \
|
||||
yuv.yz = texture2D(u_texture_u, v_texCoord).ar - 0.5; \
|
||||
rgb = mat3( 1, 1, 1, \
|
||||
0, -0.39465, 2.03211, \
|
||||
1.13983, -0.58060, 0) * yuv; \
|
||||
gl_FragColor = vec4(rgb, 1); \
|
||||
gl_FragColor *= u_modulation; \
|
||||
} \
|
||||
";
|
||||
static const Uint8 GLES2_FragmentSrc_TextureNV21JPEGSrc_[] = \
|
||||
YUV_SHADER_PROLOGUE \
|
||||
JPEG_SHADER_CONSTANTS \
|
||||
NV21_SHADER_BODY \
|
||||
;
|
||||
static const Uint8 GLES2_FragmentSrc_TextureNV21BT601Src_[] = \
|
||||
YUV_SHADER_PROLOGUE \
|
||||
BT601_SHADER_CONSTANTS \
|
||||
NV21_SHADER_BODY \
|
||||
;
|
||||
static const Uint8 GLES2_FragmentSrc_TextureNV21BT709Src_[] = \
|
||||
YUV_SHADER_PROLOGUE \
|
||||
BT709_SHADER_CONSTANTS \
|
||||
NV21_SHADER_BODY \
|
||||
;
|
||||
|
||||
static const GLES2_ShaderInstance GLES2_VertexSrc_Default = {
|
||||
GL_VERTEX_SHADER,
|
||||
|
@ -236,25 +317,67 @@ static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureBGRSrc = {
|
|||
GLES2_FragmentSrc_TextureBGRSrc_
|
||||
};
|
||||
|
||||
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureYUVSrc = {
|
||||
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureYUVJPEGSrc = {
|
||||
GL_FRAGMENT_SHADER,
|
||||
GLES2_SOURCE_SHADER,
|
||||
sizeof(GLES2_FragmentSrc_TextureYUVSrc_),
|
||||
GLES2_FragmentSrc_TextureYUVSrc_
|
||||
sizeof(GLES2_FragmentSrc_TextureYUVJPEGSrc_),
|
||||
GLES2_FragmentSrc_TextureYUVJPEGSrc_
|
||||
};
|
||||
|
||||
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV12Src = {
|
||||
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureYUVBT601Src = {
|
||||
GL_FRAGMENT_SHADER,
|
||||
GLES2_SOURCE_SHADER,
|
||||
sizeof(GLES2_FragmentSrc_TextureNV12Src_),
|
||||
GLES2_FragmentSrc_TextureNV12Src_
|
||||
sizeof(GLES2_FragmentSrc_TextureYUVBT601Src_),
|
||||
GLES2_FragmentSrc_TextureYUVBT601Src_
|
||||
};
|
||||
|
||||
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV21Src = {
|
||||
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureYUVBT709Src = {
|
||||
GL_FRAGMENT_SHADER,
|
||||
GLES2_SOURCE_SHADER,
|
||||
sizeof(GLES2_FragmentSrc_TextureNV21Src_),
|
||||
GLES2_FragmentSrc_TextureNV21Src_
|
||||
sizeof(GLES2_FragmentSrc_TextureYUVBT709Src_),
|
||||
GLES2_FragmentSrc_TextureYUVBT709Src_
|
||||
};
|
||||
|
||||
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV12JPEGSrc = {
|
||||
GL_FRAGMENT_SHADER,
|
||||
GLES2_SOURCE_SHADER,
|
||||
sizeof(GLES2_FragmentSrc_TextureNV12JPEGSrc_),
|
||||
GLES2_FragmentSrc_TextureNV12JPEGSrc_
|
||||
};
|
||||
|
||||
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV12BT601Src = {
|
||||
GL_FRAGMENT_SHADER,
|
||||
GLES2_SOURCE_SHADER,
|
||||
sizeof(GLES2_FragmentSrc_TextureNV12BT601Src_),
|
||||
GLES2_FragmentSrc_TextureNV12BT601Src_
|
||||
};
|
||||
|
||||
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV21BT709Src = {
|
||||
GL_FRAGMENT_SHADER,
|
||||
GLES2_SOURCE_SHADER,
|
||||
sizeof(GLES2_FragmentSrc_TextureNV21BT709Src_),
|
||||
GLES2_FragmentSrc_TextureNV21BT709Src_
|
||||
};
|
||||
|
||||
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV21JPEGSrc = {
|
||||
GL_FRAGMENT_SHADER,
|
||||
GLES2_SOURCE_SHADER,
|
||||
sizeof(GLES2_FragmentSrc_TextureNV21JPEGSrc_),
|
||||
GLES2_FragmentSrc_TextureNV21JPEGSrc_
|
||||
};
|
||||
|
||||
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV21BT601Src = {
|
||||
GL_FRAGMENT_SHADER,
|
||||
GLES2_SOURCE_SHADER,
|
||||
sizeof(GLES2_FragmentSrc_TextureNV21BT601Src_),
|
||||
GLES2_FragmentSrc_TextureNV21BT601Src_
|
||||
};
|
||||
|
||||
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV12BT709Src = {
|
||||
GL_FRAGMENT_SHADER,
|
||||
GLES2_SOURCE_SHADER,
|
||||
sizeof(GLES2_FragmentSrc_TextureNV12BT709Src_),
|
||||
GLES2_FragmentSrc_TextureNV12BT709Src_
|
||||
};
|
||||
|
||||
|
||||
|
@ -304,24 +427,66 @@ static GLES2_Shader GLES2_FragmentShader_TextureBGRSrc = {
|
|||
}
|
||||
};
|
||||
|
||||
static GLES2_Shader GLES2_FragmentShader_TextureYUVSrc = {
|
||||
static GLES2_Shader GLES2_FragmentShader_TextureYUVJPEGSrc = {
|
||||
1,
|
||||
{
|
||||
&GLES2_FragmentSrc_TextureYUVSrc
|
||||
&GLES2_FragmentSrc_TextureYUVJPEGSrc
|
||||
}
|
||||
};
|
||||
|
||||
static GLES2_Shader GLES2_FragmentShader_TextureNV12Src = {
|
||||
static GLES2_Shader GLES2_FragmentShader_TextureYUVBT601Src = {
|
||||
1,
|
||||
{
|
||||
&GLES2_FragmentSrc_TextureNV12Src
|
||||
&GLES2_FragmentSrc_TextureYUVBT601Src
|
||||
}
|
||||
};
|
||||
|
||||
static GLES2_Shader GLES2_FragmentShader_TextureNV21Src = {
|
||||
static GLES2_Shader GLES2_FragmentShader_TextureYUVBT709Src = {
|
||||
1,
|
||||
{
|
||||
&GLES2_FragmentSrc_TextureNV21Src
|
||||
&GLES2_FragmentSrc_TextureYUVBT709Src
|
||||
}
|
||||
};
|
||||
|
||||
static GLES2_Shader GLES2_FragmentShader_TextureNV12JPEGSrc = {
|
||||
1,
|
||||
{
|
||||
&GLES2_FragmentSrc_TextureNV12JPEGSrc
|
||||
}
|
||||
};
|
||||
|
||||
static GLES2_Shader GLES2_FragmentShader_TextureNV12BT601Src = {
|
||||
1,
|
||||
{
|
||||
&GLES2_FragmentSrc_TextureNV12BT601Src
|
||||
}
|
||||
};
|
||||
|
||||
static GLES2_Shader GLES2_FragmentShader_TextureNV12BT709Src = {
|
||||
1,
|
||||
{
|
||||
&GLES2_FragmentSrc_TextureNV12BT709Src
|
||||
}
|
||||
};
|
||||
|
||||
static GLES2_Shader GLES2_FragmentShader_TextureNV21JPEGSrc = {
|
||||
1,
|
||||
{
|
||||
&GLES2_FragmentSrc_TextureNV21JPEGSrc
|
||||
}
|
||||
};
|
||||
|
||||
static GLES2_Shader GLES2_FragmentShader_TextureNV21BT601Src = {
|
||||
1,
|
||||
{
|
||||
&GLES2_FragmentSrc_TextureNV21BT601Src
|
||||
}
|
||||
};
|
||||
|
||||
static GLES2_Shader GLES2_FragmentShader_TextureNV21BT709Src = {
|
||||
1,
|
||||
{
|
||||
&GLES2_FragmentSrc_TextureNV21BT709Src
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -345,12 +510,24 @@ const GLES2_Shader *GLES2_GetShader(GLES2_ShaderType type)
|
|||
return &GLES2_FragmentShader_TextureRGBSrc;
|
||||
case GLES2_SHADER_FRAGMENT_TEXTURE_BGR_SRC:
|
||||
return &GLES2_FragmentShader_TextureBGRSrc;
|
||||
case GLES2_SHADER_FRAGMENT_TEXTURE_YUV_SRC:
|
||||
return &GLES2_FragmentShader_TextureYUVSrc;
|
||||
case GLES2_SHADER_FRAGMENT_TEXTURE_NV12_SRC:
|
||||
return &GLES2_FragmentShader_TextureNV12Src;
|
||||
case GLES2_SHADER_FRAGMENT_TEXTURE_NV21_SRC:
|
||||
return &GLES2_FragmentShader_TextureNV21Src;
|
||||
case GLES2_SHADER_FRAGMENT_TEXTURE_YUV_JPEG_SRC:
|
||||
return &GLES2_FragmentShader_TextureYUVJPEGSrc;
|
||||
case GLES2_SHADER_FRAGMENT_TEXTURE_YUV_BT601_SRC:
|
||||
return &GLES2_FragmentShader_TextureYUVBT601Src;
|
||||
case GLES2_SHADER_FRAGMENT_TEXTURE_YUV_BT709_SRC:
|
||||
return &GLES2_FragmentShader_TextureYUVBT709Src;
|
||||
case GLES2_SHADER_FRAGMENT_TEXTURE_NV12_JPEG_SRC:
|
||||
return &GLES2_FragmentShader_TextureNV12JPEGSrc;
|
||||
case GLES2_SHADER_FRAGMENT_TEXTURE_NV12_BT601_SRC:
|
||||
return &GLES2_FragmentShader_TextureNV12BT601Src;
|
||||
case GLES2_SHADER_FRAGMENT_TEXTURE_NV12_BT709_SRC:
|
||||
return &GLES2_FragmentShader_TextureNV12BT709Src;
|
||||
case GLES2_SHADER_FRAGMENT_TEXTURE_NV21_JPEG_SRC:
|
||||
return &GLES2_FragmentShader_TextureNV21JPEGSrc;
|
||||
case GLES2_SHADER_FRAGMENT_TEXTURE_NV21_BT601_SRC:
|
||||
return &GLES2_FragmentShader_TextureNV21BT601Src;
|
||||
case GLES2_SHADER_FRAGMENT_TEXTURE_NV21_BT709_SRC:
|
||||
return &GLES2_FragmentShader_TextureNV21BT709Src;
|
||||
default:
|
||||
return NULL;
|
||||
}
|
||||
|
|
|
@ -20,11 +20,11 @@
|
|||
*/
|
||||
#include "../../SDL_internal.h"
|
||||
|
||||
#if SDL_VIDEO_RENDER_OGL_ES2
|
||||
|
||||
#ifndef SDL_shaders_gles2_h_
|
||||
#define SDL_shaders_gles2_h_
|
||||
|
||||
#if SDL_VIDEO_RENDER_OGL_ES2
|
||||
|
||||
typedef struct GLES2_ShaderInstance
|
||||
{
|
||||
GLenum type;
|
||||
|
@ -47,17 +47,23 @@ typedef enum
|
|||
GLES2_SHADER_FRAGMENT_TEXTURE_ARGB_SRC,
|
||||
GLES2_SHADER_FRAGMENT_TEXTURE_BGR_SRC,
|
||||
GLES2_SHADER_FRAGMENT_TEXTURE_RGB_SRC,
|
||||
GLES2_SHADER_FRAGMENT_TEXTURE_YUV_SRC,
|
||||
GLES2_SHADER_FRAGMENT_TEXTURE_NV12_SRC,
|
||||
GLES2_SHADER_FRAGMENT_TEXTURE_NV21_SRC
|
||||
GLES2_SHADER_FRAGMENT_TEXTURE_YUV_JPEG_SRC,
|
||||
GLES2_SHADER_FRAGMENT_TEXTURE_YUV_BT601_SRC,
|
||||
GLES2_SHADER_FRAGMENT_TEXTURE_YUV_BT709_SRC,
|
||||
GLES2_SHADER_FRAGMENT_TEXTURE_NV12_JPEG_SRC,
|
||||
GLES2_SHADER_FRAGMENT_TEXTURE_NV12_BT601_SRC,
|
||||
GLES2_SHADER_FRAGMENT_TEXTURE_NV12_BT709_SRC,
|
||||
GLES2_SHADER_FRAGMENT_TEXTURE_NV21_JPEG_SRC,
|
||||
GLES2_SHADER_FRAGMENT_TEXTURE_NV21_BT601_SRC,
|
||||
GLES2_SHADER_FRAGMENT_TEXTURE_NV21_BT709_SRC,
|
||||
} GLES2_ShaderType;
|
||||
|
||||
#define GLES2_SOURCE_SHADER (GLenum)-1
|
||||
|
||||
const GLES2_Shader *GLES2_GetShader(GLES2_ShaderType type);
|
||||
|
||||
#endif /* SDL_shaders_gles2_h_ */
|
||||
|
||||
#endif /* SDL_VIDEO_RENDER_OGL_ES2 */
|
||||
|
||||
#endif /* SDL_shaders_gles2_h_ */
|
||||
|
||||
/* vi: set ts=4 sw=4 expandtab: */
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue