Updated SDL's YUV support, many thanks to Adrien Descamps

New functions get and set the YUV colorspace conversion mode:
	SDL_SetYUVConversionMode()
	SDL_GetYUVConversionMode()
	SDL_GetYUVConversionModeForResolution()

SDL_ConvertPixels() converts between all supported RGB and YUV formats, with SSE acceleration for converting from planar YUV formats (YV12, NV12, etc) to common RGB/RGBA formats.

Added a new test program, testyuv, to verify correctness and speed of YUV conversion functionality.
This commit is contained in:
Sam Lantinga 2017-11-12 22:51:12 -08:00
parent e7cc03e0bd
commit 145d2469ae
60 changed files with 8368 additions and 4310 deletions

View file

@ -1,409 +0,0 @@
/*
Simple DirectMedia Layer
Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "../SDL_internal.h"
#include "SDL_yuv_mmx_c.h"
#ifdef USE_MMX_ASSEMBLY
#include "SDL_stdinc.h"
#include "mmx.h"
/* *INDENT-OFF* */
static mmx_t MMX_0080w = { .ud = {0x00800080, 0x00800080} };
static mmx_t MMX_00FFw = { .ud = {0x00ff00ff, 0x00ff00ff} };
static mmx_t MMX_FF00w = { .ud = {0xff00ff00, 0xff00ff00} };
static mmx_t MMX_Ycoeff = { .uw = {0x004a, 0x004a, 0x004a, 0x004a} };
static mmx_t MMX_UbluRGB = { .uw = {0x0072, 0x0072, 0x0072, 0x0072} };
static mmx_t MMX_VredRGB = { .uw = {0x0059, 0x0059, 0x0059, 0x0059} };
static mmx_t MMX_UgrnRGB = { .uw = {0xffea, 0xffea, 0xffea, 0xffea} };
static mmx_t MMX_VgrnRGB = { .uw = {0xffd2, 0xffd2, 0xffd2, 0xffd2} };
static mmx_t MMX_Ublu5x5 = { .uw = {0x0081, 0x0081, 0x0081, 0x0081} };
static mmx_t MMX_Vred5x5 = { .uw = {0x0066, 0x0066, 0x0066, 0x0066} };
static mmx_t MMX_Ugrn565 = { .uw = {0xffe8, 0xffe8, 0xffe8, 0xffe8} };
static mmx_t MMX_Vgrn565 = { .uw = {0xffcd, 0xffcd, 0xffcd, 0xffcd} };
static mmx_t MMX_red565 = { .uw = {0xf800, 0xf800, 0xf800, 0xf800} };
static mmx_t MMX_grn565 = { .uw = {0x07e0, 0x07e0, 0x07e0, 0x07e0} };
/**
This MMX assembler is my first assembler/MMX program ever.
Thus it maybe buggy.
Send patches to:
mvogt@rhrk.uni-kl.de
After it worked fine I have "obfuscated" the code a bit to have
more parallism in the MMX units. This means I moved
initilisation around and delayed other instruction.
Performance measurement did not show that this brought any advantage
but in theory it _should_ be faster this way.
The overall performanve gain to the C based dither was 30%-40%.
The MMX routine calculates 256bit=8RGB values in each cycle
(4 for row1 & 4 for row2)
The red/green/blue.. coefficents are taken from the mpeg_play
player. They look nice, but I dont know if you can have
better values, to avoid integer rounding errors.
IMPORTANT:
==========
It is a requirement that the cr/cb/lum are 8 byte aligned and
the out are 16byte aligned or you will/may get segfaults
*/
void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
unsigned char *lum, unsigned char *cr,
unsigned char *cb, unsigned char *out,
int rows, int cols, int mod )
{
Uint32 *row1;
Uint32 *row2;
unsigned char* y = lum +cols*rows; /* Pointer to the end */
int x = 0;
row1 = (Uint32 *)out; /* 32 bit target */
row2 = (Uint32 *)out+cols+mod; /* start of second row */
mod = (mod+cols+mod)*4; /* increment for row1 in byte */
__asm__ __volatile__ (
".align 8\n"
"1:\n"
/* create Cr (result in mm1) */
"movd (%0),%%mm1\n" /* 0 0 0 0 v3 v2 v1 v0 */
"pxor %%mm7,%%mm7\n" /* 00 00 00 00 00 00 00 00 */
"movd (%2), %%mm2\n" /* 0 0 0 0 l3 l2 l1 l0 */
"punpcklbw %%mm7,%%mm1\n" /* 0 v3 0 v2 00 v1 00 v0 */
"punpckldq %%mm1,%%mm1\n" /* 00 v1 00 v0 00 v1 00 v0 */
"psubw %9,%%mm1\n" /* mm1-128:r1 r1 r0 r0 r1 r1 r0 r0 */
/* create Cr_g (result in mm0) */
"movq %%mm1,%%mm0\n" /* r1 r1 r0 r0 r1 r1 r0 r0 */
"pmullw %10,%%mm0\n" /* red*-46dec=0.7136*64 */
"pmullw %11,%%mm1\n" /* red*89dec=1.4013*64 */
"psraw $6, %%mm0\n" /* red=red/64 */
"psraw $6, %%mm1\n" /* red=red/64 */
/* create L1 L2 (result in mm2,mm4) */
/* L2=lum+cols */
"movq (%2,%4),%%mm3\n" /* 0 0 0 0 L3 L2 L1 L0 */
"punpckldq %%mm3,%%mm2\n" /* L3 L2 L1 L0 l3 l2 l1 l0 */
"movq %%mm2,%%mm4\n" /* L3 L2 L1 L0 l3 l2 l1 l0 */
"pand %12,%%mm2\n" /* L3 0 L1 0 l3 0 l1 0 */
"pand %13,%%mm4\n" /* 0 L2 0 L0 0 l2 0 l0 */
"psrlw $8,%%mm2\n" /* 0 L3 0 L1 0 l3 0 l1 */
/* create R (result in mm6) */
"movq %%mm2,%%mm5\n" /* 0 L3 0 L1 0 l3 0 l1 */
"movq %%mm4,%%mm6\n" /* 0 L2 0 L0 0 l2 0 l0 */
"paddsw %%mm1, %%mm5\n" /* lum1+red:x R3 x R1 x r3 x r1 */
"paddsw %%mm1, %%mm6\n" /* lum1+red:x R2 x R0 x r2 x r0 */
"packuswb %%mm5,%%mm5\n" /* R3 R1 r3 r1 R3 R1 r3 r1 */
"packuswb %%mm6,%%mm6\n" /* R2 R0 r2 r0 R2 R0 r2 r0 */
"pxor %%mm7,%%mm7\n" /* 00 00 00 00 00 00 00 00 */
"punpcklbw %%mm5,%%mm6\n" /* R3 R2 R1 R0 r3 r2 r1 r0 */
/* create Cb (result in mm1) */
"movd (%1), %%mm1\n" /* 0 0 0 0 u3 u2 u1 u0 */
"punpcklbw %%mm7,%%mm1\n" /* 0 u3 0 u2 00 u1 00 u0 */
"punpckldq %%mm1,%%mm1\n" /* 00 u1 00 u0 00 u1 00 u0 */
"psubw %9,%%mm1\n" /* mm1-128:u1 u1 u0 u0 u1 u1 u0 u0 */
/* create Cb_g (result in mm5) */
"movq %%mm1,%%mm5\n" /* u1 u1 u0 u0 u1 u1 u0 u0 */
"pmullw %14,%%mm5\n" /* blue*-109dec=1.7129*64 */
"pmullw %15,%%mm1\n" /* blue*114dec=1.78125*64 */
"psraw $6, %%mm5\n" /* blue=red/64 */
"psraw $6, %%mm1\n" /* blue=blue/64 */
/* create G (result in mm7) */
"movq %%mm2,%%mm3\n" /* 0 L3 0 L1 0 l3 0 l1 */
"movq %%mm4,%%mm7\n" /* 0 L2 0 L0 0 l2 0 l1 */
"paddsw %%mm5, %%mm3\n" /* lum1+Cb_g:x G3t x G1t x g3t x g1t */
"paddsw %%mm5, %%mm7\n" /* lum1+Cb_g:x G2t x G0t x g2t x g0t */
"paddsw %%mm0, %%mm3\n" /* lum1+Cr_g:x G3 x G1 x g3 x g1 */
"paddsw %%mm0, %%mm7\n" /* lum1+blue:x G2 x G0 x g2 x g0 */
"packuswb %%mm3,%%mm3\n" /* G3 G1 g3 g1 G3 G1 g3 g1 */
"packuswb %%mm7,%%mm7\n" /* G2 G0 g2 g0 G2 G0 g2 g0 */
"punpcklbw %%mm3,%%mm7\n" /* G3 G2 G1 G0 g3 g2 g1 g0 */
/* create B (result in mm5) */
"movq %%mm2,%%mm3\n" /* 0 L3 0 L1 0 l3 0 l1 */
"movq %%mm4,%%mm5\n" /* 0 L2 0 L0 0 l2 0 l1 */
"paddsw %%mm1, %%mm3\n" /* lum1+blue:x B3 x B1 x b3 x b1 */
"paddsw %%mm1, %%mm5\n" /* lum1+blue:x B2 x B0 x b2 x b0 */
"packuswb %%mm3,%%mm3\n" /* B3 B1 b3 b1 B3 B1 b3 b1 */
"packuswb %%mm5,%%mm5\n" /* B2 B0 b2 b0 B2 B0 b2 b0 */
"punpcklbw %%mm3,%%mm5\n" /* B3 B2 B1 B0 b3 b2 b1 b0 */
/* fill destination row1 (needed are mm6=Rr,mm7=Gg,mm5=Bb) */
"pxor %%mm2,%%mm2\n" /* 0 0 0 0 0 0 0 0 */
"pxor %%mm4,%%mm4\n" /* 0 0 0 0 0 0 0 0 */
"movq %%mm6,%%mm1\n" /* R3 R2 R1 R0 r3 r2 r1 r0 */
"movq %%mm5,%%mm3\n" /* B3 B2 B1 B0 b3 b2 b1 b0 */
/* process lower lum */
"punpcklbw %%mm4,%%mm1\n" /* 0 r3 0 r2 0 r1 0 r0 */
"punpcklbw %%mm4,%%mm3\n" /* 0 b3 0 b2 0 b1 0 b0 */
"movq %%mm1,%%mm2\n" /* 0 r3 0 r2 0 r1 0 r0 */
"movq %%mm3,%%mm0\n" /* 0 b3 0 b2 0 b1 0 b0 */
"punpcklwd %%mm1,%%mm3\n" /* 0 r1 0 b1 0 r0 0 b0 */
"punpckhwd %%mm2,%%mm0\n" /* 0 r3 0 b3 0 r2 0 b2 */
"pxor %%mm2,%%mm2\n" /* 0 0 0 0 0 0 0 0 */
"movq %%mm7,%%mm1\n" /* G3 G2 G1 G0 g3 g2 g1 g0 */
"punpcklbw %%mm1,%%mm2\n" /* g3 0 g2 0 g1 0 g0 0 */
"punpcklwd %%mm4,%%mm2\n" /* 0 0 g1 0 0 0 g0 0 */
"por %%mm3, %%mm2\n" /* 0 r1 g1 b1 0 r0 g0 b0 */
"movq %%mm2,(%3)\n" /* wrote out ! row1 */
"pxor %%mm2,%%mm2\n" /* 0 0 0 0 0 0 0 0 */
"punpcklbw %%mm1,%%mm4\n" /* g3 0 g2 0 g1 0 g0 0 */
"punpckhwd %%mm2,%%mm4\n" /* 0 0 g3 0 0 0 g2 0 */
"por %%mm0, %%mm4\n" /* 0 r3 g3 b3 0 r2 g2 b2 */
"movq %%mm4,8(%3)\n" /* wrote out ! row1 */
/* fill destination row2 (needed are mm6=Rr,mm7=Gg,mm5=Bb) */
/* this can be done "destructive" */
"pxor %%mm2,%%mm2\n" /* 0 0 0 0 0 0 0 0 */
"punpckhbw %%mm2,%%mm6\n" /* 0 R3 0 R2 0 R1 0 R0 */
"punpckhbw %%mm1,%%mm5\n" /* G3 B3 G2 B2 G1 B1 G0 B0 */
"movq %%mm5,%%mm1\n" /* G3 B3 G2 B2 G1 B1 G0 B0 */
"punpcklwd %%mm6,%%mm1\n" /* 0 R1 G1 B1 0 R0 G0 B0 */
"movq %%mm1,(%5)\n" /* wrote out ! row2 */
"punpckhwd %%mm6,%%mm5\n" /* 0 R3 G3 B3 0 R2 G2 B2 */
"movq %%mm5,8(%5)\n" /* wrote out ! row2 */
"addl $4,%2\n" /* lum+4 */
"leal 16(%3),%3\n" /* row1+16 */
"leal 16(%5),%5\n" /* row2+16 */
"addl $2,%0\n" /* cr+2 */
"addl $2,%1\n" /* cb+2 */
"addl $4,%6\n" /* x+4 */
"cmpl %4,%6\n"
"jl 1b\n"
"addl %4,%2\n" /* lum += cols */
"addl %8,%3\n" /* row1+= mod */
"addl %8,%5\n" /* row2+= mod */
"movl $0,%6\n" /* x=0 */
"cmpl %7,%2\n"
"jl 1b\n"
"emms\n" /* reset MMX registers. */
:
: "r" (cr), "r"(cb),"r"(lum),
"r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
"m"(MMX_0080w),"m"(MMX_VgrnRGB),"m"(MMX_VredRGB),
"m"(MMX_FF00w),"m"(MMX_00FFw),"m"(MMX_UgrnRGB),
"m"(MMX_UbluRGB)
);
}
void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
unsigned char *lum, unsigned char *cr,
unsigned char *cb, unsigned char *out,
int rows, int cols, int mod )
{
Uint16 *row1;
Uint16 *row2;
unsigned char* y = lum +cols*rows; /* Pointer to the end */
int x = 0;
row1 = (Uint16 *)out; /* 16 bit target */
row2 = (Uint16 *)out+cols+mod; /* start of second row */
mod = (mod+cols+mod)*2; /* increment for row1 in byte */
__asm__ __volatile__(
".align 8\n"
"1:\n"
"movd (%1), %%mm0\n" /* 4 Cb 0 0 0 0 u3 u2 u1 u0 */
"pxor %%mm7, %%mm7\n"
"movd (%0), %%mm1\n" /* 4 Cr 0 0 0 0 v3 v2 v1 v0 */
"punpcklbw %%mm7, %%mm0\n" /* 4 W cb 0 u3 0 u2 0 u1 0 u0 */
"punpcklbw %%mm7, %%mm1\n" /* 4 W cr 0 v3 0 v2 0 v1 0 v0 */
"psubw %9, %%mm0\n"
"psubw %9, %%mm1\n"
"movq %%mm0, %%mm2\n" /* Cb 0 u3 0 u2 0 u1 0 u0 */
"movq %%mm1, %%mm3\n" /* Cr */
"pmullw %10, %%mm2\n" /* Cb2green 0 R3 0 R2 0 R1 0 R0 */
"movq (%2), %%mm6\n" /* L1 l7 L6 L5 L4 L3 L2 L1 L0 */
"pmullw %11, %%mm0\n" /* Cb2blue */
"pand %12, %%mm6\n" /* L1 00 L6 00 L4 00 L2 00 L0 */
"pmullw %13, %%mm3\n" /* Cr2green */
"movq (%2), %%mm7\n" /* L2 */
"pmullw %14, %%mm1\n" /* Cr2red */
"psrlw $8, %%mm7\n" /* L2 00 L7 00 L5 00 L3 00 L1 */
"pmullw %15, %%mm6\n" /* lum1 */
"paddw %%mm3, %%mm2\n" /* Cb2green + Cr2green == green */
"pmullw %15, %%mm7\n" /* lum2 */
"movq %%mm6, %%mm4\n" /* lum1 */
"paddw %%mm0, %%mm6\n" /* lum1 +blue 00 B6 00 B4 00 B2 00 B0 */
"movq %%mm4, %%mm5\n" /* lum1 */
"paddw %%mm1, %%mm4\n" /* lum1 +red 00 R6 00 R4 00 R2 00 R0 */
"paddw %%mm2, %%mm5\n" /* lum1 +green 00 G6 00 G4 00 G2 00 G0 */
"psraw $6, %%mm4\n" /* R1 0 .. 64 */
"movq %%mm7, %%mm3\n" /* lum2 00 L7 00 L5 00 L3 00 L1 */
"psraw $6, %%mm5\n" /* G1 - .. + */
"paddw %%mm0, %%mm7\n" /* Lum2 +blue 00 B7 00 B5 00 B3 00 B1 */
"psraw $6, %%mm6\n" /* B1 0 .. 64 */
"packuswb %%mm4, %%mm4\n" /* R1 R1 */
"packuswb %%mm5, %%mm5\n" /* G1 G1 */
"packuswb %%mm6, %%mm6\n" /* B1 B1 */
"punpcklbw %%mm4, %%mm4\n"
"punpcklbw %%mm5, %%mm5\n"
"pand %16, %%mm4\n"
"psllw $3, %%mm5\n" /* GREEN 1 */
"punpcklbw %%mm6, %%mm6\n"
"pand %17, %%mm5\n"
"pand %16, %%mm6\n"
"por %%mm5, %%mm4\n" /* */
"psrlw $11, %%mm6\n" /* BLUE 1 */
"movq %%mm3, %%mm5\n" /* lum2 */
"paddw %%mm1, %%mm3\n" /* lum2 +red 00 R7 00 R5 00 R3 00 R1 */
"paddw %%mm2, %%mm5\n" /* lum2 +green 00 G7 00 G5 00 G3 00 G1 */
"psraw $6, %%mm3\n" /* R2 */
"por %%mm6, %%mm4\n" /* MM4 */
"psraw $6, %%mm5\n" /* G2 */
"movq (%2, %4), %%mm6\n" /* L3 load lum2 */
"psraw $6, %%mm7\n"
"packuswb %%mm3, %%mm3\n"
"packuswb %%mm5, %%mm5\n"
"packuswb %%mm7, %%mm7\n"
"pand %12, %%mm6\n" /* L3 */
"punpcklbw %%mm3, %%mm3\n"
"punpcklbw %%mm5, %%mm5\n"
"pmullw %15, %%mm6\n" /* lum3 */
"punpcklbw %%mm7, %%mm7\n"
"psllw $3, %%mm5\n" /* GREEN 2 */
"pand %16, %%mm7\n"
"pand %16, %%mm3\n"
"psrlw $11, %%mm7\n" /* BLUE 2 */
"pand %17, %%mm5\n"
"por %%mm7, %%mm3\n"
"movq (%2,%4), %%mm7\n" /* L4 load lum2 */
"por %%mm5, %%mm3\n"
"psrlw $8, %%mm7\n" /* L4 */
"movq %%mm4, %%mm5\n"
"punpcklwd %%mm3, %%mm4\n"
"pmullw %15, %%mm7\n" /* lum4 */
"punpckhwd %%mm3, %%mm5\n"
"movq %%mm4, (%3)\n" /* write row1 */
"movq %%mm5, 8(%3)\n" /* write row1 */
"movq %%mm6, %%mm4\n" /* Lum3 */
"paddw %%mm0, %%mm6\n" /* Lum3 +blue */
"movq %%mm4, %%mm5\n" /* Lum3 */
"paddw %%mm1, %%mm4\n" /* Lum3 +red */
"paddw %%mm2, %%mm5\n" /* Lum3 +green */
"psraw $6, %%mm4\n"
"movq %%mm7, %%mm3\n" /* Lum4 */
"psraw $6, %%mm5\n"
"paddw %%mm0, %%mm7\n" /* Lum4 +blue */
"psraw $6, %%mm6\n" /* Lum3 +blue */
"movq %%mm3, %%mm0\n" /* Lum4 */
"packuswb %%mm4, %%mm4\n"
"paddw %%mm1, %%mm3\n" /* Lum4 +red */
"packuswb %%mm5, %%mm5\n"
"paddw %%mm2, %%mm0\n" /* Lum4 +green */
"packuswb %%mm6, %%mm6\n"
"punpcklbw %%mm4, %%mm4\n"
"punpcklbw %%mm5, %%mm5\n"
"punpcklbw %%mm6, %%mm6\n"
"psllw $3, %%mm5\n" /* GREEN 3 */
"pand %16, %%mm4\n"
"psraw $6, %%mm3\n" /* psr 6 */
"psraw $6, %%mm0\n"
"pand %16, %%mm6\n" /* BLUE */
"pand %17, %%mm5\n"
"psrlw $11, %%mm6\n" /* BLUE 3 */
"por %%mm5, %%mm4\n"
"psraw $6, %%mm7\n"
"por %%mm6, %%mm4\n"
"packuswb %%mm3, %%mm3\n"
"packuswb %%mm0, %%mm0\n"
"packuswb %%mm7, %%mm7\n"
"punpcklbw %%mm3, %%mm3\n"
"punpcklbw %%mm0, %%mm0\n"
"punpcklbw %%mm7, %%mm7\n"
"pand %16, %%mm3\n"
"pand %16, %%mm7\n" /* BLUE */
"psllw $3, %%mm0\n" /* GREEN 4 */
"psrlw $11, %%mm7\n"
"pand %17, %%mm0\n"
"por %%mm7, %%mm3\n"
"por %%mm0, %%mm3\n"
"movq %%mm4, %%mm5\n"
"punpcklwd %%mm3, %%mm4\n"
"punpckhwd %%mm3, %%mm5\n"
"movq %%mm4, (%5)\n"
"movq %%mm5, 8(%5)\n"
"addl $8, %6\n"
"addl $8, %2\n"
"addl $4, %0\n"
"addl $4, %1\n"
"cmpl %4, %6\n"
"leal 16(%3), %3\n"
"leal 16(%5),%5\n" /* row2+16 */
"jl 1b\n"
"addl %4, %2\n" /* lum += cols */
"addl %8, %3\n" /* row1+= mod */
"addl %8, %5\n" /* row2+= mod */
"movl $0, %6\n" /* x=0 */
"cmpl %7, %2\n"
"jl 1b\n"
"emms\n"
:
: "r" (cr), "r"(cb),"r"(lum),
"r"(row1),"r"(cols),"r"(row2),"m"(x),"m"(y),"m"(mod),
"m"(MMX_0080w),"m"(MMX_Ugrn565),"m"(MMX_Ublu5x5),
"m"(MMX_00FFw),"m"(MMX_Vgrn565),"m"(MMX_Vred5x5),
"m"(MMX_Ycoeff),"m"(MMX_red565),"m"(MMX_grn565)
);
}
/* *INDENT-ON* */
#endif /* USE_MMX_ASSEMBLY */
/* vi: set ts=4 sw=4 expandtab: */

File diff suppressed because it is too large Load diff

View file

@ -30,16 +30,6 @@ struct SDL_SW_YUVTexture
Uint32 target_format;
int w, h;
Uint8 *pixels;
int *colortab;
Uint32 *rgb_2_pix;
void (*Display1X) (int *colortab, Uint32 * rgb_2_pix,
unsigned char *lum, unsigned char *cr,
unsigned char *cb, unsigned char *out,
int rows, int cols, int mod);
void (*Display2X) (int *colortab, Uint32 * rgb_2_pix,
unsigned char *lum, unsigned char *cr,
unsigned char *cb, unsigned char *out,
int rows, int cols, int mod);
/* These are just so we don't have to allocate them separately */
Uint16 pitches[3];

View file

@ -39,85 +39,7 @@
#include <d3d9.h>
#endif
#ifdef ASSEMBLE_SHADER
#pragma comment(lib, "d3dx9.lib")
/**************************************************************************
* ID3DXBuffer:
* ------------
* The buffer object is used by D3DX to return arbitrary size data.
*
* GetBufferPointer -
* Returns a pointer to the beginning of the buffer.
*
* GetBufferSize -
* Returns the size of the buffer, in bytes.
**************************************************************************/
typedef interface ID3DXBuffer ID3DXBuffer;
typedef interface ID3DXBuffer *LPD3DXBUFFER;
/* {8BA5FB08-5195-40e2-AC58-0D989C3A0102} */
DEFINE_GUID(IID_ID3DXBuffer,
0x8ba5fb08, 0x5195, 0x40e2, 0xac, 0x58, 0xd, 0x98, 0x9c, 0x3a, 0x1, 0x2);
#undef INTERFACE
#define INTERFACE ID3DXBuffer
typedef interface ID3DXBuffer {
const struct ID3DXBufferVtbl FAR* lpVtbl;
} ID3DXBuffer;
typedef const struct ID3DXBufferVtbl ID3DXBufferVtbl;
const struct ID3DXBufferVtbl
{
/* IUnknown */
STDMETHOD(QueryInterface)(THIS_ REFIID iid, LPVOID *ppv) PURE;
STDMETHOD_(ULONG, AddRef)(THIS) PURE;
STDMETHOD_(ULONG, Release)(THIS) PURE;
/* ID3DXBuffer */
STDMETHOD_(LPVOID, GetBufferPointer)(THIS) PURE;
STDMETHOD_(DWORD, GetBufferSize)(THIS) PURE;
};
HRESULT WINAPI
D3DXAssembleShader(
LPCSTR pSrcData,
UINT SrcDataLen,
CONST LPVOID* pDefines,
LPVOID pInclude,
DWORD Flags,
LPD3DXBUFFER* ppShader,
LPD3DXBUFFER* ppErrorMsgs);
static void PrintShaderData(LPDWORD shader_data, DWORD shader_size)
{
OutputDebugStringA("const DWORD shader_data[] = {\n\t");
{
SDL_bool newline = SDL_FALSE;
unsigned i;
for (i = 0; i < shader_size / sizeof(DWORD); ++i) {
char dword[11];
if (i > 0) {
if ((i%6) == 0) {
newline = SDL_TRUE;
}
if (newline) {
OutputDebugStringA(",\n ");
newline = SDL_FALSE;
} else {
OutputDebugStringA(", ");
}
}
SDL_snprintf(dword, sizeof(dword), "0x%8.8x", shader_data[i]);
OutputDebugStringA(dword);
}
OutputDebugStringA("\n};\n");
}
}
#endif /* ASSEMBLE_SHADER */
#include "SDL_shaders_d3d.h"
/* Direct3D renderer implementation */
@ -188,7 +110,7 @@ typedef struct
IDirect3DSurface9 *defaultRenderTarget;
IDirect3DSurface9 *currentRenderTarget;
void* d3dxDLL;
LPDIRECT3DPIXELSHADER9 ps_yuv;
LPDIRECT3DPIXELSHADER9 shaders[NUM_SHADERS];
} D3D_RenderData;
typedef struct
@ -197,6 +119,7 @@ typedef struct
int w, h;
DWORD usage;
Uint32 format;
D3DFORMAT d3dfmt;
IDirect3DTexture9 *texture;
IDirect3DTexture9 *staging;
} D3D_TextureRep;
@ -313,6 +236,8 @@ PixelFormatToD3DFMT(Uint32 format)
return D3DFMT_A8R8G8B8;
case SDL_PIXELFORMAT_YV12:
case SDL_PIXELFORMAT_IYUV:
case SDL_PIXELFORMAT_NV12:
case SDL_PIXELFORMAT_NV21:
return D3DFMT_L8;
default:
return D3DFMT_UNKNOWN;
@ -661,137 +586,19 @@ D3D_CreateRenderer(SDL_Window * window, Uint32 flags)
/* Set up parameters for rendering */
D3D_InitRenderState(data);
if (caps.MaxSimultaneousTextures >= 3)
{
#ifdef ASSEMBLE_SHADER
/* This shader was created by running the following HLSL through the fxc compiler
and then tuning the generated assembly.
fxc /T fx_4_0 /O3 /Gfa /Fc yuv.fxc yuv.fx
--- yuv.fx ---
Texture2D g_txY;
Texture2D g_txU;
Texture2D g_txV;
SamplerState samLinear
{
Filter = ANISOTROPIC;
AddressU = Clamp;
AddressV = Clamp;
MaxAnisotropy = 1;
};
struct VS_OUTPUT
{
float2 TextureUV : TEXCOORD0;
};
struct PS_OUTPUT
{
float4 RGBAColor : SV_Target;
};
PS_OUTPUT YUV420( VS_OUTPUT In )
{
const float3 offset = {-0.0627451017, -0.501960814, -0.501960814};
const float3 Rcoeff = {1.164, 0.000, 1.596};
const float3 Gcoeff = {1.164, -0.391, -0.813};
const float3 Bcoeff = {1.164, 2.018, 0.000};
PS_OUTPUT Output;
float2 TextureUV = In.TextureUV;
float3 yuv;
yuv.x = g_txY.Sample( samLinear, TextureUV ).r;
yuv.y = g_txU.Sample( samLinear, TextureUV ).r;
yuv.z = g_txV.Sample( samLinear, TextureUV ).r;
yuv += offset;
Output.RGBAColor.r = dot(yuv, Rcoeff);
Output.RGBAColor.g = dot(yuv, Gcoeff);
Output.RGBAColor.b = dot(yuv, Bcoeff);
Output.RGBAColor.a = 1.0f;
return Output;
}
technique10 RenderYUV420
{
pass P0
{
SetPixelShader( CompileShader( ps_4_0_level_9_0, YUV420() ) );
}
}
*/
const char *shader_text =
"ps_2_0\n"
"def c0, -0.0627451017, -0.501960814, -0.501960814, 1\n"
"def c1, 1.16400003, 0, 1.59599996, 0\n"
"def c2, 1.16400003, -0.391000003, -0.813000023, 0\n"
"def c3, 1.16400003, 2.01799989, 0, 0\n"
"dcl t0.xy\n"
"dcl v0.xyzw\n"
"dcl_2d s0\n"
"dcl_2d s1\n"
"dcl_2d s2\n"
"texld r0, t0, s0\n"
"texld r1, t0, s1\n"
"texld r2, t0, s2\n"
"mov r0.y, r1.x\n"
"mov r0.z, r2.x\n"
"add r0.xyz, r0, c0\n"
"dp3 r1.x, r0, c1\n"
"dp3 r1.y, r0, c2\n"
"dp2add r1.z, r0, c3, c3.z\n" /* Logically this is "dp3 r1.z, r0, c3" but the optimizer did its magic */
"mov r1.w, c0.w\n"
"mul r0, r1, v0\n" /* Not in the HLSL, multiply by vertex color */
"mov oC0, r0\n"
;
LPD3DXBUFFER pCode;
LPD3DXBUFFER pErrorMsgs;
LPDWORD shader_data = NULL;
DWORD shader_size = 0;
result = D3DXAssembleShader(shader_text, SDL_strlen(shader_text), NULL, NULL, 0, &pCode, &pErrorMsgs);
if (!FAILED(result)) {
shader_data = (DWORD*)pCode->lpVtbl->GetBufferPointer(pCode);
shader_size = pCode->lpVtbl->GetBufferSize(pCode);
PrintShaderData(shader_data, shader_size);
} else {
const char *error = (const char *)pErrorMsgs->lpVtbl->GetBufferPointer(pErrorMsgs);
SDL_SetError("Couldn't assemble shader: %s", error);
}
if (shader_data != NULL)
#else
const DWORD shader_data[] = {
0xffff0200, 0x05000051, 0xa00f0000, 0xbd808081, 0xbf008081, 0xbf008081,
0x3f800000, 0x05000051, 0xa00f0001, 0x3f94fdf4, 0x00000000, 0x3fcc49ba,
0x00000000, 0x05000051, 0xa00f0002, 0x3f94fdf4, 0xbec83127, 0xbf5020c5,
0x00000000, 0x05000051, 0xa00f0003, 0x3f94fdf4, 0x400126e9, 0x00000000,
0x00000000, 0x0200001f, 0x80000000, 0xb0030000, 0x0200001f, 0x80000000,
0x900f0000, 0x0200001f, 0x90000000, 0xa00f0800, 0x0200001f, 0x90000000,
0xa00f0801, 0x0200001f, 0x90000000, 0xa00f0802, 0x03000042, 0x800f0000,
0xb0e40000, 0xa0e40800, 0x03000042, 0x800f0001, 0xb0e40000, 0xa0e40801,
0x03000042, 0x800f0002, 0xb0e40000, 0xa0e40802, 0x02000001, 0x80020000,
0x80000001, 0x02000001, 0x80040000, 0x80000002, 0x03000002, 0x80070000,
0x80e40000, 0xa0e40000, 0x03000008, 0x80010001, 0x80e40000, 0xa0e40001,
0x03000008, 0x80020001, 0x80e40000, 0xa0e40002, 0x0400005a, 0x80040001,
0x80e40000, 0xa0e40003, 0xa0aa0003, 0x02000001, 0x80080001, 0xa0ff0000,
0x03000005, 0x800f0000, 0x80e40001, 0x90e40000, 0x02000001, 0x800f0800,
0x80e40000, 0x0000ffff
};
#endif
{
result = IDirect3DDevice9_CreatePixelShader(data->device, shader_data, &data->ps_yuv);
if (!FAILED(result)) {
renderer->info.texture_formats[renderer->info.num_texture_formats++] = SDL_PIXELFORMAT_YV12;
renderer->info.texture_formats[renderer->info.num_texture_formats++] = SDL_PIXELFORMAT_IYUV;
} else {
if (caps.MaxSimultaneousTextures >= 3) {
int i;
for (i = 0; i < SDL_arraysize(data->shaders); ++i) {
result = D3D9_CreatePixelShader(data->device, (D3D9_Shader)i, &data->shaders[i]);
if (FAILED(result)) {
D3D_SetError("CreatePixelShader()", result);
}
}
if (data->shaders[SHADER_YUV_JPEG] && data->shaders[SHADER_YUV_BT601] && data->shaders[SHADER_YUV_BT709]) {
renderer->info.texture_formats[renderer->info.num_texture_formats++] = SDL_PIXELFORMAT_YV12;
renderer->info.texture_formats[renderer->info.num_texture_formats++] = SDL_PIXELFORMAT_IYUV;
}
}
return renderer;
}
@ -870,7 +677,7 @@ GetScaleQuality(void)
}
static int
D3D_CreateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, DWORD usage, Uint32 format, int w, int h)
D3D_CreateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, DWORD usage, Uint32 format, D3DFORMAT d3dfmt, int w, int h)
{
HRESULT result;
@ -879,6 +686,7 @@ D3D_CreateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, DWORD us
texture->h = h;
texture->usage = usage;
texture->format = format;
texture->d3dfmt = d3dfmt;
result = IDirect3DDevice9_CreateTexture(device, w, h, 1, usage,
PixelFormatToD3DFMT(format),
@ -897,8 +705,7 @@ D3D_CreateStagingTexture(IDirect3DDevice9 *device, D3D_TextureRep *texture)
if (texture->staging == NULL) {
result = IDirect3DDevice9_CreateTexture(device, texture->w, texture->h, 1, 0,
PixelFormatToD3DFMT(texture->format),
D3DPOOL_SYSTEMMEM, &texture->staging, NULL);
texture->d3dfmt, D3DPOOL_SYSTEMMEM, &texture->staging, NULL);
if (FAILED(result)) {
return D3D_SetError("CreateTexture(D3DPOOL_SYSTEMMEM)", result);
}
@ -934,7 +741,7 @@ D3D_BindTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, DWORD samp
}
static int
D3D_RecreateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, Uint32 format, int w, int h)
D3D_RecreateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture)
{
if (texture->texture) {
IDirect3DTexture9_Release(texture->texture);
@ -948,7 +755,7 @@ D3D_RecreateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, Uint32
}
static int
D3D_UpdateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, Uint32 format, int x, int y, int w, int h, const void *pixels, int pitch)
D3D_UpdateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, int x, int y, int w, int h, const void *pixels, int pitch)
{
RECT d3drect;
D3DLOCKED_RECT locked;
@ -972,8 +779,8 @@ D3D_UpdateTextureRep(IDirect3DDevice9 *device, D3D_TextureRep *texture, Uint32 f
}
src = (const Uint8 *)pixels;
dst = locked.pBits;
length = w * SDL_BYTESPERPIXEL(format);
dst = (Uint8 *)locked.pBits;
length = w * SDL_BYTESPERPIXEL(texture->format);
if (length == pitch && length == locked.Pitch) {
SDL_memcpy(dst, src, length*h);
} else {
@ -1032,7 +839,7 @@ D3D_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
usage = 0;
}
if (D3D_CreateTextureRep(data->device, &texturedata->texture, usage, texture->format, texture->w, texture->h) < 0) {
if (D3D_CreateTextureRep(data->device, &texturedata->texture, usage, texture->format, PixelFormatToD3DFMT(texture->format), texture->w, texture->h) < 0) {
return -1;
}
@ -1040,11 +847,11 @@ D3D_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
texture->format == SDL_PIXELFORMAT_IYUV) {
texturedata->yuv = SDL_TRUE;
if (D3D_CreateTextureRep(data->device, &texturedata->utexture, usage, texture->format, texture->w / 2, texture->h / 2) < 0) {
if (D3D_CreateTextureRep(data->device, &texturedata->utexture, usage, texture->format, PixelFormatToD3DFMT(texture->format), (texture->w + 1) / 2, (texture->h + 1) / 2) < 0) {
return -1;
}
if (D3D_CreateTextureRep(data->device, &texturedata->vtexture, usage, texture->format, texture->w / 2, texture->h / 2) < 0) {
if (D3D_CreateTextureRep(data->device, &texturedata->vtexture, usage, texture->format, PixelFormatToD3DFMT(texture->format), (texture->w + 1) / 2, (texture->h + 1) / 2) < 0) {
return -1;
}
}
@ -1061,16 +868,16 @@ D3D_RecreateTexture(SDL_Renderer * renderer, SDL_Texture * texture)
return 0;
}
if (D3D_RecreateTextureRep(data->device, &texturedata->texture, texture->format, texture->w, texture->h) < 0) {
if (D3D_RecreateTextureRep(data->device, &texturedata->texture) < 0) {
return -1;
}
if (texturedata->yuv) {
if (D3D_RecreateTextureRep(data->device, &texturedata->utexture, texture->format, texture->w / 2, texture->h / 2) < 0) {
if (D3D_RecreateTextureRep(data->device, &texturedata->utexture) < 0) {
return -1;
}
if (D3D_RecreateTextureRep(data->device, &texturedata->vtexture, texture->format, texture->w / 2, texture->h / 2) < 0) {
if (D3D_RecreateTextureRep(data->device, &texturedata->vtexture) < 0) {
return -1;
}
}
@ -1089,7 +896,7 @@ D3D_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
return -1;
}
if (D3D_UpdateTextureRep(data->device, &texturedata->texture, texture->format, rect->x, rect->y, rect->w, rect->h, pixels, pitch) < 0) {
if (D3D_UpdateTextureRep(data->device, &texturedata->texture, rect->x, rect->y, rect->w, rect->h, pixels, pitch) < 0) {
return -1;
}
@ -1097,13 +904,13 @@ D3D_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
/* Skip to the correct offset into the next texture */
pixels = (const void*)((const Uint8*)pixels + rect->h * pitch);
if (D3D_UpdateTextureRep(data->device, texture->format == SDL_PIXELFORMAT_YV12 ? &texturedata->vtexture : &texturedata->utexture, texture->format, rect->x / 2, rect->y / 2, rect->w / 2, rect->h / 2, pixels, pitch / 2) < 0) {
if (D3D_UpdateTextureRep(data->device, texture->format == SDL_PIXELFORMAT_YV12 ? &texturedata->vtexture : &texturedata->utexture, rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2, pixels, (pitch + 1) / 2) < 0) {
return -1;
}
/* Skip to the correct offset into the next texture */
pixels = (const void*)((const Uint8*)pixels + (rect->h * pitch)/4);
if (D3D_UpdateTextureRep(data->device, texture->format == SDL_PIXELFORMAT_YV12 ? &texturedata->utexture : &texturedata->vtexture, texture->format, rect->x / 2, rect->y / 2, rect->w / 2, rect->h / 2, pixels, pitch / 2) < 0) {
pixels = (const void*)((const Uint8*)pixels + ((rect->h + 1) / 2) * ((pitch + 1) / 2));
if (D3D_UpdateTextureRep(data->device, texture->format == SDL_PIXELFORMAT_YV12 ? &texturedata->utexture : &texturedata->vtexture, rect->x / 2, (rect->y + 1) / 2, (rect->w + 1) / 2, (rect->h + 1) / 2, pixels, (pitch + 1) / 2) < 0) {
return -1;
}
}
@ -1125,13 +932,13 @@ D3D_UpdateTextureYUV(SDL_Renderer * renderer, SDL_Texture * texture,
return -1;
}
if (D3D_UpdateTextureRep(data->device, &texturedata->texture, texture->format, rect->x, rect->y, rect->w, rect->h, Yplane, Ypitch) < 0) {
if (D3D_UpdateTextureRep(data->device, &texturedata->texture, rect->x, rect->y, rect->w, rect->h, Yplane, Ypitch) < 0) {
return -1;
}
if (D3D_UpdateTextureRep(data->device, &texturedata->utexture, texture->format, rect->x / 2, rect->y / 2, rect->w / 2, rect->h / 2, Uplane, Upitch) < 0) {
if (D3D_UpdateTextureRep(data->device, &texturedata->utexture, rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2, Uplane, Upitch) < 0) {
return -1;
}
if (D3D_UpdateTextureRep(data->device, &texturedata->vtexture, texture->format, rect->x / 2, rect->y / 2, rect->w / 2, rect->h / 2, Vplane, Vpitch) < 0) {
if (D3D_UpdateTextureRep(data->device, &texturedata->vtexture, rect->x / 2, rect->y / 2, (rect->w + 1) / 2, (rect->h + 1) / 2, Vplane, Vpitch) < 0) {
return -1;
}
return 0;
@ -1609,13 +1416,60 @@ D3D_UpdateTextureScaleMode(D3D_RenderData *data, D3D_TextureData *texturedata, u
}
}
static int
D3D_RenderSetupTextureState(SDL_Renderer * renderer, SDL_Texture * texture, LPDIRECT3DPIXELSHADER9 *shader)
{
D3D_RenderData *data = (D3D_RenderData *) renderer->driverdata;
D3D_TextureData *texturedata;
*shader = NULL;
texturedata = (D3D_TextureData *)texture->driverdata;
if (!texturedata) {
SDL_SetError("Texture is not currently available");
return -1;
}
D3D_UpdateTextureScaleMode(data, texturedata, 0);
if (D3D_BindTextureRep(data->device, &texturedata->texture, 0) < 0) {
return -1;
}
if (texturedata->yuv) {
switch (SDL_GetYUVConversionModeForResolution(texture->w, texture->h)) {
case SDL_YUV_CONVERSION_JPEG:
*shader = data->shaders[SHADER_YUV_JPEG];
break;
case SDL_YUV_CONVERSION_BT601:
*shader = data->shaders[SHADER_YUV_BT601];
break;
case SDL_YUV_CONVERSION_BT709:
*shader = data->shaders[SHADER_YUV_BT709];
break;
default:
return SDL_SetError("Unsupported YUV conversion mode");
}
D3D_UpdateTextureScaleMode(data, texturedata, 1);
D3D_UpdateTextureScaleMode(data, texturedata, 2);
if (D3D_BindTextureRep(data->device, &texturedata->utexture, 1) < 0) {
return -1;
}
if (D3D_BindTextureRep(data->device, &texturedata->vtexture, 2) < 0) {
return -1;
}
}
return 0;
}
static int
D3D_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
const SDL_Rect * srcrect, const SDL_FRect * dstrect)
{
D3D_RenderData *data = (D3D_RenderData *) renderer->driverdata;
D3D_TextureData *texturedata;
LPDIRECT3DPIXELSHADER9 shader = NULL;
LPDIRECT3DPIXELSHADER9 shader;
float minx, miny, maxx, maxy;
float minu, maxu, minv, maxv;
DWORD color;
@ -1626,12 +1480,6 @@ D3D_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
return -1;
}
texturedata = (D3D_TextureData *)texture->driverdata;
if (!texturedata) {
SDL_SetError("Texture is not currently available");
return -1;
}
minx = dstrect->x - 0.5f;
miny = dstrect->y - 0.5f;
maxx = dstrect->x + dstrect->w - 0.5f;
@ -1674,45 +1522,25 @@ D3D_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
D3D_SetBlendMode(data, texture->blendMode);
D3D_UpdateTextureScaleMode(data, texturedata, 0);
if (D3D_BindTextureRep(data->device, &texturedata->texture, 0) < 0) {
if (D3D_RenderSetupTextureState(renderer, texture, &shader) < 0) {
return -1;
}
if (texturedata->yuv) {
shader = data->ps_yuv;
D3D_UpdateTextureScaleMode(data, texturedata, 1);
D3D_UpdateTextureScaleMode(data, texturedata, 2);
if (D3D_BindTextureRep(data->device, &texturedata->utexture, 1) < 0) {
return -1;
}
if (D3D_BindTextureRep(data->device, &texturedata->vtexture, 2) < 0) {
return -1;
}
}
if (shader) {
result = IDirect3DDevice9_SetPixelShader(data->device, shader);
if (FAILED(result)) {
return D3D_SetError("SetShader()", result);
}
}
result =
IDirect3DDevice9_DrawPrimitiveUP(data->device, D3DPT_TRIANGLEFAN, 2,
vertices, sizeof(*vertices));
result = IDirect3DDevice9_DrawPrimitiveUP(data->device, D3DPT_TRIANGLEFAN, 2,
vertices, sizeof(*vertices));
if (FAILED(result)) {
return D3D_SetError("DrawPrimitiveUP()", result);
D3D_SetError("DrawPrimitiveUP()", result);
}
if (shader) {
result = IDirect3DDevice9_SetPixelShader(data->device, NULL);
if (FAILED(result)) {
return D3D_SetError("SetShader()", result);
}
IDirect3DDevice9_SetPixelShader(data->device, NULL);
}
return 0;
return FAILED(result) ? -1 : 0;
}
@ -1722,7 +1550,6 @@ D3D_RenderCopyEx(SDL_Renderer * renderer, SDL_Texture * texture,
const double angle, const SDL_FPoint * center, const SDL_RendererFlip flip)
{
D3D_RenderData *data = (D3D_RenderData *) renderer->driverdata;
D3D_TextureData *texturedata;
LPDIRECT3DPIXELSHADER9 shader = NULL;
float minx, miny, maxx, maxy;
float minu, maxu, minv, maxv;
@ -1736,12 +1563,6 @@ D3D_RenderCopyEx(SDL_Renderer * renderer, SDL_Texture * texture,
return -1;
}
texturedata = (D3D_TextureData *)texture->driverdata;
if (!texturedata) {
SDL_SetError("Texture is not currently available");
return -1;
}
centerx = center->x;
centery = center->y;
@ -1798,54 +1619,37 @@ D3D_RenderCopyEx(SDL_Renderer * renderer, SDL_Texture * texture,
D3D_SetBlendMode(data, texture->blendMode);
if (D3D_RenderSetupTextureState(renderer, texture, &shader) < 0) {
return -1;
}
/* Rotate and translate */
modelMatrix = MatrixMultiply(
MatrixRotationZ((float)(M_PI * (float) angle / 180.0f)),
MatrixTranslation(dstrect->x + center->x - 0.5f, dstrect->y + center->y - 0.5f, 0));
IDirect3DDevice9_SetTransform(data->device, D3DTS_VIEW, (D3DMATRIX*)&modelMatrix);
D3D_UpdateTextureScaleMode(data, texturedata, 0);
if (D3D_BindTextureRep(data->device, &texturedata->texture, 0) < 0) {
return -1;
}
if (texturedata->yuv) {
shader = data->ps_yuv;
D3D_UpdateTextureScaleMode(data, texturedata, 1);
D3D_UpdateTextureScaleMode(data, texturedata, 2);
if (D3D_BindTextureRep(data->device, &texturedata->utexture, 1) < 0) {
return -1;
}
if (D3D_BindTextureRep(data->device, &texturedata->vtexture, 2) < 0) {
return -1;
}
}
if (shader) {
result = IDirect3DDevice9_SetPixelShader(data->device, shader);
if (FAILED(result)) {
return D3D_SetError("SetShader()", result);
D3D_SetError("SetShader()", result);
goto done;
}
}
result =
IDirect3DDevice9_DrawPrimitiveUP(data->device, D3DPT_TRIANGLEFAN, 2,
vertices, sizeof(*vertices));
result = IDirect3DDevice9_DrawPrimitiveUP(data->device, D3DPT_TRIANGLEFAN, 2,
vertices, sizeof(*vertices));
if (FAILED(result)) {
return D3D_SetError("DrawPrimitiveUP()", result);
D3D_SetError("DrawPrimitiveUP()", result);
}
done:
if (shader) {
result = IDirect3DDevice9_SetPixelShader(data->device, NULL);
if (FAILED(result)) {
return D3D_SetError("SetShader()", result);
}
IDirect3DDevice9_SetPixelShader(data->device, NULL);
}
modelMatrix = MatrixIdentity();
IDirect3DDevice9_SetTransform(data->device, D3DTS_VIEW, (D3DMATRIX*)&modelMatrix);
return 0;
return FAILED(result) ? -1 : 0;
}
static int
@ -1955,6 +1759,8 @@ D3D_DestroyRenderer(SDL_Renderer * renderer)
D3D_RenderData *data = (D3D_RenderData *) renderer->driverdata;
if (data) {
int i;
/* Release the render target */
if (data->defaultRenderTarget) {
IDirect3DSurface9_Release(data->defaultRenderTarget);
@ -1964,11 +1770,15 @@ D3D_DestroyRenderer(SDL_Renderer * renderer)
IDirect3DSurface9_Release(data->currentRenderTarget);
data->currentRenderTarget = NULL;
}
if (data->ps_yuv) {
IDirect3DPixelShader9_Release(data->ps_yuv);
for (i = 0; i < SDL_arraysize(data->shaders); ++i) {
if (data->shaders[i]) {
IDirect3DPixelShader9_Release(data->shaders[i]);
data->shaders[i] = NULL;
}
}
if (data->device) {
IDirect3DDevice9_Release(data->device);
data->device = NULL;
}
if (data->d3d) {
IDirect3D9_Release(data->d3d);

View file

@ -0,0 +1,274 @@
/*
Simple DirectMedia Layer
Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "../../SDL_internal.h"
#include "SDL_render.h"
#include "SDL_system.h"
#if SDL_VIDEO_RENDER_D3D && !SDL_RENDER_DISABLED
#include "../../core/windows/SDL_windows.h"
#include <d3d9.h>
#include "SDL_shaders_d3d.h"
/* The shaders here were compiled with:
fxc /T ps_2_0 /Fo"<OUTPUT FILE>" "<INPUT FILE>"
Shader object code was converted to a list of DWORDs via the following
*nix style command (available separately from Windows + MSVC):
hexdump -v -e '6/4 "0x%08.8x, " "\n"' <FILE>
*/
/* --- D3D9_PixelShader_YUV_JPEG.hlsl ---
Texture2D theTextureY : register(t0);
Texture2D theTextureU : register(t1);
Texture2D theTextureV : register(t2);
SamplerState theSampler = sampler_state
{
addressU = Clamp;
addressV = Clamp;
mipfilter = NONE;
minfilter = LINEAR;
magfilter = LINEAR;
};
struct PixelShaderInput
{
float4 pos : SV_POSITION;
float2 tex : TEXCOORD0;
float4 color : COLOR0;
};
float4 main(PixelShaderInput input) : SV_TARGET
{
const float3 offset = {0.0, -0.501960814, -0.501960814};
const float3 Rcoeff = {1.0000, 0.0000, 1.4020};
const float3 Gcoeff = {1.0000, -0.3441, -0.7141};
const float3 Bcoeff = {1.0000, 1.7720, 0.0000};
float4 Output;
float3 yuv;
yuv.x = theTextureY.Sample(theSampler, input.tex).r;
yuv.y = theTextureU.Sample(theSampler, input.tex).r;
yuv.z = theTextureV.Sample(theSampler, input.tex).r;
yuv += offset;
Output.r = dot(yuv, Rcoeff);
Output.g = dot(yuv, Gcoeff);
Output.b = dot(yuv, Bcoeff);
Output.a = 1.0f;
return Output * input.color;
}
*/
static const DWORD D3D9_PixelShader_YUV_JPEG[] = {
0xffff0200, 0x0044fffe, 0x42415443, 0x0000001c, 0x000000d7, 0xffff0200,
0x00000003, 0x0000001c, 0x00000100, 0x000000d0, 0x00000058, 0x00010003,
0x00000001, 0x00000070, 0x00000000, 0x00000080, 0x00020003, 0x00000001,
0x00000098, 0x00000000, 0x000000a8, 0x00000003, 0x00000001, 0x000000c0,
0x00000000, 0x53656874, 0x6c706d61, 0x742b7265, 0x65546568, 0x72757478,
0xab005565, 0x00070004, 0x00040001, 0x00000001, 0x00000000, 0x53656874,
0x6c706d61, 0x742b7265, 0x65546568, 0x72757478, 0xab005665, 0x00070004,
0x00040001, 0x00000001, 0x00000000, 0x53656874, 0x6c706d61, 0x742b7265,
0x65546568, 0x72757478, 0xab005965, 0x00070004, 0x00040001, 0x00000001,
0x00000000, 0x325f7370, 0x4d00305f, 0x6f726369, 0x74666f73, 0x29522820,
0x534c4820, 0x6853204c, 0x72656461, 0x6d6f4320, 0x656c6970, 0x2e362072,
0x36392e33, 0x312e3030, 0x34383336, 0xababab00, 0x05000051, 0xa00f0000,
0x00000000, 0xbf008081, 0xbf008081, 0x3f800000, 0x05000051, 0xa00f0001,
0x3f800000, 0x00000000, 0x3fb374bc, 0x00000000, 0x05000051, 0xa00f0002,
0x3f800000, 0xbeb02de0, 0xbf36cf42, 0x00000000, 0x05000051, 0xa00f0003,
0x3f800000, 0x3fe2d0e5, 0x00000000, 0x00000000, 0x0200001f, 0x80000000,
0xb0030000, 0x0200001f, 0x80000000, 0x900f0000, 0x0200001f, 0x90000000,
0xa00f0800, 0x0200001f, 0x90000000, 0xa00f0801, 0x0200001f, 0x90000000,
0xa00f0802, 0x03000042, 0x800f0000, 0xb0e40000, 0xa0e40800, 0x03000042,
0x800f0001, 0xb0e40000, 0xa0e40801, 0x03000042, 0x800f0002, 0xb0e40000,
0xa0e40802, 0x02000001, 0x80020000, 0x80000001, 0x02000001, 0x80040000,
0x80000002, 0x03000002, 0x80070000, 0x80e40000, 0xa0e40000, 0x03000008,
0x80010001, 0x80e40000, 0xa0e40001, 0x03000008, 0x80020001, 0x80e40000,
0xa0e40002, 0x0400005a, 0x80040001, 0x80e40000, 0xa0e40003, 0xa0aa0003,
0x02000001, 0x80080001, 0xa0ff0000, 0x03000005, 0x800f0000, 0x80e40001,
0x90e40000, 0x02000001, 0x800f0800, 0x80e40000, 0x0000ffff
};
/* --- D3D9_PixelShader_YUV_BT601.hlsl ---
Texture2D theTextureY : register(t0);
Texture2D theTextureU : register(t1);
Texture2D theTextureV : register(t2);
SamplerState theSampler = sampler_state
{
addressU = Clamp;
addressV = Clamp;
mipfilter = NONE;
minfilter = LINEAR;
magfilter = LINEAR;
};
struct PixelShaderInput
{
float4 pos : SV_POSITION;
float2 tex : TEXCOORD0;
float4 color : COLOR0;
};
float4 main(PixelShaderInput input) : SV_TARGET
{
const float3 offset = {-0.0627451017, -0.501960814, -0.501960814};
const float3 Rcoeff = {1.1644, 0.0000, 1.5960};
const float3 Gcoeff = {1.1644, -0.3918, -0.8130};
const float3 Bcoeff = {1.1644, 2.0172, 0.0000};
float4 Output;
float3 yuv;
yuv.x = theTextureY.Sample(theSampler, input.tex).r;
yuv.y = theTextureU.Sample(theSampler, input.tex).r;
yuv.z = theTextureV.Sample(theSampler, input.tex).r;
yuv += offset;
Output.r = dot(yuv, Rcoeff);
Output.g = dot(yuv, Gcoeff);
Output.b = dot(yuv, Bcoeff);
Output.a = 1.0f;
return Output * input.color;
}
*/
static const DWORD D3D9_PixelShader_YUV_BT601[] = {
0xffff0200, 0x0044fffe, 0x42415443, 0x0000001c, 0x000000d7, 0xffff0200,
0x00000003, 0x0000001c, 0x00000100, 0x000000d0, 0x00000058, 0x00010003,
0x00000001, 0x00000070, 0x00000000, 0x00000080, 0x00020003, 0x00000001,
0x00000098, 0x00000000, 0x000000a8, 0x00000003, 0x00000001, 0x000000c0,
0x00000000, 0x53656874, 0x6c706d61, 0x742b7265, 0x65546568, 0x72757478,
0xab005565, 0x00070004, 0x00040001, 0x00000001, 0x00000000, 0x53656874,
0x6c706d61, 0x742b7265, 0x65546568, 0x72757478, 0xab005665, 0x00070004,
0x00040001, 0x00000001, 0x00000000, 0x53656874, 0x6c706d61, 0x742b7265,
0x65546568, 0x72757478, 0xab005965, 0x00070004, 0x00040001, 0x00000001,
0x00000000, 0x325f7370, 0x4d00305f, 0x6f726369, 0x74666f73, 0x29522820,
0x534c4820, 0x6853204c, 0x72656461, 0x6d6f4320, 0x656c6970, 0x2e362072,
0x36392e33, 0x312e3030, 0x34383336, 0xababab00, 0x05000051, 0xa00f0000,
0xbd808081, 0xbf008081, 0xbf008081, 0x3f800000, 0x05000051, 0xa00f0001,
0x3f950b0f, 0x00000000, 0x3fcc49ba, 0x00000000, 0x05000051, 0xa00f0002,
0x3f950b0f, 0xbec89a02, 0xbf5020c5, 0x00000000, 0x05000051, 0xa00f0003,
0x3f950b0f, 0x400119ce, 0x00000000, 0x00000000, 0x0200001f, 0x80000000,
0xb0030000, 0x0200001f, 0x80000000, 0x900f0000, 0x0200001f, 0x90000000,
0xa00f0800, 0x0200001f, 0x90000000, 0xa00f0801, 0x0200001f, 0x90000000,
0xa00f0802, 0x03000042, 0x800f0000, 0xb0e40000, 0xa0e40800, 0x03000042,
0x800f0001, 0xb0e40000, 0xa0e40801, 0x03000042, 0x800f0002, 0xb0e40000,
0xa0e40802, 0x02000001, 0x80020000, 0x80000001, 0x02000001, 0x80040000,
0x80000002, 0x03000002, 0x80070000, 0x80e40000, 0xa0e40000, 0x03000008,
0x80010001, 0x80e40000, 0xa0e40001, 0x03000008, 0x80020001, 0x80e40000,
0xa0e40002, 0x0400005a, 0x80040001, 0x80e40000, 0xa0e40003, 0xa0aa0003,
0x02000001, 0x80080001, 0xa0ff0000, 0x03000005, 0x800f0000, 0x80e40001,
0x90e40000, 0x02000001, 0x800f0800, 0x80e40000, 0x0000ffff
};
/* --- D3D9_PixelShader_YUV_BT709.hlsl ---
Texture2D theTextureY : register(t0);
Texture2D theTextureU : register(t1);
Texture2D theTextureV : register(t2);
SamplerState theSampler = sampler_state
{
addressU = Clamp;
addressV = Clamp;
mipfilter = NONE;
minfilter = LINEAR;
magfilter = LINEAR;
};
struct PixelShaderInput
{
float4 pos : SV_POSITION;
float2 tex : TEXCOORD0;
float4 color : COLOR0;
};
float4 main(PixelShaderInput input) : SV_TARGET
{
const float3 offset = {-0.0627451017, -0.501960814, -0.501960814};
const float3 Rcoeff = {1.1644, 0.0000, 1.7927};
const float3 Gcoeff = {1.1644, -0.2132, -0.5329};
const float3 Bcoeff = {1.1644, 2.1124, 0.0000};
float4 Output;
float3 yuv;
yuv.x = theTextureY.Sample(theSampler, input.tex).r;
yuv.y = theTextureU.Sample(theSampler, input.tex).r;
yuv.z = theTextureV.Sample(theSampler, input.tex).r;
yuv += offset;
Output.r = dot(yuv, Rcoeff);
Output.g = dot(yuv, Gcoeff);
Output.b = dot(yuv, Bcoeff);
Output.a = 1.0f;
return Output * input.color;
}
*/
static const DWORD D3D9_PixelShader_YUV_BT709[] = {
0xffff0200, 0x0044fffe, 0x42415443, 0x0000001c, 0x000000d7, 0xffff0200,
0x00000003, 0x0000001c, 0x00000100, 0x000000d0, 0x00000058, 0x00010003,
0x00000001, 0x00000070, 0x00000000, 0x00000080, 0x00020003, 0x00000001,
0x00000098, 0x00000000, 0x000000a8, 0x00000003, 0x00000001, 0x000000c0,
0x00000000, 0x53656874, 0x6c706d61, 0x742b7265, 0x65546568, 0x72757478,
0xab005565, 0x00070004, 0x00040001, 0x00000001, 0x00000000, 0x53656874,
0x6c706d61, 0x742b7265, 0x65546568, 0x72757478, 0xab005665, 0x00070004,
0x00040001, 0x00000001, 0x00000000, 0x53656874, 0x6c706d61, 0x742b7265,
0x65546568, 0x72757478, 0xab005965, 0x00070004, 0x00040001, 0x00000001,
0x00000000, 0x325f7370, 0x4d00305f, 0x6f726369, 0x74666f73, 0x29522820,
0x534c4820, 0x6853204c, 0x72656461, 0x6d6f4320, 0x656c6970, 0x2e362072,
0x36392e33, 0x312e3030, 0x34383336, 0xababab00, 0x05000051, 0xa00f0000,
0xbd808081, 0xbf008081, 0xbf008081, 0x3f800000, 0x05000051, 0xa00f0001,
0x3f950b0f, 0x00000000, 0x3fe57732, 0x00000000, 0x05000051, 0xa00f0002,
0x3f950b0f, 0xbe5a511a, 0xbf086c22, 0x00000000, 0x05000051, 0xa00f0003,
0x3f950b0f, 0x40073190, 0x00000000, 0x00000000, 0x0200001f, 0x80000000,
0xb0030000, 0x0200001f, 0x80000000, 0x900f0000, 0x0200001f, 0x90000000,
0xa00f0800, 0x0200001f, 0x90000000, 0xa00f0801, 0x0200001f, 0x90000000,
0xa00f0802, 0x03000042, 0x800f0000, 0xb0e40000, 0xa0e40800, 0x03000042,
0x800f0001, 0xb0e40000, 0xa0e40801, 0x03000042, 0x800f0002, 0xb0e40000,
0xa0e40802, 0x02000001, 0x80020000, 0x80000001, 0x02000001, 0x80040000,
0x80000002, 0x03000002, 0x80070000, 0x80e40000, 0xa0e40000, 0x03000008,
0x80010001, 0x80e40000, 0xa0e40001, 0x03000008, 0x80020001, 0x80e40000,
0xa0e40002, 0x0400005a, 0x80040001, 0x80e40000, 0xa0e40003, 0xa0aa0003,
0x02000001, 0x80080001, 0xa0ff0000, 0x03000005, 0x800f0000, 0x80e40001,
0x90e40000, 0x02000001, 0x800f0800, 0x80e40000, 0x0000ffff
};
static const DWORD *D3D9_shaders[] = {
D3D9_PixelShader_YUV_JPEG,
D3D9_PixelShader_YUV_BT601,
D3D9_PixelShader_YUV_BT709,
};
HRESULT D3D9_CreatePixelShader(IDirect3DDevice9 *d3dDevice, D3D9_Shader shader, IDirect3DPixelShader9 **pixelShader)
{
return IDirect3DDevice9_CreatePixelShader(d3dDevice, D3D9_shaders[shader], pixelShader);
}
#endif /* SDL_VIDEO_RENDER_D3D && !SDL_RENDER_DISABLED */
/* vi: set ts=4 sw=4 expandtab: */

View file

@ -18,11 +18,17 @@
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "../SDL_internal.h"
#include "../../SDL_internal.h"
/* FIXME: This breaks on various versions of GCC and should be rewritten using intrinsics */
#if 0 /* (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES && !defined(__clang__) */
#define USE_MMX_ASSEMBLY 1
#endif
/* D3D9 shader implementation */
typedef enum {
SHADER_YUV_JPEG,
SHADER_YUV_BT601,
SHADER_YUV_BT709,
NUM_SHADERS
} D3D9_Shader;
extern HRESULT D3D9_CreatePixelShader(IDirect3DDevice9 *d3dDevice, D3D9_Shader shader, IDirect3DPixelShader9 **pixelShader);
/* vi: set ts=4 sw=4 expandtab: */

851
src/render/direct3d11/SDL_render_d3d11.c Normal file → Executable file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,43 @@
/*
Simple DirectMedia Layer
Copyright (C) 1997-2017 Sam Lantinga <slouken@libsdl.org>
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
*/
#include "../../SDL_internal.h"
/* D3D11 shader implementation */
typedef enum {
SHADER_SOLID,
SHADER_RGB,
SHADER_YUV_JPEG,
SHADER_YUV_BT601,
SHADER_YUV_BT709,
SHADER_NV12_JPEG,
SHADER_NV12_BT601,
SHADER_NV12_BT709,
SHADER_NV21_JPEG,
SHADER_NV21_BT601,
SHADER_NV21_BT709,
NUM_SHADERS
} D3D11_Shader;
extern int D3D11_CreateVertexShader(ID3D11Device1 *d3dDevice, ID3D11VertexShader **vertexShader, ID3D11InputLayout **inputLayout);
extern int D3D11_CreatePixelShader(ID3D11Device1 *d3dDevice, D3D11_Shader shader, ID3D11PixelShader **pixelShader);
/* vi: set ts=4 sw=4 expandtab: */

View file

@ -1,642 +0,0 @@
/* mmx.h
MultiMedia eXtensions GCC interface library for IA32.
To use this library, simply include this header file
and compile with GCC. You MUST have inlining enabled
in order for mmx_ok() to work; this can be done by
simply using -O on the GCC command line.
Compiling with -DMMX_TRACE will cause detailed trace
output to be sent to stderr for each mmx operation.
This adds lots of code, and obviously slows execution to
a crawl, but can be very useful for debugging.
THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT
LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTABILITY
AND FITNESS FOR ANY PARTICULAR PURPOSE.
1997-99 by H. Dietz and R. Fisher
Notes:
It appears that the latest gas has the pand problem fixed, therefore
I'll undefine BROKEN_PAND by default.
*/
#ifndef _MMX_H
#define _MMX_H
/* Warning: at this writing, the version of GAS packaged
with most Linux distributions does not handle the
parallel AND operation mnemonic correctly. If the
symbol BROKEN_PAND is defined, a slower alternative
coding will be used. If execution of mmxtest results
in an illegal instruction fault, define this symbol.
*/
#undef BROKEN_PAND
/* The type of an value that fits in an MMX register
(note that long long constant values MUST be suffixed
by LL and unsigned long long values by ULL, lest
they be truncated by the compiler)
*/
typedef union
{
long long q; /* Quadword (64-bit) value */
unsigned long long uq; /* Unsigned Quadword */
int d[2]; /* 2 Doubleword (32-bit) values */
unsigned int ud[2]; /* 2 Unsigned Doubleword */
short w[4]; /* 4 Word (16-bit) values */
unsigned short uw[4]; /* 4 Unsigned Word */
char b[8]; /* 8 Byte (8-bit) values */
unsigned char ub[8]; /* 8 Unsigned Byte */
float s[2]; /* Single-precision (32-bit) value */
} __attribute__ ((aligned(8))) mmx_t; /* On an 8-byte (64-bit) boundary */
#if 0
/* Function to test if multimedia instructions are supported...
*/
inline extern int
mm_support(void)
{
/* Returns 1 if MMX instructions are supported,
3 if Cyrix MMX and Extended MMX instructions are supported
5 if AMD MMX and 3DNow! instructions are supported
0 if hardware does not support any of these
*/
register int rval = 0;
__asm__ __volatile__(
/* See if CPUID instruction is supported ... */
/* ... Get copies of EFLAGS into eax and ecx */
"pushf\n\t"
"popl %%eax\n\t" "movl %%eax, %%ecx\n\t"
/* ... Toggle the ID bit in one copy and store */
/* to the EFLAGS reg */
"xorl $0x200000, %%eax\n\t"
"push %%eax\n\t" "popf\n\t"
/* ... Get the (hopefully modified) EFLAGS */
"pushf\n\t" "popl %%eax\n\t"
/* ... Compare and test result */
"xorl %%eax, %%ecx\n\t" "testl $0x200000, %%ecx\n\t" "jz NotSupported1\n\t" /* CPUID not supported */
/* Get standard CPUID information, and
go to a specific vendor section */
"movl $0, %%eax\n\t" "cpuid\n\t"
/* Check for Intel */
"cmpl $0x756e6547, %%ebx\n\t"
"jne TryAMD\n\t"
"cmpl $0x49656e69, %%edx\n\t"
"jne TryAMD\n\t"
"cmpl $0x6c65746e, %%ecx\n"
"jne TryAMD\n\t" "jmp Intel\n\t"
/* Check for AMD */
"\nTryAMD:\n\t"
"cmpl $0x68747541, %%ebx\n\t"
"jne TryCyrix\n\t"
"cmpl $0x69746e65, %%edx\n\t"
"jne TryCyrix\n\t"
"cmpl $0x444d4163, %%ecx\n"
"jne TryCyrix\n\t" "jmp AMD\n\t"
/* Check for Cyrix */
"\nTryCyrix:\n\t"
"cmpl $0x69727943, %%ebx\n\t"
"jne NotSupported2\n\t"
"cmpl $0x736e4978, %%edx\n\t"
"jne NotSupported3\n\t"
"cmpl $0x64616574, %%ecx\n\t"
"jne NotSupported4\n\t"
/* Drop through to Cyrix... */
/* Cyrix Section */
/* See if extended CPUID level 80000001 is supported */
/* The value of CPUID/80000001 for the 6x86MX is undefined
according to the Cyrix CPU Detection Guide (Preliminary
Rev. 1.01 table 1), so we'll check the value of eax for
CPUID/0 to see if standard CPUID level 2 is supported.
According to the table, the only CPU which supports level
2 is also the only one which supports extended CPUID levels.
*/
"cmpl $0x2, %%eax\n\t" "jne MMXtest\n\t" /* Use standard CPUID instead */
/* Extended CPUID supported (in theory), so get extended
features */
"movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%eax\n\t" /* Test for MMX */
"jz NotSupported5\n\t" /* MMX not supported */
"testl $0x01000000, %%eax\n\t" /* Test for Ext'd MMX */
"jnz EMMXSupported\n\t" "movl $1, %0:\n\n\t" /* MMX Supported */
"jmp Return\n\n" "EMMXSupported:\n\t" "movl $3, %0:\n\n\t" /* EMMX and MMX Supported */
"jmp Return\n\t"
/* AMD Section */
"AMD:\n\t"
/* See if extended CPUID is supported */
"movl $0x80000000, %%eax\n\t" "cpuid\n\t" "cmpl $0x80000000, %%eax\n\t" "jl MMXtest\n\t" /* Use standard CPUID instead */
/* Extended CPUID supported, so get extended features */
"movl $0x80000001, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t" /* Test for MMX */
"jz NotSupported6\n\t" /* MMX not supported */
"testl $0x80000000, %%edx\n\t" /* Test for 3DNow! */
"jnz ThreeDNowSupported\n\t" "movl $1, %0:\n\n\t" /* MMX Supported */
"jmp Return\n\n" "ThreeDNowSupported:\n\t" "movl $5, %0:\n\n\t" /* 3DNow! and MMX Supported */
"jmp Return\n\t"
/* Intel Section */
"Intel:\n\t"
/* Check for MMX */
"MMXtest:\n\t" "movl $1, %%eax\n\t" "cpuid\n\t" "testl $0x00800000, %%edx\n\t" /* Test for MMX */
"jz NotSupported7\n\t" /* MMX Not supported */
"movl $1, %0:\n\n\t" /* MMX Supported */
"jmp Return\n\t"
/* Nothing supported */
"\nNotSupported1:\n\t" "#movl $101, %0:\n\n\t" "\nNotSupported2:\n\t" "#movl $102, %0:\n\n\t" "\nNotSupported3:\n\t" "#movl $103, %0:\n\n\t" "\nNotSupported4:\n\t" "#movl $104, %0:\n\n\t" "\nNotSupported5:\n\t" "#movl $105, %0:\n\n\t" "\nNotSupported6:\n\t" "#movl $106, %0:\n\n\t" "\nNotSupported7:\n\t" "#movl $107, %0:\n\n\t" "movl $0, %0:\n\n\t" "Return:\n\t":"=a"(rval): /* no input */
:"eax", "ebx", "ecx", "edx");
/* Return */
return (rval);
}
/* Function to test if mmx instructions are supported...
*/
inline extern int
mmx_ok(void)
{
/* Returns 1 if MMX instructions are supported, 0 otherwise */
return (mm_support() & 0x1);
}
#endif
/* Helper functions for the instruction macros that follow...
(note that memory-to-register, m2r, instructions are nearly
as efficient as register-to-register, r2r, instructions;
however, memory-to-memory instructions are really simulated
as a convenience, and are only 1/3 as efficient)
*/
#ifdef MMX_TRACE
/* Include the stuff for printing a trace to stderr...
*/
#define mmx_i2r(op, imm, reg) \
{ \
mmx_t mmx_trace; \
mmx_trace.uq = (imm); \
printf(#op "_i2r(" #imm "=0x%08x%08x, ", \
mmx_trace.d[1], mmx_trace.d[0]); \
__asm__ __volatile__ ("movq %%" #reg ", %0" \
: "=X" (mmx_trace) \
: /* nothing */ ); \
printf(#reg "=0x%08x%08x) => ", \
mmx_trace.d[1], mmx_trace.d[0]); \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
: "X" (imm)); \
__asm__ __volatile__ ("movq %%" #reg ", %0" \
: "=X" (mmx_trace) \
: /* nothing */ ); \
printf(#reg "=0x%08x%08x\n", \
mmx_trace.d[1], mmx_trace.d[0]); \
}
#define mmx_m2r(op, mem, reg) \
{ \
mmx_t mmx_trace; \
mmx_trace = (mem); \
printf(#op "_m2r(" #mem "=0x%08x%08x, ", \
mmx_trace.d[1], mmx_trace.d[0]); \
__asm__ __volatile__ ("movq %%" #reg ", %0" \
: "=X" (mmx_trace) \
: /* nothing */ ); \
printf(#reg "=0x%08x%08x) => ", \
mmx_trace.d[1], mmx_trace.d[0]); \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
: "X" (mem)); \
__asm__ __volatile__ ("movq %%" #reg ", %0" \
: "=X" (mmx_trace) \
: /* nothing */ ); \
printf(#reg "=0x%08x%08x\n", \
mmx_trace.d[1], mmx_trace.d[0]); \
}
#define mmx_r2m(op, reg, mem) \
{ \
mmx_t mmx_trace; \
__asm__ __volatile__ ("movq %%" #reg ", %0" \
: "=X" (mmx_trace) \
: /* nothing */ ); \
printf(#op "_r2m(" #reg "=0x%08x%08x, ", \
mmx_trace.d[1], mmx_trace.d[0]); \
mmx_trace = (mem); \
printf(#mem "=0x%08x%08x) => ", \
mmx_trace.d[1], mmx_trace.d[0]); \
__asm__ __volatile__ (#op " %%" #reg ", %0" \
: "=X" (mem) \
: /* nothing */ ); \
mmx_trace = (mem); \
printf(#mem "=0x%08x%08x\n", \
mmx_trace.d[1], mmx_trace.d[0]); \
}
#define mmx_r2r(op, regs, regd) \
{ \
mmx_t mmx_trace; \
__asm__ __volatile__ ("movq %%" #regs ", %0" \
: "=X" (mmx_trace) \
: /* nothing */ ); \
printf(#op "_r2r(" #regs "=0x%08x%08x, ", \
mmx_trace.d[1], mmx_trace.d[0]); \
__asm__ __volatile__ ("movq %%" #regd ", %0" \
: "=X" (mmx_trace) \
: /* nothing */ ); \
printf(#regd "=0x%08x%08x) => ", \
mmx_trace.d[1], mmx_trace.d[0]); \
__asm__ __volatile__ (#op " %" #regs ", %" #regd); \
__asm__ __volatile__ ("movq %%" #regd ", %0" \
: "=X" (mmx_trace) \
: /* nothing */ ); \
printf(#regd "=0x%08x%08x\n", \
mmx_trace.d[1], mmx_trace.d[0]); \
}
#define mmx_m2m(op, mems, memd) \
{ \
mmx_t mmx_trace; \
mmx_trace = (mems); \
printf(#op "_m2m(" #mems "=0x%08x%08x, ", \
mmx_trace.d[1], mmx_trace.d[0]); \
mmx_trace = (memd); \
printf(#memd "=0x%08x%08x) => ", \
mmx_trace.d[1], mmx_trace.d[0]); \
__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
#op " %1, %%mm0\n\t" \
"movq %%mm0, %0" \
: "=X" (memd) \
: "X" (mems)); \
mmx_trace = (memd); \
printf(#memd "=0x%08x%08x\n", \
mmx_trace.d[1], mmx_trace.d[0]); \
}
#else
/* These macros are a lot simpler without the tracing...
*/
#define mmx_i2r(op, imm, reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
: "X" (imm) )
#define mmx_m2r(op, mem, reg) \
__asm__ __volatile__ (#op " %0, %%" #reg \
: /* nothing */ \
: "m" (mem))
#define mmx_r2m(op, reg, mem) \
__asm__ __volatile__ (#op " %%" #reg ", %0" \
: "=m" (mem) \
: /* nothing */ )
#define mmx_r2r(op, regs, regd) \
__asm__ __volatile__ (#op " %" #regs ", %" #regd)
#define mmx_m2m(op, mems, memd) \
__asm__ __volatile__ ("movq %0, %%mm0\n\t" \
#op " %1, %%mm0\n\t" \
"movq %%mm0, %0" \
: "=X" (memd) \
: "X" (mems))
#endif
/* 1x64 MOVe Quadword
(this is both a load and a store...
in fact, it is the only way to store)
*/
#define movq_m2r(var, reg) mmx_m2r(movq, var, reg)
#define movq_r2m(reg, var) mmx_r2m(movq, reg, var)
#define movq_r2r(regs, regd) mmx_r2r(movq, regs, regd)
#define movq(vars, vard) \
__asm__ __volatile__ ("movq %1, %%mm0\n\t" \
"movq %%mm0, %0" \
: "=X" (vard) \
: "X" (vars))
/* 1x32 MOVe Doubleword
(like movq, this is both load and store...
but is most useful for moving things between
mmx registers and ordinary registers)
*/
#define movd_m2r(var, reg) mmx_m2r(movd, var, reg)
#define movd_r2m(reg, var) mmx_r2m(movd, reg, var)
#define movd_r2r(regs, regd) mmx_r2r(movd, regs, regd)
#define movd(vars, vard) \
__asm__ __volatile__ ("movd %1, %%mm0\n\t" \
"movd %%mm0, %0" \
: "=X" (vard) \
: "X" (vars))
/* 2x32, 4x16, and 8x8 Parallel ADDs
*/
#define paddd_m2r(var, reg) mmx_m2r(paddd, var, reg)
#define paddd_r2r(regs, regd) mmx_r2r(paddd, regs, regd)
#define paddd(vars, vard) mmx_m2m(paddd, vars, vard)
#define paddw_m2r(var, reg) mmx_m2r(paddw, var, reg)
#define paddw_r2r(regs, regd) mmx_r2r(paddw, regs, regd)
#define paddw(vars, vard) mmx_m2m(paddw, vars, vard)
#define paddb_m2r(var, reg) mmx_m2r(paddb, var, reg)
#define paddb_r2r(regs, regd) mmx_r2r(paddb, regs, regd)
#define paddb(vars, vard) mmx_m2m(paddb, vars, vard)
/* 4x16 and 8x8 Parallel ADDs using Saturation arithmetic
*/
#define paddsw_m2r(var, reg) mmx_m2r(paddsw, var, reg)
#define paddsw_r2r(regs, regd) mmx_r2r(paddsw, regs, regd)
#define paddsw(vars, vard) mmx_m2m(paddsw, vars, vard)
#define paddsb_m2r(var, reg) mmx_m2r(paddsb, var, reg)
#define paddsb_r2r(regs, regd) mmx_r2r(paddsb, regs, regd)
#define paddsb(vars, vard) mmx_m2m(paddsb, vars, vard)
/* 4x16 and 8x8 Parallel ADDs using Unsigned Saturation arithmetic
*/
#define paddusw_m2r(var, reg) mmx_m2r(paddusw, var, reg)
#define paddusw_r2r(regs, regd) mmx_r2r(paddusw, regs, regd)
#define paddusw(vars, vard) mmx_m2m(paddusw, vars, vard)
#define paddusb_m2r(var, reg) mmx_m2r(paddusb, var, reg)
#define paddusb_r2r(regs, regd) mmx_r2r(paddusb, regs, regd)
#define paddusb(vars, vard) mmx_m2m(paddusb, vars, vard)
/* 2x32, 4x16, and 8x8 Parallel SUBs
*/
#define psubd_m2r(var, reg) mmx_m2r(psubd, var, reg)
#define psubd_r2r(regs, regd) mmx_r2r(psubd, regs, regd)
#define psubd(vars, vard) mmx_m2m(psubd, vars, vard)
#define psubw_m2r(var, reg) mmx_m2r(psubw, var, reg)
#define psubw_r2r(regs, regd) mmx_r2r(psubw, regs, regd)
#define psubw(vars, vard) mmx_m2m(psubw, vars, vard)
#define psubb_m2r(var, reg) mmx_m2r(psubb, var, reg)
#define psubb_r2r(regs, regd) mmx_r2r(psubb, regs, regd)
#define psubb(vars, vard) mmx_m2m(psubb, vars, vard)
/* 4x16 and 8x8 Parallel SUBs using Saturation arithmetic
*/
#define psubsw_m2r(var, reg) mmx_m2r(psubsw, var, reg)
#define psubsw_r2r(regs, regd) mmx_r2r(psubsw, regs, regd)
#define psubsw(vars, vard) mmx_m2m(psubsw, vars, vard)
#define psubsb_m2r(var, reg) mmx_m2r(psubsb, var, reg)
#define psubsb_r2r(regs, regd) mmx_r2r(psubsb, regs, regd)
#define psubsb(vars, vard) mmx_m2m(psubsb, vars, vard)
/* 4x16 and 8x8 Parallel SUBs using Unsigned Saturation arithmetic
*/
#define psubusw_m2r(var, reg) mmx_m2r(psubusw, var, reg)
#define psubusw_r2r(regs, regd) mmx_r2r(psubusw, regs, regd)
#define psubusw(vars, vard) mmx_m2m(psubusw, vars, vard)
#define psubusb_m2r(var, reg) mmx_m2r(psubusb, var, reg)
#define psubusb_r2r(regs, regd) mmx_r2r(psubusb, regs, regd)
#define psubusb(vars, vard) mmx_m2m(psubusb, vars, vard)
/* 4x16 Parallel MULs giving Low 4x16 portions of results
*/
#define pmullw_m2r(var, reg) mmx_m2r(pmullw, var, reg)
#define pmullw_r2r(regs, regd) mmx_r2r(pmullw, regs, regd)
#define pmullw(vars, vard) mmx_m2m(pmullw, vars, vard)
/* 4x16 Parallel MULs giving High 4x16 portions of results
*/
#define pmulhw_m2r(var, reg) mmx_m2r(pmulhw, var, reg)
#define pmulhw_r2r(regs, regd) mmx_r2r(pmulhw, regs, regd)
#define pmulhw(vars, vard) mmx_m2m(pmulhw, vars, vard)
/* 4x16->2x32 Parallel Mul-ADD
(muls like pmullw, then adds adjacent 16-bit fields
in the multiply result to make the final 2x32 result)
*/
#define pmaddwd_m2r(var, reg) mmx_m2r(pmaddwd, var, reg)
#define pmaddwd_r2r(regs, regd) mmx_r2r(pmaddwd, regs, regd)
#define pmaddwd(vars, vard) mmx_m2m(pmaddwd, vars, vard)
/* 1x64 bitwise AND
*/
#ifdef BROKEN_PAND
#define pand_m2r(var, reg) \
{ \
mmx_m2r(pandn, (mmx_t) -1LL, reg); \
mmx_m2r(pandn, var, reg); \
}
#define pand_r2r(regs, regd) \
{ \
mmx_m2r(pandn, (mmx_t) -1LL, regd); \
mmx_r2r(pandn, regs, regd) \
}
#define pand(vars, vard) \
{ \
movq_m2r(vard, mm0); \
mmx_m2r(pandn, (mmx_t) -1LL, mm0); \
mmx_m2r(pandn, vars, mm0); \
movq_r2m(mm0, vard); \
}
#else
#define pand_m2r(var, reg) mmx_m2r(pand, var, reg)
#define pand_r2r(regs, regd) mmx_r2r(pand, regs, regd)
#define pand(vars, vard) mmx_m2m(pand, vars, vard)
#endif
/* 1x64 bitwise AND with Not the destination
*/
#define pandn_m2r(var, reg) mmx_m2r(pandn, var, reg)
#define pandn_r2r(regs, regd) mmx_r2r(pandn, regs, regd)
#define pandn(vars, vard) mmx_m2m(pandn, vars, vard)
/* 1x64 bitwise OR
*/
#define por_m2r(var, reg) mmx_m2r(por, var, reg)
#define por_r2r(regs, regd) mmx_r2r(por, regs, regd)
#define por(vars, vard) mmx_m2m(por, vars, vard)
/* 1x64 bitwise eXclusive OR
*/
#define pxor_m2r(var, reg) mmx_m2r(pxor, var, reg)
#define pxor_r2r(regs, regd) mmx_r2r(pxor, regs, regd)
#define pxor(vars, vard) mmx_m2m(pxor, vars, vard)
/* 2x32, 4x16, and 8x8 Parallel CoMPare for EQuality
(resulting fields are either 0 or -1)
*/
#define pcmpeqd_m2r(var, reg) mmx_m2r(pcmpeqd, var, reg)
#define pcmpeqd_r2r(regs, regd) mmx_r2r(pcmpeqd, regs, regd)
#define pcmpeqd(vars, vard) mmx_m2m(pcmpeqd, vars, vard)
#define pcmpeqw_m2r(var, reg) mmx_m2r(pcmpeqw, var, reg)
#define pcmpeqw_r2r(regs, regd) mmx_r2r(pcmpeqw, regs, regd)
#define pcmpeqw(vars, vard) mmx_m2m(pcmpeqw, vars, vard)
#define pcmpeqb_m2r(var, reg) mmx_m2r(pcmpeqb, var, reg)
#define pcmpeqb_r2r(regs, regd) mmx_r2r(pcmpeqb, regs, regd)
#define pcmpeqb(vars, vard) mmx_m2m(pcmpeqb, vars, vard)
/* 2x32, 4x16, and 8x8 Parallel CoMPare for Greater Than
(resulting fields are either 0 or -1)
*/
#define pcmpgtd_m2r(var, reg) mmx_m2r(pcmpgtd, var, reg)
#define pcmpgtd_r2r(regs, regd) mmx_r2r(pcmpgtd, regs, regd)
#define pcmpgtd(vars, vard) mmx_m2m(pcmpgtd, vars, vard)
#define pcmpgtw_m2r(var, reg) mmx_m2r(pcmpgtw, var, reg)
#define pcmpgtw_r2r(regs, regd) mmx_r2r(pcmpgtw, regs, regd)
#define pcmpgtw(vars, vard) mmx_m2m(pcmpgtw, vars, vard)
#define pcmpgtb_m2r(var, reg) mmx_m2r(pcmpgtb, var, reg)
#define pcmpgtb_r2r(regs, regd) mmx_r2r(pcmpgtb, regs, regd)
#define pcmpgtb(vars, vard) mmx_m2m(pcmpgtb, vars, vard)
/* 1x64, 2x32, and 4x16 Parallel Shift Left Logical
*/
#define psllq_i2r(imm, reg) mmx_i2r(psllq, imm, reg)
#define psllq_m2r(var, reg) mmx_m2r(psllq, var, reg)
#define psllq_r2r(regs, regd) mmx_r2r(psllq, regs, regd)
#define psllq(vars, vard) mmx_m2m(psllq, vars, vard)
#define pslld_i2r(imm, reg) mmx_i2r(pslld, imm, reg)
#define pslld_m2r(var, reg) mmx_m2r(pslld, var, reg)
#define pslld_r2r(regs, regd) mmx_r2r(pslld, regs, regd)
#define pslld(vars, vard) mmx_m2m(pslld, vars, vard)
#define psllw_i2r(imm, reg) mmx_i2r(psllw, imm, reg)
#define psllw_m2r(var, reg) mmx_m2r(psllw, var, reg)
#define psllw_r2r(regs, regd) mmx_r2r(psllw, regs, regd)
#define psllw(vars, vard) mmx_m2m(psllw, vars, vard)
/* 1x64, 2x32, and 4x16 Parallel Shift Right Logical
*/
#define psrlq_i2r(imm, reg) mmx_i2r(psrlq, imm, reg)
#define psrlq_m2r(var, reg) mmx_m2r(psrlq, var, reg)
#define psrlq_r2r(regs, regd) mmx_r2r(psrlq, regs, regd)
#define psrlq(vars, vard) mmx_m2m(psrlq, vars, vard)
#define psrld_i2r(imm, reg) mmx_i2r(psrld, imm, reg)
#define psrld_m2r(var, reg) mmx_m2r(psrld, var, reg)
#define psrld_r2r(regs, regd) mmx_r2r(psrld, regs, regd)
#define psrld(vars, vard) mmx_m2m(psrld, vars, vard)
#define psrlw_i2r(imm, reg) mmx_i2r(psrlw, imm, reg)
#define psrlw_m2r(var, reg) mmx_m2r(psrlw, var, reg)
#define psrlw_r2r(regs, regd) mmx_r2r(psrlw, regs, regd)
#define psrlw(vars, vard) mmx_m2m(psrlw, vars, vard)
/* 2x32 and 4x16 Parallel Shift Right Arithmetic
*/
#define psrad_i2r(imm, reg) mmx_i2r(psrad, imm, reg)
#define psrad_m2r(var, reg) mmx_m2r(psrad, var, reg)
#define psrad_r2r(regs, regd) mmx_r2r(psrad, regs, regd)
#define psrad(vars, vard) mmx_m2m(psrad, vars, vard)
#define psraw_i2r(imm, reg) mmx_i2r(psraw, imm, reg)
#define psraw_m2r(var, reg) mmx_m2r(psraw, var, reg)
#define psraw_r2r(regs, regd) mmx_r2r(psraw, regs, regd)
#define psraw(vars, vard) mmx_m2m(psraw, vars, vard)
/* 2x32->4x16 and 4x16->8x8 PACK and Signed Saturate
(packs source and dest fields into dest in that order)
*/
#define packssdw_m2r(var, reg) mmx_m2r(packssdw, var, reg)
#define packssdw_r2r(regs, regd) mmx_r2r(packssdw, regs, regd)
#define packssdw(vars, vard) mmx_m2m(packssdw, vars, vard)
#define packsswb_m2r(var, reg) mmx_m2r(packsswb, var, reg)
#define packsswb_r2r(regs, regd) mmx_r2r(packsswb, regs, regd)
#define packsswb(vars, vard) mmx_m2m(packsswb, vars, vard)
/* 4x16->8x8 PACK and Unsigned Saturate
(packs source and dest fields into dest in that order)
*/
#define packuswb_m2r(var, reg) mmx_m2r(packuswb, var, reg)
#define packuswb_r2r(regs, regd) mmx_r2r(packuswb, regs, regd)
#define packuswb(vars, vard) mmx_m2m(packuswb, vars, vard)
/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK Low
(interleaves low half of dest with low half of source
as padding in each result field)
*/
#define punpckldq_m2r(var, reg) mmx_m2r(punpckldq, var, reg)
#define punpckldq_r2r(regs, regd) mmx_r2r(punpckldq, regs, regd)
#define punpckldq(vars, vard) mmx_m2m(punpckldq, vars, vard)
#define punpcklwd_m2r(var, reg) mmx_m2r(punpcklwd, var, reg)
#define punpcklwd_r2r(regs, regd) mmx_r2r(punpcklwd, regs, regd)
#define punpcklwd(vars, vard) mmx_m2m(punpcklwd, vars, vard)
#define punpcklbw_m2r(var, reg) mmx_m2r(punpcklbw, var, reg)
#define punpcklbw_r2r(regs, regd) mmx_r2r(punpcklbw, regs, regd)
#define punpcklbw(vars, vard) mmx_m2m(punpcklbw, vars, vard)
/* 2x32->1x64, 4x16->2x32, and 8x8->4x16 UNPaCK High
(interleaves high half of dest with high half of source
as padding in each result field)
*/
#define punpckhdq_m2r(var, reg) mmx_m2r(punpckhdq, var, reg)
#define punpckhdq_r2r(regs, regd) mmx_r2r(punpckhdq, regs, regd)
#define punpckhdq(vars, vard) mmx_m2m(punpckhdq, vars, vard)
#define punpckhwd_m2r(var, reg) mmx_m2r(punpckhwd, var, reg)
#define punpckhwd_r2r(regs, regd) mmx_r2r(punpckhwd, regs, regd)
#define punpckhwd(vars, vard) mmx_m2m(punpckhwd, vars, vard)
#define punpckhbw_m2r(var, reg) mmx_m2r(punpckhbw, var, reg)
#define punpckhbw_r2r(regs, regd) mmx_r2r(punpckhbw, regs, regd)
#define punpckhbw(vars, vard) mmx_m2m(punpckhbw, vars, vard)
/* Empty MMx State
(used to clean-up when going from mmx to float use
of the registers that are shared by both; note that
there is no float-to-mmx operation needed, because
only the float tag word info is corruptible)
*/
#ifdef MMX_TRACE
#define emms() \
{ \
printf("emms()\n"); \
__asm__ __volatile__ ("emms"); \
}
#else
#define emms() __asm__ __volatile__ ("emms")
#endif
#endif
/* vi: set ts=4 sw=4 expandtab: */

View file

@ -1349,13 +1349,37 @@ GL_SetupCopy(SDL_Renderer * renderer, SDL_Texture * texture)
GL_SetBlendMode(data, texture->blendMode);
if (texturedata->yuv) {
GL_SetShader(data, SHADER_YUV);
} else if (texturedata->nv12) {
if (texture->format == SDL_PIXELFORMAT_NV12) {
GL_SetShader(data, SHADER_NV12);
} else {
GL_SetShader(data, SHADER_NV21);
if (texturedata->yuv || texturedata->nv12) {
switch (SDL_GetYUVConversionModeForResolution(texture->w, texture->h)) {
case SDL_YUV_CONVERSION_JPEG:
if (texturedata->yuv) {
GL_SetShader(data, SHADER_YUV_JPEG);
} else if (texture->format == SDL_PIXELFORMAT_NV12) {
GL_SetShader(data, SHADER_NV12_JPEG);
} else {
GL_SetShader(data, SHADER_NV21_JPEG);
}
break;
case SDL_YUV_CONVERSION_BT601:
if (texturedata->yuv) {
GL_SetShader(data, SHADER_YUV_BT601);
} else if (texture->format == SDL_PIXELFORMAT_NV12) {
GL_SetShader(data, SHADER_NV12_BT601);
} else {
GL_SetShader(data, SHADER_NV21_BT601);
}
break;
case SDL_YUV_CONVERSION_BT709:
if (texturedata->yuv) {
GL_SetShader(data, SHADER_YUV_BT709);
} else if (texture->format == SDL_PIXELFORMAT_NV12) {
GL_SetShader(data, SHADER_NV12_BT709);
} else {
GL_SetShader(data, SHADER_NV21_BT709);
}
break;
default:
return SDL_SetError("Unsupported YUV conversion mode");
}
} else {
GL_SetShader(data, SHADER_RGB);

View file

@ -62,6 +62,151 @@ struct GL_ShaderContext
GL_ShaderData shaders[NUM_SHADERS];
};
#define COLOR_VERTEX_SHADER \
"varying vec4 v_color;\n" \
"\n" \
"void main()\n" \
"{\n" \
" gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n" \
" v_color = gl_Color;\n" \
"}" \
#define TEXTURE_VERTEX_SHADER \
"varying vec4 v_color;\n" \
"varying vec2 v_texCoord;\n" \
"\n" \
"void main()\n" \
"{\n" \
" gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n" \
" v_color = gl_Color;\n" \
" v_texCoord = vec2(gl_MultiTexCoord0);\n" \
"}" \
#define JPEG_SHADER_CONSTANTS \
"// YUV offset \n" \
"const vec3 offset = vec3(0, -0.501960814, -0.501960814);\n" \
"\n" \
"// RGB coefficients \n" \
"const vec3 Rcoeff = vec3(1, 0.000, 1.402);\n" \
"const vec3 Gcoeff = vec3(1, -0.3441, -0.7141);\n" \
"const vec3 Bcoeff = vec3(1, 1.772, 0.000);\n" \
#define BT601_SHADER_CONSTANTS \
"// YUV offset \n" \
"const vec3 offset = vec3(-0.0627451017, -0.501960814, -0.501960814);\n" \
"\n" \
"// RGB coefficients \n" \
"const vec3 Rcoeff = vec3(1.1644, 0.000, 1.596);\n" \
"const vec3 Gcoeff = vec3(1.1644, -0.3918, -0.813);\n" \
"const vec3 Bcoeff = vec3(1.1644, 2.0172, 0.000);\n" \
#define BT709_SHADER_CONSTANTS \
"// YUV offset \n" \
"const vec3 offset = vec3(-0.0627451017, -0.501960814, -0.501960814);\n" \
"\n" \
"// RGB coefficients \n" \
"const vec3 Rcoeff = vec3(1.1644, 0.000, 1.7927);\n" \
"const vec3 Gcoeff = vec3(1.1644, -0.2132, -0.5329);\n" \
"const vec3 Bcoeff = vec3(1.1644, 2.1124, 0.000);\n" \
#define YUV_SHADER_PROLOGUE \
"varying vec4 v_color;\n" \
"varying vec2 v_texCoord;\n" \
"uniform sampler2D tex0; // Y \n" \
"uniform sampler2D tex1; // U \n" \
"uniform sampler2D tex2; // V \n" \
"\n" \
#define YUV_SHADER_BODY \
"\n" \
"void main()\n" \
"{\n" \
" vec2 tcoord;\n" \
" vec3 yuv, rgb;\n" \
"\n" \
" // Get the Y value \n" \
" tcoord = v_texCoord;\n" \
" yuv.x = texture2D(tex0, tcoord).r;\n" \
"\n" \
" // Get the U and V values \n" \
" tcoord *= UVCoordScale;\n" \
" yuv.y = texture2D(tex1, tcoord).r;\n" \
" yuv.z = texture2D(tex2, tcoord).r;\n" \
"\n" \
" // Do the color transform \n" \
" yuv += offset;\n" \
" rgb.r = dot(yuv, Rcoeff);\n" \
" rgb.g = dot(yuv, Gcoeff);\n" \
" rgb.b = dot(yuv, Bcoeff);\n" \
"\n" \
" // That was easy. :) \n" \
" gl_FragColor = vec4(rgb, 1.0) * v_color;\n" \
"}" \
#define NV12_SHADER_PROLOGUE \
"varying vec4 v_color;\n" \
"varying vec2 v_texCoord;\n" \
"uniform sampler2D tex0; // Y \n" \
"uniform sampler2D tex1; // U/V \n" \
"\n" \
#define NV12_SHADER_BODY \
"\n" \
"void main()\n" \
"{\n" \
" vec2 tcoord;\n" \
" vec3 yuv, rgb;\n" \
"\n" \
" // Get the Y value \n" \
" tcoord = v_texCoord;\n" \
" yuv.x = texture2D(tex0, tcoord).r;\n" \
"\n" \
" // Get the U and V values \n" \
" tcoord *= UVCoordScale;\n" \
" yuv.yz = texture2D(tex1, tcoord).ra;\n" \
"\n" \
" // Do the color transform \n" \
" yuv += offset;\n" \
" rgb.r = dot(yuv, Rcoeff);\n" \
" rgb.g = dot(yuv, Gcoeff);\n" \
" rgb.b = dot(yuv, Bcoeff);\n" \
"\n" \
" // That was easy. :) \n" \
" gl_FragColor = vec4(rgb, 1.0) * v_color;\n" \
"}" \
#define NV21_SHADER_PROLOGUE \
"varying vec4 v_color;\n" \
"varying vec2 v_texCoord;\n" \
"uniform sampler2D tex0; // Y \n" \
"uniform sampler2D tex1; // U/V \n" \
"\n" \
#define NV21_SHADER_BODY \
"\n" \
"void main()\n" \
"{\n" \
" vec2 tcoord;\n" \
" vec3 yuv, rgb;\n" \
"\n" \
" // Get the Y value \n" \
" tcoord = v_texCoord;\n" \
" yuv.x = texture2D(tex0, tcoord).r;\n" \
"\n" \
" // Get the U and V values \n" \
" tcoord *= UVCoordScale;\n" \
" yuv.yz = texture2D(tex1, tcoord).ar;\n" \
"\n" \
" // Do the color transform \n" \
" yuv += offset;\n" \
" rgb.r = dot(yuv, Rcoeff);\n" \
" rgb.g = dot(yuv, Gcoeff);\n" \
" rgb.b = dot(yuv, Bcoeff);\n" \
"\n" \
" // That was easy. :) \n" \
" gl_FragColor = vec4(rgb, 1.0) * v_color;\n" \
"}" \
/*
* NOTE: Always use sampler2D, etc here. We'll #define them to the
* texture_rectangle versions if we choose to use that extension.
@ -74,13 +219,7 @@ static const char *shader_source[NUM_SHADERS][2] =
/* SHADER_SOLID */
{
/* vertex shader */
"varying vec4 v_color;\n"
"\n"
"void main()\n"
"{\n"
" gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n"
" v_color = gl_Color;\n"
"}",
COLOR_VERTEX_SHADER,
/* fragment shader */
"varying vec4 v_color;\n"
"\n"
@ -93,15 +232,7 @@ static const char *shader_source[NUM_SHADERS][2] =
/* SHADER_RGB */
{
/* vertex shader */
"varying vec4 v_color;\n"
"varying vec2 v_texCoord;\n"
"\n"
"void main()\n"
"{\n"
" gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n"
" v_color = gl_Color;\n"
" v_texCoord = vec2(gl_MultiTexCoord0);\n"
"}",
TEXTURE_VERTEX_SHADER,
/* fragment shader */
"varying vec4 v_color;\n"
"varying vec2 v_texCoord;\n"
@ -113,156 +244,86 @@ static const char *shader_source[NUM_SHADERS][2] =
"}"
},
/* SHADER_YUV */
/* SHADER_YUV_JPEG */
{
/* vertex shader */
"varying vec4 v_color;\n"
"varying vec2 v_texCoord;\n"
"\n"
"void main()\n"
"{\n"
" gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n"
" v_color = gl_Color;\n"
" v_texCoord = vec2(gl_MultiTexCoord0);\n"
"}",
TEXTURE_VERTEX_SHADER,
/* fragment shader */
"varying vec4 v_color;\n"
"varying vec2 v_texCoord;\n"
"uniform sampler2D tex0; // Y \n"
"uniform sampler2D tex1; // U \n"
"uniform sampler2D tex2; // V \n"
"\n"
"// YUV offset \n"
"const vec3 offset = vec3(-0.0627451017, -0.501960814, -0.501960814);\n"
"\n"
"// RGB coefficients \n"
"const vec3 Rcoeff = vec3(1.164, 0.000, 1.596);\n"
"const vec3 Gcoeff = vec3(1.164, -0.391, -0.813);\n"
"const vec3 Bcoeff = vec3(1.164, 2.018, 0.000);\n"
"\n"
"void main()\n"
"{\n"
" vec2 tcoord;\n"
" vec3 yuv, rgb;\n"
"\n"
" // Get the Y value \n"
" tcoord = v_texCoord;\n"
" yuv.x = texture2D(tex0, tcoord).r;\n"
"\n"
" // Get the U and V values \n"
" tcoord *= UVCoordScale;\n"
" yuv.y = texture2D(tex1, tcoord).r;\n"
" yuv.z = texture2D(tex2, tcoord).r;\n"
"\n"
" // Do the color transform \n"
" yuv += offset;\n"
" rgb.r = dot(yuv, Rcoeff);\n"
" rgb.g = dot(yuv, Gcoeff);\n"
" rgb.b = dot(yuv, Bcoeff);\n"
"\n"
" // That was easy. :) \n"
" gl_FragColor = vec4(rgb, 1.0) * v_color;\n"
"}"
YUV_SHADER_PROLOGUE
JPEG_SHADER_CONSTANTS
YUV_SHADER_BODY
},
/* SHADER_NV12 */
/* SHADER_YUV_BT601 */
{
/* vertex shader */
"varying vec4 v_color;\n"
"varying vec2 v_texCoord;\n"
"\n"
"void main()\n"
"{\n"
" gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n"
" v_color = gl_Color;\n"
" v_texCoord = vec2(gl_MultiTexCoord0);\n"
"}",
TEXTURE_VERTEX_SHADER,
/* fragment shader */
"varying vec4 v_color;\n"
"varying vec2 v_texCoord;\n"
"uniform sampler2D tex0; // Y \n"
"uniform sampler2D tex1; // U/V \n"
"\n"
"// YUV offset \n"
"const vec3 offset = vec3(-0.0627451017, -0.501960814, -0.501960814);\n"
"\n"
"// RGB coefficients \n"
"const vec3 Rcoeff = vec3(1.164, 0.000, 1.596);\n"
"const vec3 Gcoeff = vec3(1.164, -0.391, -0.813);\n"
"const vec3 Bcoeff = vec3(1.164, 2.018, 0.000);\n"
"\n"
"void main()\n"
"{\n"
" vec2 tcoord;\n"
" vec3 yuv, rgb;\n"
"\n"
" // Get the Y value \n"
" tcoord = v_texCoord;\n"
" yuv.x = texture2D(tex0, tcoord).r;\n"
"\n"
" // Get the U and V values \n"
" tcoord *= UVCoordScale;\n"
" yuv.yz = texture2D(tex1, tcoord).ra;\n"
"\n"
" // Do the color transform \n"
" yuv += offset;\n"
" rgb.r = dot(yuv, Rcoeff);\n"
" rgb.g = dot(yuv, Gcoeff);\n"
" rgb.b = dot(yuv, Bcoeff);\n"
"\n"
" // That was easy. :) \n"
" gl_FragColor = vec4(rgb, 1.0) * v_color;\n"
"}"
YUV_SHADER_PROLOGUE
BT601_SHADER_CONSTANTS
YUV_SHADER_BODY
},
/* SHADER_NV21 */
/* SHADER_YUV_BT709 */
{
/* vertex shader */
"varying vec4 v_color;\n"
"varying vec2 v_texCoord;\n"
"\n"
"void main()\n"
"{\n"
" gl_Position = gl_ModelViewProjectionMatrix * gl_Vertex;\n"
" v_color = gl_Color;\n"
" v_texCoord = vec2(gl_MultiTexCoord0);\n"
"}",
TEXTURE_VERTEX_SHADER,
/* fragment shader */
"varying vec4 v_color;\n"
"varying vec2 v_texCoord;\n"
"uniform sampler2D tex0; // Y \n"
"uniform sampler2D tex1; // U/V \n"
"\n"
"// YUV offset \n"
"const vec3 offset = vec3(-0.0627451017, -0.501960814, -0.501960814);\n"
"\n"
"// RGB coefficients \n"
"const vec3 Rcoeff = vec3(1.164, 0.000, 1.596);\n"
"const vec3 Gcoeff = vec3(1.164, -0.391, -0.813);\n"
"const vec3 Bcoeff = vec3(1.164, 2.018, 0.000);\n"
"\n"
"void main()\n"
"{\n"
" vec2 tcoord;\n"
" vec3 yuv, rgb;\n"
"\n"
" // Get the Y value \n"
" tcoord = v_texCoord;\n"
" yuv.x = texture2D(tex0, tcoord).r;\n"
"\n"
" // Get the U and V values \n"
" tcoord *= UVCoordScale;\n"
" yuv.yz = texture2D(tex1, tcoord).ar;\n"
"\n"
" // Do the color transform \n"
" yuv += offset;\n"
" rgb.r = dot(yuv, Rcoeff);\n"
" rgb.g = dot(yuv, Gcoeff);\n"
" rgb.b = dot(yuv, Bcoeff);\n"
"\n"
" // That was easy. :) \n"
" gl_FragColor = vec4(rgb, 1.0) * v_color;\n"
"}"
YUV_SHADER_PROLOGUE
BT709_SHADER_CONSTANTS
YUV_SHADER_BODY
},
/* SHADER_NV12_JPEG */
{
/* vertex shader */
TEXTURE_VERTEX_SHADER,
/* fragment shader */
NV12_SHADER_PROLOGUE
JPEG_SHADER_CONSTANTS
NV12_SHADER_BODY
},
/* SHADER_NV12_BT601 */
{
/* vertex shader */
TEXTURE_VERTEX_SHADER,
/* fragment shader */
NV12_SHADER_PROLOGUE
BT601_SHADER_CONSTANTS
NV12_SHADER_BODY
},
/* SHADER_NV12_BT709 */
{
/* vertex shader */
TEXTURE_VERTEX_SHADER,
/* fragment shader */
NV12_SHADER_PROLOGUE
BT709_SHADER_CONSTANTS
NV12_SHADER_BODY
},
/* SHADER_NV21_JPEG */
{
/* vertex shader */
TEXTURE_VERTEX_SHADER,
/* fragment shader */
NV21_SHADER_PROLOGUE
JPEG_SHADER_CONSTANTS
NV21_SHADER_BODY
},
/* SHADER_NV21_BT601 */
{
/* vertex shader */
TEXTURE_VERTEX_SHADER,
/* fragment shader */
NV21_SHADER_PROLOGUE
BT601_SHADER_CONSTANTS
NV21_SHADER_BODY
},
/* SHADER_NV21_BT709 */
{
/* vertex shader */
TEXTURE_VERTEX_SHADER,
/* fragment shader */
NV21_SHADER_PROLOGUE
BT709_SHADER_CONSTANTS
NV21_SHADER_BODY
},
};

View file

@ -26,9 +26,15 @@ typedef enum {
SHADER_NONE,
SHADER_SOLID,
SHADER_RGB,
SHADER_YUV,
SHADER_NV12,
SHADER_NV21,
SHADER_YUV_JPEG,
SHADER_YUV_BT601,
SHADER_YUV_BT709,
SHADER_NV12_JPEG,
SHADER_NV12_BT601,
SHADER_NV12_BT709,
SHADER_NV21_JPEG,
SHADER_NV21_BT601,
SHADER_NV21_BT709,
NUM_SHADERS
} GL_Shader;

View file

@ -950,7 +950,7 @@ static void GLES2_EvictShader(SDL_Renderer *renderer, GLES2_ShaderCacheEntry *en
static GLES2_ProgramCacheEntry *GLES2_CacheProgram(SDL_Renderer *renderer,
GLES2_ShaderCacheEntry *vertex,
GLES2_ShaderCacheEntry *fragment);
static int GLES2_SelectProgram(SDL_Renderer *renderer, GLES2_ImageSource source);
static int GLES2_SelectProgram(SDL_Renderer *renderer, GLES2_ImageSource source, int w, int h);
static GLES2_ProgramCacheEntry *
GLES2_CacheProgram(SDL_Renderer *renderer, GLES2_ShaderCacheEntry *vertex,
@ -1189,7 +1189,7 @@ GLES2_EvictShader(SDL_Renderer *renderer, GLES2_ShaderCacheEntry *entry)
}
static int
GLES2_SelectProgram(SDL_Renderer *renderer, GLES2_ImageSource source)
GLES2_SelectProgram(SDL_Renderer *renderer, GLES2_ImageSource source, int w, int h)
{
GLES2_DriverContext *data = (GLES2_DriverContext *)renderer->driverdata;
GLES2_ShaderCacheEntry *vertex = NULL;
@ -1216,13 +1216,52 @@ GLES2_SelectProgram(SDL_Renderer *renderer, GLES2_ImageSource source)
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_BGR_SRC;
break;
case GLES2_IMAGESOURCE_TEXTURE_YUV:
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_YUV_SRC;
switch (SDL_GetYUVConversionModeForResolution(w, h)) {
case SDL_YUV_CONVERSION_JPEG:
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_YUV_JPEG_SRC;
break;
case SDL_YUV_CONVERSION_BT601:
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_YUV_BT601_SRC;
break;
case SDL_YUV_CONVERSION_BT709:
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_YUV_BT709_SRC;
break;
default:
SDL_SetError("Unsupported YUV conversion mode: %d\n", SDL_GetYUVConversionModeForResolution(w, h));
goto fault;
}
break;
case GLES2_IMAGESOURCE_TEXTURE_NV12:
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV12_SRC;
switch (SDL_GetYUVConversionModeForResolution(w, h)) {
case SDL_YUV_CONVERSION_JPEG:
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV12_JPEG_SRC;
break;
case SDL_YUV_CONVERSION_BT601:
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV12_BT601_SRC;
break;
case SDL_YUV_CONVERSION_BT709:
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV12_BT709_SRC;
break;
default:
SDL_SetError("Unsupported YUV conversion mode: %d\n", SDL_GetYUVConversionModeForResolution(w, h));
goto fault;
}
break;
case GLES2_IMAGESOURCE_TEXTURE_NV21:
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV21_SRC;
switch (SDL_GetYUVConversionModeForResolution(w, h)) {
case SDL_YUV_CONVERSION_JPEG:
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV21_JPEG_SRC;
break;
case SDL_YUV_CONVERSION_BT601:
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV21_BT601_SRC;
break;
case SDL_YUV_CONVERSION_BT709:
ftype = GLES2_SHADER_FRAGMENT_TEXTURE_NV21_BT709_SRC;
break;
default:
SDL_SetError("Unsupported YUV conversion mode: %d\n", SDL_GetYUVConversionModeForResolution(w, h));
goto fault;
}
break;
default:
goto fault;
@ -1445,7 +1484,7 @@ GLES2_SetDrawingState(SDL_Renderer * renderer)
GLES2_SetTexCoords(data, SDL_FALSE);
/* Activate an appropriate shader and set the projection matrix */
if (GLES2_SelectProgram(renderer, GLES2_IMAGESOURCE_SOLID) < 0) {
if (GLES2_SelectProgram(renderer, GLES2_IMAGESOURCE_SOLID, 0, 0) < 0) {
return -1;
}
@ -1707,7 +1746,7 @@ GLES2_SetupCopy(SDL_Renderer *renderer, SDL_Texture *texture)
}
}
if (GLES2_SelectProgram(renderer, sourceType) < 0) {
if (GLES2_SelectProgram(renderer, sourceType, texture->w, texture->h) < 0) {
return -1;
}

View file

@ -126,73 +126,154 @@ static const Uint8 GLES2_FragmentSrc_TextureBGRSrc_[] = " \
} \
";
#define JPEG_SHADER_CONSTANTS \
"// YUV offset \n" \
"const vec3 offset = vec3(0, -0.501960814, -0.501960814);\n" \
"\n" \
"// RGB coefficients \n" \
"const mat3 matrix = mat3( 1, 1, 1,\n" \
" 0, -0.3441, 1.772,\n" \
" 1.402, -0.7141, 0);\n" \
#define BT601_SHADER_CONSTANTS \
"// YUV offset \n" \
"const vec3 offset = vec3(-0.0627451017, -0.501960814, -0.501960814);\n" \
"\n" \
"// RGB coefficients \n" \
"const mat3 matrix = mat3( 1.1644, 1.1644, 1.1644,\n" \
" 0, -0.3918, 2.0172,\n" \
" 1.596, -0.813, 0);\n" \
#define BT709_SHADER_CONSTANTS \
"// YUV offset \n" \
"const vec3 offset = vec3(-0.0627451017, -0.501960814, -0.501960814);\n" \
"\n" \
"// RGB coefficients \n" \
"const mat3 matrix = mat3( 1.1644, 1.1644, 1.1644,\n" \
" 0, -0.2132, 2.1124,\n" \
" 1.7927, -0.5329, 0);\n" \
#define YUV_SHADER_PROLOGUE \
"precision mediump float;\n" \
"uniform sampler2D u_texture;\n" \
"uniform sampler2D u_texture_u;\n" \
"uniform sampler2D u_texture_v;\n" \
"uniform vec4 u_modulation;\n" \
"varying vec2 v_texCoord;\n" \
"\n" \
#define YUV_SHADER_BODY \
"\n" \
"void main()\n" \
"{\n" \
" mediump vec3 yuv;\n" \
" lowp vec3 rgb;\n" \
"\n" \
" // Get the YUV values \n" \
" yuv.x = texture2D(u_texture, v_texCoord).r;\n" \
" yuv.y = texture2D(u_texture_u, v_texCoord).r;\n" \
" yuv.z = texture2D(u_texture_v, v_texCoord).r;\n" \
"\n" \
" // Do the color transform \n" \
" yuv += offset;\n" \
" rgb = matrix * yuv;\n" \
"\n" \
" // That was easy. :) \n" \
" gl_FragColor = vec4(rgb, 1);\n" \
" gl_FragColor *= u_modulation;\n" \
"}" \
#define NV12_SHADER_BODY \
"\n" \
"void main()\n" \
"{\n" \
" mediump vec3 yuv;\n" \
" lowp vec3 rgb;\n" \
"\n" \
" // Get the YUV values \n" \
" yuv.x = texture2D(u_texture, v_texCoord).r;\n" \
" yuv.yz = texture2D(u_texture_u, v_texCoord).ra;\n" \
"\n" \
" // Do the color transform \n" \
" yuv += offset;\n" \
" rgb = matrix * yuv;\n" \
"\n" \
" // That was easy. :) \n" \
" gl_FragColor = vec4(rgb, 1);\n" \
" gl_FragColor *= u_modulation;\n" \
"}" \
#define NV21_SHADER_BODY \
"\n" \
"void main()\n" \
"{\n" \
" mediump vec3 yuv;\n" \
" lowp vec3 rgb;\n" \
"\n" \
" // Get the YUV values \n" \
" yuv.x = texture2D(u_texture, v_texCoord).r;\n" \
" yuv.yz = texture2D(u_texture_u, v_texCoord).ar;\n" \
"\n" \
" // Do the color transform \n" \
" yuv += offset;\n" \
" rgb = matrix * yuv;\n" \
"\n" \
" // That was easy. :) \n" \
" gl_FragColor = vec4(rgb, 1);\n" \
" gl_FragColor *= u_modulation;\n" \
"}" \
/* YUV to ABGR conversion */
static const Uint8 GLES2_FragmentSrc_TextureYUVSrc_[] = " \
precision mediump float; \
uniform sampler2D u_texture; \
uniform sampler2D u_texture_u; \
uniform sampler2D u_texture_v; \
uniform vec4 u_modulation; \
varying vec2 v_texCoord; \
\
void main() \
{ \
mediump vec3 yuv; \
lowp vec3 rgb; \
yuv.x = texture2D(u_texture, v_texCoord).r; \
yuv.y = texture2D(u_texture_u, v_texCoord).r - 0.5; \
yuv.z = texture2D(u_texture_v, v_texCoord).r - 0.5; \
rgb = mat3( 1, 1, 1, \
0, -0.39465, 2.03211, \
1.13983, -0.58060, 0) * yuv; \
gl_FragColor = vec4(rgb, 1); \
gl_FragColor *= u_modulation; \
} \
";
static const Uint8 GLES2_FragmentSrc_TextureYUVJPEGSrc_[] = \
YUV_SHADER_PROLOGUE \
JPEG_SHADER_CONSTANTS \
YUV_SHADER_BODY \
;
static const Uint8 GLES2_FragmentSrc_TextureYUVBT601Src_[] = \
YUV_SHADER_PROLOGUE \
BT601_SHADER_CONSTANTS \
YUV_SHADER_BODY \
;
static const Uint8 GLES2_FragmentSrc_TextureYUVBT709Src_[] = \
YUV_SHADER_PROLOGUE \
BT709_SHADER_CONSTANTS \
YUV_SHADER_BODY \
;
/* NV12 to ABGR conversion */
static const Uint8 GLES2_FragmentSrc_TextureNV12Src_[] = " \
precision mediump float; \
uniform sampler2D u_texture; \
uniform sampler2D u_texture_u; \
uniform vec4 u_modulation; \
varying vec2 v_texCoord; \
\
void main() \
{ \
mediump vec3 yuv; \
lowp vec3 rgb; \
yuv.x = texture2D(u_texture, v_texCoord).r; \
yuv.yz = texture2D(u_texture_u, v_texCoord).ra - 0.5; \
rgb = mat3( 1, 1, 1, \
0, -0.39465, 2.03211, \
1.13983, -0.58060, 0) * yuv; \
gl_FragColor = vec4(rgb, 1); \
gl_FragColor *= u_modulation; \
} \
";
static const Uint8 GLES2_FragmentSrc_TextureNV12JPEGSrc_[] = \
YUV_SHADER_PROLOGUE \
JPEG_SHADER_CONSTANTS \
NV12_SHADER_BODY \
;
static const Uint8 GLES2_FragmentSrc_TextureNV12BT601Src_[] = \
YUV_SHADER_PROLOGUE \
BT601_SHADER_CONSTANTS \
NV12_SHADER_BODY \
;
static const Uint8 GLES2_FragmentSrc_TextureNV12BT709Src_[] = \
YUV_SHADER_PROLOGUE \
BT709_SHADER_CONSTANTS \
NV12_SHADER_BODY \
;
/* NV21 to ABGR conversion */
static const Uint8 GLES2_FragmentSrc_TextureNV21Src_[] = " \
precision mediump float; \
uniform sampler2D u_texture; \
uniform sampler2D u_texture_u; \
uniform vec4 u_modulation; \
varying vec2 v_texCoord; \
\
void main() \
{ \
mediump vec3 yuv; \
lowp vec3 rgb; \
yuv.x = texture2D(u_texture, v_texCoord).r; \
yuv.yz = texture2D(u_texture_u, v_texCoord).ar - 0.5; \
rgb = mat3( 1, 1, 1, \
0, -0.39465, 2.03211, \
1.13983, -0.58060, 0) * yuv; \
gl_FragColor = vec4(rgb, 1); \
gl_FragColor *= u_modulation; \
} \
";
static const Uint8 GLES2_FragmentSrc_TextureNV21JPEGSrc_[] = \
YUV_SHADER_PROLOGUE \
JPEG_SHADER_CONSTANTS \
NV21_SHADER_BODY \
;
static const Uint8 GLES2_FragmentSrc_TextureNV21BT601Src_[] = \
YUV_SHADER_PROLOGUE \
BT601_SHADER_CONSTANTS \
NV21_SHADER_BODY \
;
static const Uint8 GLES2_FragmentSrc_TextureNV21BT709Src_[] = \
YUV_SHADER_PROLOGUE \
BT709_SHADER_CONSTANTS \
NV21_SHADER_BODY \
;
static const GLES2_ShaderInstance GLES2_VertexSrc_Default = {
GL_VERTEX_SHADER,
@ -236,25 +317,67 @@ static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureBGRSrc = {
GLES2_FragmentSrc_TextureBGRSrc_
};
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureYUVSrc = {
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureYUVJPEGSrc = {
GL_FRAGMENT_SHADER,
GLES2_SOURCE_SHADER,
sizeof(GLES2_FragmentSrc_TextureYUVSrc_),
GLES2_FragmentSrc_TextureYUVSrc_
sizeof(GLES2_FragmentSrc_TextureYUVJPEGSrc_),
GLES2_FragmentSrc_TextureYUVJPEGSrc_
};
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV12Src = {
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureYUVBT601Src = {
GL_FRAGMENT_SHADER,
GLES2_SOURCE_SHADER,
sizeof(GLES2_FragmentSrc_TextureNV12Src_),
GLES2_FragmentSrc_TextureNV12Src_
sizeof(GLES2_FragmentSrc_TextureYUVBT601Src_),
GLES2_FragmentSrc_TextureYUVBT601Src_
};
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV21Src = {
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureYUVBT709Src = {
GL_FRAGMENT_SHADER,
GLES2_SOURCE_SHADER,
sizeof(GLES2_FragmentSrc_TextureNV21Src_),
GLES2_FragmentSrc_TextureNV21Src_
sizeof(GLES2_FragmentSrc_TextureYUVBT709Src_),
GLES2_FragmentSrc_TextureYUVBT709Src_
};
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV12JPEGSrc = {
GL_FRAGMENT_SHADER,
GLES2_SOURCE_SHADER,
sizeof(GLES2_FragmentSrc_TextureNV12JPEGSrc_),
GLES2_FragmentSrc_TextureNV12JPEGSrc_
};
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV12BT601Src = {
GL_FRAGMENT_SHADER,
GLES2_SOURCE_SHADER,
sizeof(GLES2_FragmentSrc_TextureNV12BT601Src_),
GLES2_FragmentSrc_TextureNV12BT601Src_
};
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV21BT709Src = {
GL_FRAGMENT_SHADER,
GLES2_SOURCE_SHADER,
sizeof(GLES2_FragmentSrc_TextureNV21BT709Src_),
GLES2_FragmentSrc_TextureNV21BT709Src_
};
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV21JPEGSrc = {
GL_FRAGMENT_SHADER,
GLES2_SOURCE_SHADER,
sizeof(GLES2_FragmentSrc_TextureNV21JPEGSrc_),
GLES2_FragmentSrc_TextureNV21JPEGSrc_
};
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV21BT601Src = {
GL_FRAGMENT_SHADER,
GLES2_SOURCE_SHADER,
sizeof(GLES2_FragmentSrc_TextureNV21BT601Src_),
GLES2_FragmentSrc_TextureNV21BT601Src_
};
static const GLES2_ShaderInstance GLES2_FragmentSrc_TextureNV12BT709Src = {
GL_FRAGMENT_SHADER,
GLES2_SOURCE_SHADER,
sizeof(GLES2_FragmentSrc_TextureNV12BT709Src_),
GLES2_FragmentSrc_TextureNV12BT709Src_
};
@ -304,24 +427,66 @@ static GLES2_Shader GLES2_FragmentShader_TextureBGRSrc = {
}
};
static GLES2_Shader GLES2_FragmentShader_TextureYUVSrc = {
static GLES2_Shader GLES2_FragmentShader_TextureYUVJPEGSrc = {
1,
{
&GLES2_FragmentSrc_TextureYUVSrc
&GLES2_FragmentSrc_TextureYUVJPEGSrc
}
};
static GLES2_Shader GLES2_FragmentShader_TextureNV12Src = {
static GLES2_Shader GLES2_FragmentShader_TextureYUVBT601Src = {
1,
{
&GLES2_FragmentSrc_TextureNV12Src
&GLES2_FragmentSrc_TextureYUVBT601Src
}
};
static GLES2_Shader GLES2_FragmentShader_TextureNV21Src = {
static GLES2_Shader GLES2_FragmentShader_TextureYUVBT709Src = {
1,
{
&GLES2_FragmentSrc_TextureNV21Src
&GLES2_FragmentSrc_TextureYUVBT709Src
}
};
static GLES2_Shader GLES2_FragmentShader_TextureNV12JPEGSrc = {
1,
{
&GLES2_FragmentSrc_TextureNV12JPEGSrc
}
};
static GLES2_Shader GLES2_FragmentShader_TextureNV12BT601Src = {
1,
{
&GLES2_FragmentSrc_TextureNV12BT601Src
}
};
static GLES2_Shader GLES2_FragmentShader_TextureNV12BT709Src = {
1,
{
&GLES2_FragmentSrc_TextureNV12BT709Src
}
};
static GLES2_Shader GLES2_FragmentShader_TextureNV21JPEGSrc = {
1,
{
&GLES2_FragmentSrc_TextureNV21JPEGSrc
}
};
static GLES2_Shader GLES2_FragmentShader_TextureNV21BT601Src = {
1,
{
&GLES2_FragmentSrc_TextureNV21BT601Src
}
};
static GLES2_Shader GLES2_FragmentShader_TextureNV21BT709Src = {
1,
{
&GLES2_FragmentSrc_TextureNV21BT709Src
}
};
@ -345,12 +510,24 @@ const GLES2_Shader *GLES2_GetShader(GLES2_ShaderType type)
return &GLES2_FragmentShader_TextureRGBSrc;
case GLES2_SHADER_FRAGMENT_TEXTURE_BGR_SRC:
return &GLES2_FragmentShader_TextureBGRSrc;
case GLES2_SHADER_FRAGMENT_TEXTURE_YUV_SRC:
return &GLES2_FragmentShader_TextureYUVSrc;
case GLES2_SHADER_FRAGMENT_TEXTURE_NV12_SRC:
return &GLES2_FragmentShader_TextureNV12Src;
case GLES2_SHADER_FRAGMENT_TEXTURE_NV21_SRC:
return &GLES2_FragmentShader_TextureNV21Src;
case GLES2_SHADER_FRAGMENT_TEXTURE_YUV_JPEG_SRC:
return &GLES2_FragmentShader_TextureYUVJPEGSrc;
case GLES2_SHADER_FRAGMENT_TEXTURE_YUV_BT601_SRC:
return &GLES2_FragmentShader_TextureYUVBT601Src;
case GLES2_SHADER_FRAGMENT_TEXTURE_YUV_BT709_SRC:
return &GLES2_FragmentShader_TextureYUVBT709Src;
case GLES2_SHADER_FRAGMENT_TEXTURE_NV12_JPEG_SRC:
return &GLES2_FragmentShader_TextureNV12JPEGSrc;
case GLES2_SHADER_FRAGMENT_TEXTURE_NV12_BT601_SRC:
return &GLES2_FragmentShader_TextureNV12BT601Src;
case GLES2_SHADER_FRAGMENT_TEXTURE_NV12_BT709_SRC:
return &GLES2_FragmentShader_TextureNV12BT709Src;
case GLES2_SHADER_FRAGMENT_TEXTURE_NV21_JPEG_SRC:
return &GLES2_FragmentShader_TextureNV21JPEGSrc;
case GLES2_SHADER_FRAGMENT_TEXTURE_NV21_BT601_SRC:
return &GLES2_FragmentShader_TextureNV21BT601Src;
case GLES2_SHADER_FRAGMENT_TEXTURE_NV21_BT709_SRC:
return &GLES2_FragmentShader_TextureNV21BT709Src;
default:
return NULL;
}

View file

@ -20,11 +20,11 @@
*/
#include "../../SDL_internal.h"
#if SDL_VIDEO_RENDER_OGL_ES2
#ifndef SDL_shaders_gles2_h_
#define SDL_shaders_gles2_h_
#if SDL_VIDEO_RENDER_OGL_ES2
typedef struct GLES2_ShaderInstance
{
GLenum type;
@ -47,17 +47,23 @@ typedef enum
GLES2_SHADER_FRAGMENT_TEXTURE_ARGB_SRC,
GLES2_SHADER_FRAGMENT_TEXTURE_BGR_SRC,
GLES2_SHADER_FRAGMENT_TEXTURE_RGB_SRC,
GLES2_SHADER_FRAGMENT_TEXTURE_YUV_SRC,
GLES2_SHADER_FRAGMENT_TEXTURE_NV12_SRC,
GLES2_SHADER_FRAGMENT_TEXTURE_NV21_SRC
GLES2_SHADER_FRAGMENT_TEXTURE_YUV_JPEG_SRC,
GLES2_SHADER_FRAGMENT_TEXTURE_YUV_BT601_SRC,
GLES2_SHADER_FRAGMENT_TEXTURE_YUV_BT709_SRC,
GLES2_SHADER_FRAGMENT_TEXTURE_NV12_JPEG_SRC,
GLES2_SHADER_FRAGMENT_TEXTURE_NV12_BT601_SRC,
GLES2_SHADER_FRAGMENT_TEXTURE_NV12_BT709_SRC,
GLES2_SHADER_FRAGMENT_TEXTURE_NV21_JPEG_SRC,
GLES2_SHADER_FRAGMENT_TEXTURE_NV21_BT601_SRC,
GLES2_SHADER_FRAGMENT_TEXTURE_NV21_BT709_SRC,
} GLES2_ShaderType;
#define GLES2_SOURCE_SHADER (GLenum)-1
const GLES2_Shader *GLES2_GetShader(GLES2_ShaderType type);
#endif /* SDL_shaders_gles2_h_ */
#endif /* SDL_VIDEO_RENDER_OGL_ES2 */
#endif /* SDL_shaders_gles2_h_ */
/* vi: set ts=4 sw=4 expandtab: */