Re-added the 3DNow! and AltiVec instruction support.
This commit is contained in:
parent
92bf0fc2fd
commit
e4a0db291e
15 changed files with 2412 additions and 58 deletions
|
@ -52,7 +52,7 @@
|
||||||
Name="VCCLCompilerTool"
|
Name="VCCLCompilerTool"
|
||||||
Optimization="0"
|
Optimization="0"
|
||||||
AdditionalIncludeDirectories="..\..\include"
|
AdditionalIncludeDirectories="..\..\include"
|
||||||
PreprocessorDefinitions="_DEBUG;_WINDOWS;_WIN32_WINNT=0x0400;__MMX__;__3dNOW__;__SSE__;__SSE2__"
|
PreprocessorDefinitions="_DEBUG;_WINDOWS"
|
||||||
RuntimeLibrary="2"
|
RuntimeLibrary="2"
|
||||||
BufferSecurityCheck="false"
|
BufferSecurityCheck="false"
|
||||||
UsePrecompiledHeader="0"
|
UsePrecompiledHeader="0"
|
||||||
|
@ -231,7 +231,7 @@
|
||||||
InlineFunctionExpansion="1"
|
InlineFunctionExpansion="1"
|
||||||
EnableIntrinsicFunctions="false"
|
EnableIntrinsicFunctions="false"
|
||||||
AdditionalIncludeDirectories="..\..\include"
|
AdditionalIncludeDirectories="..\..\include"
|
||||||
PreprocessorDefinitions="NDEBUG;_WINDOWS;_WIN32_WINNT=0x0400;__MMX__;__3dNOW__;__SSE__;__SSE2__"
|
PreprocessorDefinitions="NDEBUG;_WINDOWS"
|
||||||
StringPooling="true"
|
StringPooling="true"
|
||||||
RuntimeLibrary="2"
|
RuntimeLibrary="2"
|
||||||
BufferSecurityCheck="false"
|
BufferSecurityCheck="false"
|
||||||
|
|
|
@ -52,7 +52,7 @@
|
||||||
Name="VCCLCompilerTool"
|
Name="VCCLCompilerTool"
|
||||||
Optimization="0"
|
Optimization="0"
|
||||||
AdditionalIncludeDirectories="..\..\include"
|
AdditionalIncludeDirectories="..\..\include"
|
||||||
PreprocessorDefinitions="_DEBUG;_WINDOWS;_WIN32_WINNT=0x0400;__MMX__;__3dNOW__;__SSE__;__SSE2__"
|
PreprocessorDefinitions="_DEBUG;_WINDOWS"
|
||||||
RuntimeLibrary="3"
|
RuntimeLibrary="3"
|
||||||
BufferSecurityCheck="false"
|
BufferSecurityCheck="false"
|
||||||
WarningLevel="3"
|
WarningLevel="3"
|
||||||
|
@ -223,7 +223,7 @@
|
||||||
InlineFunctionExpansion="1"
|
InlineFunctionExpansion="1"
|
||||||
EnableIntrinsicFunctions="false"
|
EnableIntrinsicFunctions="false"
|
||||||
AdditionalIncludeDirectories="..\..\include"
|
AdditionalIncludeDirectories="..\..\include"
|
||||||
PreprocessorDefinitions="NDEBUG;_WINDOWS;_WIN32_WINNT=0x0400;__MMX__;__3dNOW__;__SSE__;__SSE2__"
|
PreprocessorDefinitions="NDEBUG;_WINDOWS"
|
||||||
StringPooling="true"
|
StringPooling="true"
|
||||||
RuntimeLibrary="2"
|
RuntimeLibrary="2"
|
||||||
BufferSecurityCheck="false"
|
BufferSecurityCheck="false"
|
||||||
|
|
|
@ -83,7 +83,7 @@
|
||||||
<ClCompile>
|
<ClCompile>
|
||||||
<Optimization>Disabled</Optimization>
|
<Optimization>Disabled</Optimization>
|
||||||
<AdditionalIncludeDirectories>..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>_DEBUG;_WINDOWS;_WIN32_WINNT=0x0400;__MMX__;__3dNOW__;__SSE__;__SSE2__;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreadedDebugDLL</RuntimeLibrary>
|
||||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||||
<PrecompiledHeader>
|
<PrecompiledHeader>
|
||||||
|
@ -152,7 +152,7 @@
|
||||||
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
<InlineFunctionExpansion>OnlyExplicitInline</InlineFunctionExpansion>
|
||||||
<IntrinsicFunctions>false</IntrinsicFunctions>
|
<IntrinsicFunctions>false</IntrinsicFunctions>
|
||||||
<AdditionalIncludeDirectories>..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
<AdditionalIncludeDirectories>..\..\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||||
<PreprocessorDefinitions>NDEBUG;_WINDOWS;_WIN32_WINNT=0x0400;__MMX__;__3dNOW__;__SSE__;__SSE2__;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
<PreprocessorDefinitions>NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||||
<StringPooling>true</StringPooling>
|
<StringPooling>true</StringPooling>
|
||||||
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
<RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
|
||||||
<BufferSecurityCheck>false</BufferSecurityCheck>
|
<BufferSecurityCheck>false</BufferSecurityCheck>
|
||||||
|
@ -446,4 +446,4 @@
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||||
<ImportGroup Label="ExtensionTargets">
|
<ImportGroup Label="ExtensionTargets">
|
||||||
</ImportGroup>
|
</ImportGroup>
|
||||||
</Project>
|
</Project>
|
||||||
|
|
369
configure
vendored
369
configure
vendored
|
@ -1514,8 +1514,10 @@ Optional Features:
|
||||||
--enable-ssemath Allow GCC to use SSE floating point math
|
--enable-ssemath Allow GCC to use SSE floating point math
|
||||||
[default=no]
|
[default=no]
|
||||||
--enable-mmx use MMX assembly routines [default=yes]
|
--enable-mmx use MMX assembly routines [default=yes]
|
||||||
|
--enable-3dnow use 3DNow! assembly routines [default=yes]
|
||||||
--enable-sse use SSE assembly routines [default=yes]
|
--enable-sse use SSE assembly routines [default=yes]
|
||||||
--enable-sse2 use SSE2 assembly routines [default=no]
|
--enable-sse2 use SSE2 assembly routines [default=no]
|
||||||
|
--enable-altivec use Altivec assembly routines [default=yes]
|
||||||
--enable-oss support the OSS audio API [default=yes]
|
--enable-oss support the OSS audio API [default=yes]
|
||||||
--enable-alsa support the ALSA audio API [default=yes]
|
--enable-alsa support the ALSA audio API [default=yes]
|
||||||
--disable-alsatest Do not try to compile and run a test Alsa program
|
--disable-alsatest Do not try to compile and run a test Alsa program
|
||||||
|
@ -3768,13 +3770,13 @@ if test "${lt_cv_nm_interface+set}" = set; then
|
||||||
else
|
else
|
||||||
lt_cv_nm_interface="BSD nm"
|
lt_cv_nm_interface="BSD nm"
|
||||||
echo "int some_variable = 0;" > conftest.$ac_ext
|
echo "int some_variable = 0;" > conftest.$ac_ext
|
||||||
(eval echo "\"\$as_me:3771: $ac_compile\"" >&5)
|
(eval echo "\"\$as_me:3773: $ac_compile\"" >&5)
|
||||||
(eval "$ac_compile" 2>conftest.err)
|
(eval "$ac_compile" 2>conftest.err)
|
||||||
cat conftest.err >&5
|
cat conftest.err >&5
|
||||||
(eval echo "\"\$as_me:3774: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
|
(eval echo "\"\$as_me:3776: $NM \\\"conftest.$ac_objext\\\"\"" >&5)
|
||||||
(eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
|
(eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out)
|
||||||
cat conftest.err >&5
|
cat conftest.err >&5
|
||||||
(eval echo "\"\$as_me:3777: output\"" >&5)
|
(eval echo "\"\$as_me:3779: output\"" >&5)
|
||||||
cat conftest.out >&5
|
cat conftest.out >&5
|
||||||
if $GREP 'External.*some_variable' conftest.out > /dev/null; then
|
if $GREP 'External.*some_variable' conftest.out > /dev/null; then
|
||||||
lt_cv_nm_interface="MS dumpbin"
|
lt_cv_nm_interface="MS dumpbin"
|
||||||
|
@ -5001,7 +5003,7 @@ ia64-*-hpux*)
|
||||||
;;
|
;;
|
||||||
*-*-irix6*)
|
*-*-irix6*)
|
||||||
# Find out which ABI we are using.
|
# Find out which ABI we are using.
|
||||||
echo '#line 5004 "configure"' > conftest.$ac_ext
|
echo '#line 5006 "configure"' > conftest.$ac_ext
|
||||||
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
|
if { (eval echo "$as_me:$LINENO: \"$ac_compile\"") >&5
|
||||||
(eval $ac_compile) 2>&5
|
(eval $ac_compile) 2>&5
|
||||||
ac_status=$?
|
ac_status=$?
|
||||||
|
@ -7162,11 +7164,11 @@ else
|
||||||
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
|
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
|
||||||
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
|
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
|
||||||
-e 's:$: $lt_compiler_flag:'`
|
-e 's:$: $lt_compiler_flag:'`
|
||||||
(eval echo "\"\$as_me:7165: $lt_compile\"" >&5)
|
(eval echo "\"\$as_me:7167: $lt_compile\"" >&5)
|
||||||
(eval "$lt_compile" 2>conftest.err)
|
(eval "$lt_compile" 2>conftest.err)
|
||||||
ac_status=$?
|
ac_status=$?
|
||||||
cat conftest.err >&5
|
cat conftest.err >&5
|
||||||
echo "$as_me:7169: \$? = $ac_status" >&5
|
echo "$as_me:7171: \$? = $ac_status" >&5
|
||||||
if (exit $ac_status) && test -s "$ac_outfile"; then
|
if (exit $ac_status) && test -s "$ac_outfile"; then
|
||||||
# The compiler can only warn and ignore the option if not recognized
|
# The compiler can only warn and ignore the option if not recognized
|
||||||
# So say no if there are warnings other than the usual output.
|
# So say no if there are warnings other than the usual output.
|
||||||
|
@ -7501,11 +7503,11 @@ else
|
||||||
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
|
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
|
||||||
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
|
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
|
||||||
-e 's:$: $lt_compiler_flag:'`
|
-e 's:$: $lt_compiler_flag:'`
|
||||||
(eval echo "\"\$as_me:7504: $lt_compile\"" >&5)
|
(eval echo "\"\$as_me:7506: $lt_compile\"" >&5)
|
||||||
(eval "$lt_compile" 2>conftest.err)
|
(eval "$lt_compile" 2>conftest.err)
|
||||||
ac_status=$?
|
ac_status=$?
|
||||||
cat conftest.err >&5
|
cat conftest.err >&5
|
||||||
echo "$as_me:7508: \$? = $ac_status" >&5
|
echo "$as_me:7510: \$? = $ac_status" >&5
|
||||||
if (exit $ac_status) && test -s "$ac_outfile"; then
|
if (exit $ac_status) && test -s "$ac_outfile"; then
|
||||||
# The compiler can only warn and ignore the option if not recognized
|
# The compiler can only warn and ignore the option if not recognized
|
||||||
# So say no if there are warnings other than the usual output.
|
# So say no if there are warnings other than the usual output.
|
||||||
|
@ -7606,11 +7608,11 @@ else
|
||||||
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
|
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
|
||||||
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
|
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
|
||||||
-e 's:$: $lt_compiler_flag:'`
|
-e 's:$: $lt_compiler_flag:'`
|
||||||
(eval echo "\"\$as_me:7609: $lt_compile\"" >&5)
|
(eval echo "\"\$as_me:7611: $lt_compile\"" >&5)
|
||||||
(eval "$lt_compile" 2>out/conftest.err)
|
(eval "$lt_compile" 2>out/conftest.err)
|
||||||
ac_status=$?
|
ac_status=$?
|
||||||
cat out/conftest.err >&5
|
cat out/conftest.err >&5
|
||||||
echo "$as_me:7613: \$? = $ac_status" >&5
|
echo "$as_me:7615: \$? = $ac_status" >&5
|
||||||
if (exit $ac_status) && test -s out/conftest2.$ac_objext
|
if (exit $ac_status) && test -s out/conftest2.$ac_objext
|
||||||
then
|
then
|
||||||
# The compiler can only warn and ignore the option if not recognized
|
# The compiler can only warn and ignore the option if not recognized
|
||||||
|
@ -7661,11 +7663,11 @@ else
|
||||||
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
|
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
|
||||||
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
|
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
|
||||||
-e 's:$: $lt_compiler_flag:'`
|
-e 's:$: $lt_compiler_flag:'`
|
||||||
(eval echo "\"\$as_me:7664: $lt_compile\"" >&5)
|
(eval echo "\"\$as_me:7666: $lt_compile\"" >&5)
|
||||||
(eval "$lt_compile" 2>out/conftest.err)
|
(eval "$lt_compile" 2>out/conftest.err)
|
||||||
ac_status=$?
|
ac_status=$?
|
||||||
cat out/conftest.err >&5
|
cat out/conftest.err >&5
|
||||||
echo "$as_me:7668: \$? = $ac_status" >&5
|
echo "$as_me:7670: \$? = $ac_status" >&5
|
||||||
if (exit $ac_status) && test -s out/conftest2.$ac_objext
|
if (exit $ac_status) && test -s out/conftest2.$ac_objext
|
||||||
then
|
then
|
||||||
# The compiler can only warn and ignore the option if not recognized
|
# The compiler can only warn and ignore the option if not recognized
|
||||||
|
@ -10419,7 +10421,7 @@ else
|
||||||
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
|
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
|
||||||
lt_status=$lt_dlunknown
|
lt_status=$lt_dlunknown
|
||||||
cat > conftest.$ac_ext <<_LT_EOF
|
cat > conftest.$ac_ext <<_LT_EOF
|
||||||
#line 10422 "configure"
|
#line 10424 "configure"
|
||||||
#include "confdefs.h"
|
#include "confdefs.h"
|
||||||
|
|
||||||
#if HAVE_DLFCN_H
|
#if HAVE_DLFCN_H
|
||||||
|
@ -10515,7 +10517,7 @@ else
|
||||||
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
|
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
|
||||||
lt_status=$lt_dlunknown
|
lt_status=$lt_dlunknown
|
||||||
cat > conftest.$ac_ext <<_LT_EOF
|
cat > conftest.$ac_ext <<_LT_EOF
|
||||||
#line 10518 "configure"
|
#line 10520 "configure"
|
||||||
#include "confdefs.h"
|
#include "confdefs.h"
|
||||||
|
|
||||||
#if HAVE_DLFCN_H
|
#if HAVE_DLFCN_H
|
||||||
|
@ -14197,11 +14199,11 @@ else
|
||||||
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
|
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
|
||||||
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
|
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
|
||||||
-e 's:$: $lt_compiler_flag:'`
|
-e 's:$: $lt_compiler_flag:'`
|
||||||
(eval echo "\"\$as_me:14200: $lt_compile\"" >&5)
|
(eval echo "\"\$as_me:14202: $lt_compile\"" >&5)
|
||||||
(eval "$lt_compile" 2>conftest.err)
|
(eval "$lt_compile" 2>conftest.err)
|
||||||
ac_status=$?
|
ac_status=$?
|
||||||
cat conftest.err >&5
|
cat conftest.err >&5
|
||||||
echo "$as_me:14204: \$? = $ac_status" >&5
|
echo "$as_me:14206: \$? = $ac_status" >&5
|
||||||
if (exit $ac_status) && test -s "$ac_outfile"; then
|
if (exit $ac_status) && test -s "$ac_outfile"; then
|
||||||
# The compiler can only warn and ignore the option if not recognized
|
# The compiler can only warn and ignore the option if not recognized
|
||||||
# So say no if there are warnings other than the usual output.
|
# So say no if there are warnings other than the usual output.
|
||||||
|
@ -14296,11 +14298,11 @@ else
|
||||||
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
|
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
|
||||||
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
|
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
|
||||||
-e 's:$: $lt_compiler_flag:'`
|
-e 's:$: $lt_compiler_flag:'`
|
||||||
(eval echo "\"\$as_me:14299: $lt_compile\"" >&5)
|
(eval echo "\"\$as_me:14301: $lt_compile\"" >&5)
|
||||||
(eval "$lt_compile" 2>out/conftest.err)
|
(eval "$lt_compile" 2>out/conftest.err)
|
||||||
ac_status=$?
|
ac_status=$?
|
||||||
cat out/conftest.err >&5
|
cat out/conftest.err >&5
|
||||||
echo "$as_me:14303: \$? = $ac_status" >&5
|
echo "$as_me:14305: \$? = $ac_status" >&5
|
||||||
if (exit $ac_status) && test -s out/conftest2.$ac_objext
|
if (exit $ac_status) && test -s out/conftest2.$ac_objext
|
||||||
then
|
then
|
||||||
# The compiler can only warn and ignore the option if not recognized
|
# The compiler can only warn and ignore the option if not recognized
|
||||||
|
@ -14348,11 +14350,11 @@ else
|
||||||
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
|
-e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \
|
||||||
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
|
-e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \
|
||||||
-e 's:$: $lt_compiler_flag:'`
|
-e 's:$: $lt_compiler_flag:'`
|
||||||
(eval echo "\"\$as_me:14351: $lt_compile\"" >&5)
|
(eval echo "\"\$as_me:14353: $lt_compile\"" >&5)
|
||||||
(eval "$lt_compile" 2>out/conftest.err)
|
(eval "$lt_compile" 2>out/conftest.err)
|
||||||
ac_status=$?
|
ac_status=$?
|
||||||
cat out/conftest.err >&5
|
cat out/conftest.err >&5
|
||||||
echo "$as_me:14355: \$? = $ac_status" >&5
|
echo "$as_me:14357: \$? = $ac_status" >&5
|
||||||
if (exit $ac_status) && test -s out/conftest2.$ac_objext
|
if (exit $ac_status) && test -s out/conftest2.$ac_objext
|
||||||
then
|
then
|
||||||
# The compiler can only warn and ignore the option if not recognized
|
# The compiler can only warn and ignore the option if not recognized
|
||||||
|
@ -20693,6 +20695,79 @@ echo "${ECHO_T}$have_gcc_mmx" >&6; }
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Check whether --enable-3dnow was given.
|
||||||
|
if test "${enable_3dnow+set}" = set; then
|
||||||
|
enableval=$enable_3dnow;
|
||||||
|
else
|
||||||
|
enable_3dnow=yes
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test x$enable_3dnow = xyes; then
|
||||||
|
save_CFLAGS="$CFLAGS"
|
||||||
|
have_gcc_3dnow=no
|
||||||
|
{ echo "$as_me:$LINENO: checking for GCC -m3dnow option" >&5
|
||||||
|
echo $ECHO_N "checking for GCC -m3dnow option... $ECHO_C" >&6; }
|
||||||
|
amd3dnow_CFLAGS="-m3dnow"
|
||||||
|
CFLAGS="$save_CFLAGS $amd3dnow_CFLAGS"
|
||||||
|
|
||||||
|
cat >conftest.$ac_ext <<_ACEOF
|
||||||
|
/* confdefs.h. */
|
||||||
|
_ACEOF
|
||||||
|
cat confdefs.h >>conftest.$ac_ext
|
||||||
|
cat >>conftest.$ac_ext <<_ACEOF
|
||||||
|
/* end confdefs.h. */
|
||||||
|
|
||||||
|
#include <mm3dnow.h>
|
||||||
|
#ifndef __3dNOW__
|
||||||
|
#error Assembler CPP flag not enabled
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
_ACEOF
|
||||||
|
rm -f conftest.$ac_objext
|
||||||
|
if { (ac_try="$ac_compile"
|
||||||
|
case "(($ac_try" in
|
||||||
|
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
|
||||||
|
*) ac_try_echo=$ac_try;;
|
||||||
|
esac
|
||||||
|
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
|
||||||
|
(eval "$ac_compile") 2>conftest.er1
|
||||||
|
ac_status=$?
|
||||||
|
grep -v '^ *+' conftest.er1 >conftest.err
|
||||||
|
rm -f conftest.er1
|
||||||
|
cat conftest.err >&5
|
||||||
|
echo "$as_me:$LINENO: \$? = $ac_status" >&5
|
||||||
|
(exit $ac_status); } && {
|
||||||
|
test -z "$ac_c_werror_flag" ||
|
||||||
|
test ! -s conftest.err
|
||||||
|
} && test -s conftest.$ac_objext; then
|
||||||
|
|
||||||
|
have_gcc_3dnow=yes
|
||||||
|
|
||||||
|
else
|
||||||
|
echo "$as_me: failed program was:" >&5
|
||||||
|
sed 's/^/| /' conftest.$ac_ext >&5
|
||||||
|
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||||
|
{ echo "$as_me:$LINENO: result: $have_gcc_3dnow" >&5
|
||||||
|
echo "${ECHO_T}$have_gcc_3dnow" >&6; }
|
||||||
|
CFLAGS="$save_CFLAGS"
|
||||||
|
|
||||||
|
if test x$have_gcc_3dnow = xyes; then
|
||||||
|
EXTRA_CFLAGS="$EXTRA_CFLAGS $amd3dnow_CFLAGS"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
# Check whether --enable-sse was given.
|
# Check whether --enable-sse was given.
|
||||||
if test "${enable_sse+set}" = set; then
|
if test "${enable_sse+set}" = set; then
|
||||||
enableval=$enable_sse;
|
enableval=$enable_sse;
|
||||||
|
@ -20856,6 +20931,260 @@ echo "${ECHO_T}$have_gcc_sse2" >&6; }
|
||||||
EXTRA_CFLAGS="$EXTRA_CFLAGS $sse2_CFLAGS"
|
EXTRA_CFLAGS="$EXTRA_CFLAGS $sse2_CFLAGS"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Check whether --enable-altivec was given.
|
||||||
|
if test "${enable_altivec+set}" = set; then
|
||||||
|
enableval=$enable_altivec;
|
||||||
|
else
|
||||||
|
enable_altivec=yes
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test x$enable_altivec = xyes; then
|
||||||
|
save_CFLAGS="$CFLAGS"
|
||||||
|
have_gcc_altivec=no
|
||||||
|
have_altivec_h_hdr=no
|
||||||
|
altivec_CFLAGS="-maltivec"
|
||||||
|
CFLAGS="$save_CFLAGS $altivec_CFLAGS"
|
||||||
|
|
||||||
|
{ echo "$as_me:$LINENO: checking for Altivec with GCC altivec.h and -maltivec option" >&5
|
||||||
|
echo $ECHO_N "checking for Altivec with GCC altivec.h and -maltivec option... $ECHO_C" >&6; }
|
||||||
|
cat >conftest.$ac_ext <<_ACEOF
|
||||||
|
/* confdefs.h. */
|
||||||
|
_ACEOF
|
||||||
|
cat confdefs.h >>conftest.$ac_ext
|
||||||
|
cat >>conftest.$ac_ext <<_ACEOF
|
||||||
|
/* end confdefs.h. */
|
||||||
|
|
||||||
|
#include <altivec.h>
|
||||||
|
vector unsigned int vzero() {
|
||||||
|
return vec_splat_u32(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
_ACEOF
|
||||||
|
rm -f conftest.$ac_objext
|
||||||
|
if { (ac_try="$ac_compile"
|
||||||
|
case "(($ac_try" in
|
||||||
|
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
|
||||||
|
*) ac_try_echo=$ac_try;;
|
||||||
|
esac
|
||||||
|
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
|
||||||
|
(eval "$ac_compile") 2>conftest.er1
|
||||||
|
ac_status=$?
|
||||||
|
grep -v '^ *+' conftest.er1 >conftest.err
|
||||||
|
rm -f conftest.er1
|
||||||
|
cat conftest.err >&5
|
||||||
|
echo "$as_me:$LINENO: \$? = $ac_status" >&5
|
||||||
|
(exit $ac_status); } && {
|
||||||
|
test -z "$ac_c_werror_flag" ||
|
||||||
|
test ! -s conftest.err
|
||||||
|
} && test -s conftest.$ac_objext; then
|
||||||
|
|
||||||
|
have_gcc_altivec=yes
|
||||||
|
have_altivec_h_hdr=yes
|
||||||
|
|
||||||
|
else
|
||||||
|
echo "$as_me: failed program was:" >&5
|
||||||
|
sed 's/^/| /' conftest.$ac_ext >&5
|
||||||
|
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||||
|
{ echo "$as_me:$LINENO: result: $have_gcc_altivec" >&5
|
||||||
|
echo "${ECHO_T}$have_gcc_altivec" >&6; }
|
||||||
|
|
||||||
|
if test x$have_gcc_altivec = xno; then
|
||||||
|
{ echo "$as_me:$LINENO: checking for Altivec with GCC -maltivec option" >&5
|
||||||
|
echo $ECHO_N "checking for Altivec with GCC -maltivec option... $ECHO_C" >&6; }
|
||||||
|
cat >conftest.$ac_ext <<_ACEOF
|
||||||
|
/* confdefs.h. */
|
||||||
|
_ACEOF
|
||||||
|
cat confdefs.h >>conftest.$ac_ext
|
||||||
|
cat >>conftest.$ac_ext <<_ACEOF
|
||||||
|
/* end confdefs.h. */
|
||||||
|
|
||||||
|
vector unsigned int vzero() {
|
||||||
|
return vec_splat_u32(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
_ACEOF
|
||||||
|
rm -f conftest.$ac_objext
|
||||||
|
if { (ac_try="$ac_compile"
|
||||||
|
case "(($ac_try" in
|
||||||
|
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
|
||||||
|
*) ac_try_echo=$ac_try;;
|
||||||
|
esac
|
||||||
|
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
|
||||||
|
(eval "$ac_compile") 2>conftest.er1
|
||||||
|
ac_status=$?
|
||||||
|
grep -v '^ *+' conftest.er1 >conftest.err
|
||||||
|
rm -f conftest.er1
|
||||||
|
cat conftest.err >&5
|
||||||
|
echo "$as_me:$LINENO: \$? = $ac_status" >&5
|
||||||
|
(exit $ac_status); } && {
|
||||||
|
test -z "$ac_c_werror_flag" ||
|
||||||
|
test ! -s conftest.err
|
||||||
|
} && test -s conftest.$ac_objext; then
|
||||||
|
|
||||||
|
have_gcc_altivec=yes
|
||||||
|
|
||||||
|
else
|
||||||
|
echo "$as_me: failed program was:" >&5
|
||||||
|
sed 's/^/| /' conftest.$ac_ext >&5
|
||||||
|
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||||
|
{ echo "$as_me:$LINENO: result: $have_gcc_altivec" >&5
|
||||||
|
echo "${ECHO_T}$have_gcc_altivec" >&6; }
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test x$have_gcc_altivec = xno; then
|
||||||
|
{ echo "$as_me:$LINENO: checking for Altivec with GCC altivec.h and -faltivec option" >&5
|
||||||
|
echo $ECHO_N "checking for Altivec with GCC altivec.h and -faltivec option... $ECHO_C" >&6; }
|
||||||
|
altivec_CFLAGS="-faltivec"
|
||||||
|
CFLAGS="$save_CFLAGS $altivec_CFLAGS"
|
||||||
|
cat >conftest.$ac_ext <<_ACEOF
|
||||||
|
/* confdefs.h. */
|
||||||
|
_ACEOF
|
||||||
|
cat confdefs.h >>conftest.$ac_ext
|
||||||
|
cat >>conftest.$ac_ext <<_ACEOF
|
||||||
|
/* end confdefs.h. */
|
||||||
|
|
||||||
|
#include <altivec.h>
|
||||||
|
vector unsigned int vzero() {
|
||||||
|
return vec_splat_u32(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
_ACEOF
|
||||||
|
rm -f conftest.$ac_objext
|
||||||
|
if { (ac_try="$ac_compile"
|
||||||
|
case "(($ac_try" in
|
||||||
|
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
|
||||||
|
*) ac_try_echo=$ac_try;;
|
||||||
|
esac
|
||||||
|
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
|
||||||
|
(eval "$ac_compile") 2>conftest.er1
|
||||||
|
ac_status=$?
|
||||||
|
grep -v '^ *+' conftest.er1 >conftest.err
|
||||||
|
rm -f conftest.er1
|
||||||
|
cat conftest.err >&5
|
||||||
|
echo "$as_me:$LINENO: \$? = $ac_status" >&5
|
||||||
|
(exit $ac_status); } && {
|
||||||
|
test -z "$ac_c_werror_flag" ||
|
||||||
|
test ! -s conftest.err
|
||||||
|
} && test -s conftest.$ac_objext; then
|
||||||
|
|
||||||
|
have_gcc_altivec=yes
|
||||||
|
have_altivec_h_hdr=yes
|
||||||
|
|
||||||
|
else
|
||||||
|
echo "$as_me: failed program was:" >&5
|
||||||
|
sed 's/^/| /' conftest.$ac_ext >&5
|
||||||
|
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||||
|
{ echo "$as_me:$LINENO: result: $have_gcc_altivec" >&5
|
||||||
|
echo "${ECHO_T}$have_gcc_altivec" >&6; }
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test x$have_gcc_altivec = xno; then
|
||||||
|
{ echo "$as_me:$LINENO: checking for Altivec with GCC -faltivec option" >&5
|
||||||
|
echo $ECHO_N "checking for Altivec with GCC -faltivec option... $ECHO_C" >&6; }
|
||||||
|
cat >conftest.$ac_ext <<_ACEOF
|
||||||
|
/* confdefs.h. */
|
||||||
|
_ACEOF
|
||||||
|
cat confdefs.h >>conftest.$ac_ext
|
||||||
|
cat >>conftest.$ac_ext <<_ACEOF
|
||||||
|
/* end confdefs.h. */
|
||||||
|
|
||||||
|
vector unsigned int vzero() {
|
||||||
|
return vec_splat_u32(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
main ()
|
||||||
|
{
|
||||||
|
|
||||||
|
|
||||||
|
;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
_ACEOF
|
||||||
|
rm -f conftest.$ac_objext
|
||||||
|
if { (ac_try="$ac_compile"
|
||||||
|
case "(($ac_try" in
|
||||||
|
*\"* | *\`* | *\\*) ac_try_echo=\$ac_try;;
|
||||||
|
*) ac_try_echo=$ac_try;;
|
||||||
|
esac
|
||||||
|
eval "echo \"\$as_me:$LINENO: $ac_try_echo\"") >&5
|
||||||
|
(eval "$ac_compile") 2>conftest.er1
|
||||||
|
ac_status=$?
|
||||||
|
grep -v '^ *+' conftest.er1 >conftest.err
|
||||||
|
rm -f conftest.er1
|
||||||
|
cat conftest.err >&5
|
||||||
|
echo "$as_me:$LINENO: \$? = $ac_status" >&5
|
||||||
|
(exit $ac_status); } && {
|
||||||
|
test -z "$ac_c_werror_flag" ||
|
||||||
|
test ! -s conftest.err
|
||||||
|
} && test -s conftest.$ac_objext; then
|
||||||
|
|
||||||
|
have_gcc_altivec=yes
|
||||||
|
|
||||||
|
else
|
||||||
|
echo "$as_me: failed program was:" >&5
|
||||||
|
sed 's/^/| /' conftest.$ac_ext >&5
|
||||||
|
|
||||||
|
|
||||||
|
fi
|
||||||
|
|
||||||
|
rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
|
||||||
|
{ echo "$as_me:$LINENO: result: $have_gcc_altivec" >&5
|
||||||
|
echo "${ECHO_T}$have_gcc_altivec" >&6; }
|
||||||
|
fi
|
||||||
|
CFLAGS="$save_CFLAGS"
|
||||||
|
|
||||||
|
if test x$have_gcc_altivec = xyes; then
|
||||||
|
cat >>confdefs.h <<\_ACEOF
|
||||||
|
#define SDL_ALTIVEC_BLITTERS 1
|
||||||
|
_ACEOF
|
||||||
|
|
||||||
|
if test x$have_altivec_h_hdr = xyes; then
|
||||||
|
cat >>confdefs.h <<\_ACEOF
|
||||||
|
#define HAVE_ALTIVEC_H 1
|
||||||
|
_ACEOF
|
||||||
|
|
||||||
|
fi
|
||||||
|
EXTRA_CFLAGS="$EXTRA_CFLAGS $altivec_CFLAGS"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
CheckOSS()
|
CheckOSS()
|
||||||
|
|
103
configure.in
103
configure.in
|
@ -501,6 +501,33 @@ AC_HELP_STRING([--enable-mmx], [use MMX assembly routines [[default=yes]]]),
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
AC_ARG_ENABLE(3dnow,
|
||||||
|
AC_HELP_STRING([--enable-3dnow], [use 3DNow! assembly routines [[default=yes]]]),
|
||||||
|
, enable_3dnow=yes)
|
||||||
|
if test x$enable_3dnow = xyes; then
|
||||||
|
save_CFLAGS="$CFLAGS"
|
||||||
|
have_gcc_3dnow=no
|
||||||
|
AC_MSG_CHECKING(for GCC -m3dnow option)
|
||||||
|
amd3dnow_CFLAGS="-m3dnow"
|
||||||
|
CFLAGS="$save_CFLAGS $amd3dnow_CFLAGS"
|
||||||
|
|
||||||
|
AC_TRY_COMPILE([
|
||||||
|
#include <mm3dnow.h>
|
||||||
|
#ifndef __3dNOW__
|
||||||
|
#error Assembler CPP flag not enabled
|
||||||
|
#endif
|
||||||
|
],[
|
||||||
|
],[
|
||||||
|
have_gcc_3dnow=yes
|
||||||
|
])
|
||||||
|
AC_MSG_RESULT($have_gcc_3dnow)
|
||||||
|
CFLAGS="$save_CFLAGS"
|
||||||
|
|
||||||
|
if test x$have_gcc_3dnow = xyes; then
|
||||||
|
EXTRA_CFLAGS="$EXTRA_CFLAGS $amd3dnow_CFLAGS"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
AC_ARG_ENABLE(sse,
|
AC_ARG_ENABLE(sse,
|
||||||
AC_HELP_STRING([--enable-sse], [use SSE assembly routines [[default=yes]]]),
|
AC_HELP_STRING([--enable-sse], [use SSE assembly routines [[default=yes]]]),
|
||||||
, enable_sse=yes)
|
, enable_sse=yes)
|
||||||
|
@ -572,6 +599,82 @@ AC_HELP_STRING([--enable-sse2], [use SSE2 assembly routines [[default=no]]]),
|
||||||
EXTRA_CFLAGS="$EXTRA_CFLAGS $sse2_CFLAGS"
|
EXTRA_CFLAGS="$EXTRA_CFLAGS $sse2_CFLAGS"
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
AC_ARG_ENABLE(altivec,
|
||||||
|
AC_HELP_STRING([--enable-altivec], [use Altivec assembly routines [[default=yes]]]),
|
||||||
|
, enable_altivec=yes)
|
||||||
|
if test x$enable_altivec = xyes; then
|
||||||
|
save_CFLAGS="$CFLAGS"
|
||||||
|
have_gcc_altivec=no
|
||||||
|
have_altivec_h_hdr=no
|
||||||
|
altivec_CFLAGS="-maltivec"
|
||||||
|
CFLAGS="$save_CFLAGS $altivec_CFLAGS"
|
||||||
|
|
||||||
|
AC_MSG_CHECKING(for Altivec with GCC altivec.h and -maltivec option)
|
||||||
|
AC_TRY_COMPILE([
|
||||||
|
#include <altivec.h>
|
||||||
|
vector unsigned int vzero() {
|
||||||
|
return vec_splat_u32(0);
|
||||||
|
}
|
||||||
|
],[
|
||||||
|
],[
|
||||||
|
have_gcc_altivec=yes
|
||||||
|
have_altivec_h_hdr=yes
|
||||||
|
])
|
||||||
|
AC_MSG_RESULT($have_gcc_altivec)
|
||||||
|
|
||||||
|
if test x$have_gcc_altivec = xno; then
|
||||||
|
AC_MSG_CHECKING(for Altivec with GCC -maltivec option)
|
||||||
|
AC_TRY_COMPILE([
|
||||||
|
vector unsigned int vzero() {
|
||||||
|
return vec_splat_u32(0);
|
||||||
|
}
|
||||||
|
],[
|
||||||
|
],[
|
||||||
|
have_gcc_altivec=yes
|
||||||
|
])
|
||||||
|
AC_MSG_RESULT($have_gcc_altivec)
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test x$have_gcc_altivec = xno; then
|
||||||
|
AC_MSG_CHECKING(for Altivec with GCC altivec.h and -faltivec option)
|
||||||
|
altivec_CFLAGS="-faltivec"
|
||||||
|
CFLAGS="$save_CFLAGS $altivec_CFLAGS"
|
||||||
|
AC_TRY_COMPILE([
|
||||||
|
#include <altivec.h>
|
||||||
|
vector unsigned int vzero() {
|
||||||
|
return vec_splat_u32(0);
|
||||||
|
}
|
||||||
|
],[
|
||||||
|
],[
|
||||||
|
have_gcc_altivec=yes
|
||||||
|
have_altivec_h_hdr=yes
|
||||||
|
])
|
||||||
|
AC_MSG_RESULT($have_gcc_altivec)
|
||||||
|
fi
|
||||||
|
|
||||||
|
if test x$have_gcc_altivec = xno; then
|
||||||
|
AC_MSG_CHECKING(for Altivec with GCC -faltivec option)
|
||||||
|
AC_TRY_COMPILE([
|
||||||
|
vector unsigned int vzero() {
|
||||||
|
return vec_splat_u32(0);
|
||||||
|
}
|
||||||
|
],[
|
||||||
|
],[
|
||||||
|
have_gcc_altivec=yes
|
||||||
|
])
|
||||||
|
AC_MSG_RESULT($have_gcc_altivec)
|
||||||
|
fi
|
||||||
|
CFLAGS="$save_CFLAGS"
|
||||||
|
|
||||||
|
if test x$have_gcc_altivec = xyes; then
|
||||||
|
AC_DEFINE(SDL_ALTIVEC_BLITTERS)
|
||||||
|
if test x$have_altivec_h_hdr = xyes; then
|
||||||
|
AC_DEFINE(HAVE_ALTIVEC_H)
|
||||||
|
fi
|
||||||
|
EXTRA_CFLAGS="$EXTRA_CFLAGS $altivec_CFLAGS"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
dnl See if the OSS audio interface is supported
|
dnl See if the OSS audio interface is supported
|
||||||
|
|
|
@ -34,7 +34,7 @@
|
||||||
|
|
||||||
/* Make sure that this isn't included by Visual C++ */
|
/* Make sure that this isn't included by Visual C++ */
|
||||||
#ifdef _MSC_VER
|
#ifdef _MSC_VER
|
||||||
#error You should copy include/SDL_config.h.default to include/SDL_config.h
|
#error You should run hg revert SDL_config.h
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* C language features */
|
/* C language features */
|
||||||
|
@ -82,6 +82,7 @@
|
||||||
#undef HAVE_MATH_H
|
#undef HAVE_MATH_H
|
||||||
#undef HAVE_ICONV_H
|
#undef HAVE_ICONV_H
|
||||||
#undef HAVE_SIGNAL_H
|
#undef HAVE_SIGNAL_H
|
||||||
|
#undef HAVE_ALTIVEC_H
|
||||||
|
|
||||||
/* C library functions */
|
/* C library functions */
|
||||||
#undef HAVE_MALLOC
|
#undef HAVE_MALLOC
|
||||||
|
@ -302,5 +303,6 @@
|
||||||
|
|
||||||
/* Enable assembly routines */
|
/* Enable assembly routines */
|
||||||
#undef SDL_ASSEMBLY_ROUTINES
|
#undef SDL_ASSEMBLY_ROUTINES
|
||||||
|
#undef SDL_ALTIVEC_BLITTERS
|
||||||
|
|
||||||
#endif /* _SDL_config_h */
|
#endif /* _SDL_config_h */
|
||||||
|
|
|
@ -168,5 +168,8 @@
|
||||||
|
|
||||||
/* Enable assembly routines */
|
/* Enable assembly routines */
|
||||||
#define SDL_ASSEMBLY_ROUTINES 1
|
#define SDL_ASSEMBLY_ROUTINES 1
|
||||||
|
#ifdef __ppc__
|
||||||
|
#define SDL_ALTIVEC_BLITTERS 1
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _SDL_config_macosx_h */
|
#endif /* _SDL_config_macosx_h */
|
||||||
|
|
|
@ -31,6 +31,34 @@
|
||||||
|
|
||||||
#include "SDL_stdinc.h"
|
#include "SDL_stdinc.h"
|
||||||
|
|
||||||
|
/* Need to do this here because intrin.h has C++ code in it */
|
||||||
|
/* Visual Studio 2005 has a bug where intrin.h conflicts with winnt.h */
|
||||||
|
#if defined(_MSC_VER) && (_MSC_VER >= 1500) && !defined(_WIN32_WCE)
|
||||||
|
#include <intrin.h>
|
||||||
|
#define __MMX__
|
||||||
|
#define __3dNOW__
|
||||||
|
#define __SSE__
|
||||||
|
#define __SSE2__
|
||||||
|
#elif defined(__MINGW64_VERSION_MAJOR)
|
||||||
|
#include <intrin.h>
|
||||||
|
#else
|
||||||
|
#ifdef __MMX__
|
||||||
|
#include <mmintrin.h>
|
||||||
|
#endif
|
||||||
|
#ifdef __3dNOW__
|
||||||
|
#include <mm3dnow.h>
|
||||||
|
#endif
|
||||||
|
#ifdef __SSE__
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#endif
|
||||||
|
#ifdef __SSE2__
|
||||||
|
#include <emmintrin.h>
|
||||||
|
#endif
|
||||||
|
#ifdef HAVE_ALTIVEC_H
|
||||||
|
#include <altivec.h>
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
#include "begin_code.h"
|
#include "begin_code.h"
|
||||||
/* Set up for C function definitions, even when using C++ */
|
/* Set up for C function definitions, even when using C++ */
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
@ -64,11 +92,21 @@ extern DECLSPEC int SDLCALL SDL_GetCPUCacheLineSize(void);
|
||||||
*/
|
*/
|
||||||
extern DECLSPEC SDL_bool SDLCALL SDL_HasRDTSC(void);
|
extern DECLSPEC SDL_bool SDLCALL SDL_HasRDTSC(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function returns true if the CPU has AltiVec features.
|
||||||
|
*/
|
||||||
|
extern DECLSPEC SDL_bool SDLCALL SDL_HasAltiVec(void);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This function returns true if the CPU has MMX features.
|
* This function returns true if the CPU has MMX features.
|
||||||
*/
|
*/
|
||||||
extern DECLSPEC SDL_bool SDLCALL SDL_HasMMX(void);
|
extern DECLSPEC SDL_bool SDLCALL SDL_HasMMX(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This function returns true if the CPU has 3DNow! features.
|
||||||
|
*/
|
||||||
|
extern DECLSPEC SDL_bool SDLCALL SDL_Has3DNow(void);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This function returns true if the CPU has SSE features.
|
* This function returns true if the CPU has SSE features.
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -32,18 +32,37 @@
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/sysctl.h>
|
#include <sys/sysctl.h>
|
||||||
#endif
|
#endif
|
||||||
|
#if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
|
||||||
|
#include <sys/sysctl.h> /* For AltiVec check */
|
||||||
|
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
|
||||||
|
#include <signal.h>
|
||||||
|
#include <setjmp.h>
|
||||||
|
#endif
|
||||||
#ifdef __WIN32__
|
#ifdef __WIN32__
|
||||||
#include "../core/windows/SDL_windows.h"
|
#include "../core/windows/SDL_windows.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define CPU_HAS_RDTSC 0x00000001
|
#define CPU_HAS_RDTSC 0x00000001
|
||||||
#define CPU_HAS_MMX 0x00000002
|
#define CPU_HAS_ALTIVEC 0x00000002
|
||||||
|
#define CPU_HAS_MMX 0x00000004
|
||||||
|
#define CPU_HAS_3DNOW 0x00000008
|
||||||
#define CPU_HAS_SSE 0x00000010
|
#define CPU_HAS_SSE 0x00000010
|
||||||
#define CPU_HAS_SSE2 0x00000020
|
#define CPU_HAS_SSE2 0x00000020
|
||||||
#define CPU_HAS_SSE3 0x00000040
|
#define CPU_HAS_SSE3 0x00000040
|
||||||
#define CPU_HAS_SSE41 0x00000080
|
#define CPU_HAS_SSE41 0x00000100
|
||||||
#define CPU_HAS_SSE42 0x00000100
|
#define CPU_HAS_SSE42 0x00000200
|
||||||
|
|
||||||
|
#if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__
/* This is the brute force way of detecting instruction sets...
   the idea is borrowed from the libmpeg2 library - thanks!
 */

/* Jump buffer that illegal_instruction() unwinds to. */
static jmp_buf jmpbuf;

/*
 * Signal handler: abandons the faulting instruction by jumping back to
 * the point saved in jmpbuf, making setjmp() there return 1.
 */
static void
illegal_instruction(int sig)
{
    (void) sig;                 /* the signal number itself is not used */
    longjmp(jmpbuf, 1);
}
#endif /* SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ */
|
||||||
|
|
||||||
static __inline__ int
|
static __inline__ int
|
||||||
CPU_haveCPUID(void)
|
CPU_haveCPUID(void)
|
||||||
|
@ -192,6 +211,29 @@ CPU_haveRDTSC(void)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Report whether the running CPU has an AltiVec unit.
 *
 * On Mac OS X/PowerPC the answer comes from the hw.vectorunit sysctl.
 * Elsewhere, when the AltiVec blitters are enabled and setjmp is
 * available, an AltiVec instruction is executed with a temporary SIGILL
 * handler installed; surviving it means the unit is present.
 * In every other configuration the function returns 0.
 */
static __inline__ int
CPU_haveAltiVec(void)
{
    /* volatile: the value must survive a longjmp() out of the probe */
    volatile int detected = 0;

#if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
    int mib[2] = { CTL_HW, HW_VECTORUNIT };
    int vector_unit = 0;
    size_t len = sizeof(vector_unit);

    if (sysctl(mib, 2, &vector_unit, &len, NULL, 0) == 0) {
        detected = (vector_unit != 0);
    }
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    /* remember the previous SIGILL disposition so it can be restored */
    void (*previous) (int sig) = signal(SIGILL, illegal_instruction);

    if (setjmp(jmpbuf) == 0) {
        asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
        /* only reached when the probe did not raise SIGILL */
        detected = 1;
    }
    signal(SIGILL, previous);
#endif

    return detected;
}
|
||||||
|
|
||||||
static __inline__ int
|
static __inline__ int
|
||||||
CPU_haveMMX(void)
|
CPU_haveMMX(void)
|
||||||
{
|
{
|
||||||
|
@ -201,6 +243,21 @@ CPU_haveMMX(void)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Report whether the CPU advertises 3DNow! support.
 *
 * Queries CPUID leaf 0x80000000 for the highest extended leaf and, when
 * leaf 0x80000001 exists, tests bit 31 of the value returned in d.
 *
 * Returns 1 when the bit is set, 0 otherwise (including when CPUID is
 * unavailable).  The result is normalised to 0/1 so that the high bit is
 * never squeezed into a signed int.
 */
static __inline__ int
CPU_have3DNow(void)
{
    if (CPU_haveCPUID()) {
        int a, b, c, d;

        cpuid(0x80000000, a, b, c, d);
        /* compare as unsigned: extended leaf numbers have the top bit set */
        if ((unsigned int) a >= 0x80000001u) {
            cpuid(0x80000001, a, b, c, d);
            return ((unsigned int) d & 0x80000000u) != 0;
        }
    }
    return 0;
}
|
||||||
|
|
||||||
static __inline__ int
|
static __inline__ int
|
||||||
CPU_haveSSE(void)
|
CPU_haveSSE(void)
|
||||||
{
|
{
|
||||||
|
@ -431,9 +488,15 @@ SDL_GetCPUFeatures(void)
|
||||||
if (CPU_haveRDTSC()) {
|
if (CPU_haveRDTSC()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_RDTSC;
|
SDL_CPUFeatures |= CPU_HAS_RDTSC;
|
||||||
}
|
}
|
||||||
|
if (CPU_haveAltiVec()) {
|
||||||
|
SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
|
||||||
|
}
|
||||||
if (CPU_haveMMX()) {
|
if (CPU_haveMMX()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_MMX;
|
SDL_CPUFeatures |= CPU_HAS_MMX;
|
||||||
}
|
}
|
||||||
|
if (CPU_have3DNow()) {
|
||||||
|
SDL_CPUFeatures |= CPU_HAS_3DNOW;
|
||||||
|
}
|
||||||
if (CPU_haveSSE()) {
|
if (CPU_haveSSE()) {
|
||||||
SDL_CPUFeatures |= CPU_HAS_SSE;
|
SDL_CPUFeatures |= CPU_HAS_SSE;
|
||||||
}
|
}
|
||||||
|
@ -462,6 +525,15 @@ SDL_HasRDTSC(void)
|
||||||
return SDL_FALSE;
|
return SDL_FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDL_bool
|
||||||
|
SDL_HasAltiVec(void)
|
||||||
|
{
|
||||||
|
if (SDL_GetCPUFeatures() & CPU_HAS_ALTIVEC) {
|
||||||
|
return SDL_TRUE;
|
||||||
|
}
|
||||||
|
return SDL_FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
SDL_bool
|
SDL_bool
|
||||||
SDL_HasMMX(void)
|
SDL_HasMMX(void)
|
||||||
{
|
{
|
||||||
|
@ -471,6 +543,15 @@ SDL_HasMMX(void)
|
||||||
return SDL_FALSE;
|
return SDL_FALSE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
SDL_bool
|
||||||
|
SDL_Has3DNow(void)
|
||||||
|
{
|
||||||
|
if (SDL_GetCPUFeatures() & CPU_HAS_3DNOW) {
|
||||||
|
return SDL_TRUE;
|
||||||
|
}
|
||||||
|
return SDL_FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
SDL_bool
|
SDL_bool
|
||||||
SDL_HasSSE(void)
|
SDL_HasSSE(void)
|
||||||
{
|
{
|
||||||
|
@ -528,7 +609,9 @@ main()
|
||||||
printf("CPU name: %s\n", SDL_GetCPUName());
|
printf("CPU name: %s\n", SDL_GetCPUName());
|
||||||
printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
|
printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
|
||||||
printf("RDTSC: %d\n", SDL_HasRDTSC());
|
printf("RDTSC: %d\n", SDL_HasRDTSC());
|
||||||
|
printf("Altivec: %d\n", SDL_HasAltiVec());
|
||||||
printf("MMX: %d\n", SDL_HasMMX());
|
printf("MMX: %d\n", SDL_HasMMX());
|
||||||
|
printf("3DNow: %d\n", SDL_Has3DNow());
|
||||||
printf("SSE: %d\n", SDL_HasSSE());
|
printf("SSE: %d\n", SDL_HasSSE());
|
||||||
printf("SSE2: %d\n", SDL_HasSSE2());
|
printf("SSE2: %d\n", SDL_HasSSE2());
|
||||||
printf("SSE3: %d\n", SDL_HasSSE3());
|
printf("SSE3: %d\n", SDL_HasSSE3());
|
||||||
|
|
|
@ -100,6 +100,30 @@ SDL_SoftBlit(SDL_Surface * src, SDL_Rect * srcrect,
|
||||||
return (okay ? 0 : -1);
|
return (okay ? 0 : -1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __MACOSX__
|
||||||
|
#include <sys/sysctl.h>
|
||||||
|
|
||||||
|
static SDL_bool
|
||||||
|
SDL_UseAltivecPrefetch()
|
||||||
|
{
|
||||||
|
const char key[] = "hw.l3cachesize";
|
||||||
|
u_int64_t result = 0;
|
||||||
|
size_t typeSize = sizeof(result);
|
||||||
|
|
||||||
|
if (sysctlbyname(key, &result, &typeSize, NULL, 0) == 0 && result > 0) {
|
||||||
|
return SDL_TRUE;
|
||||||
|
} else {
|
||||||
|
return SDL_FALSE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static SDL_bool
|
||||||
|
SDL_UseAltivecPrefetch()
|
||||||
|
{
|
||||||
|
/* Just guess G4 */
|
||||||
|
return SDL_TRUE;
|
||||||
|
}
|
||||||
|
#endif /* __MACOSX__ */
|
||||||
|
|
||||||
static SDL_BlitFunc
|
static SDL_BlitFunc
|
||||||
SDL_ChooseBlitFunc(Uint32 src_format, Uint32 dst_format, int flags,
|
SDL_ChooseBlitFunc(Uint32 src_format, Uint32 dst_format, int flags,
|
||||||
|
@ -121,12 +145,22 @@ SDL_ChooseBlitFunc(Uint32 src_format, Uint32 dst_format, int flags,
|
||||||
if (SDL_HasMMX()) {
|
if (SDL_HasMMX()) {
|
||||||
features |= SDL_CPU_MMX;
|
features |= SDL_CPU_MMX;
|
||||||
}
|
}
|
||||||
|
if (SDL_Has3DNow()) {
|
||||||
|
features |= SDL_CPU_3DNOW;
|
||||||
|
}
|
||||||
if (SDL_HasSSE()) {
|
if (SDL_HasSSE()) {
|
||||||
features |= SDL_CPU_SSE;
|
features |= SDL_CPU_SSE;
|
||||||
}
|
}
|
||||||
if (SDL_HasSSE2()) {
|
if (SDL_HasSSE2()) {
|
||||||
features |= SDL_CPU_SSE2;
|
features |= SDL_CPU_SSE2;
|
||||||
}
|
}
|
||||||
|
if (SDL_HasAltiVec()) {
|
||||||
|
if (SDL_UseAltivecPrefetch()) {
|
||||||
|
features |= SDL_CPU_ALTIVEC_PREFETCH;
|
||||||
|
} else {
|
||||||
|
features |= SDL_CPU_ALTIVEC_NOPREFETCH;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,24 +24,6 @@
|
||||||
#ifndef _SDL_blit_h
|
#ifndef _SDL_blit_h
|
||||||
#define _SDL_blit_h
|
#define _SDL_blit_h
|
||||||
|
|
||||||
#ifdef __MINGW32__
|
|
||||||
#include <_mingw.h>
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__MINGW32__) && defined(__MINGW64_VERSION_MAJOR)
|
|
||||||
#include <intrin.h>
|
|
||||||
#else
|
|
||||||
#ifdef __MMX__
|
|
||||||
#include <mmintrin.h>
|
|
||||||
#endif
|
|
||||||
#ifdef __SSE__
|
|
||||||
#include <xmmintrin.h>
|
|
||||||
#endif
|
|
||||||
#ifdef __SSE2__
|
|
||||||
#include <emmintrin.h>
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#include "SDL_cpuinfo.h"
|
#include "SDL_cpuinfo.h"
|
||||||
#include "SDL_endian.h"
|
#include "SDL_endian.h"
|
||||||
#include "SDL_surface.h"
|
#include "SDL_surface.h"
|
||||||
|
@ -62,8 +44,11 @@
|
||||||
/* SDL blit CPU flags */
|
/* SDL blit CPU flags */
|
||||||
#define SDL_CPU_ANY 0x00000000
|
#define SDL_CPU_ANY 0x00000000
|
||||||
#define SDL_CPU_MMX 0x00000001
|
#define SDL_CPU_MMX 0x00000001
|
||||||
|
#define SDL_CPU_3DNOW 0x00000002
|
||||||
#define SDL_CPU_SSE 0x00000004
|
#define SDL_CPU_SSE 0x00000004
|
||||||
#define SDL_CPU_SSE2 0x00000008
|
#define SDL_CPU_SSE2 0x00000008
|
||||||
|
#define SDL_CPU_ALTIVEC_PREFETCH 0x00000010
|
||||||
|
#define SDL_CPU_ALTIVEC_NOPREFETCH 0x00000020
|
||||||
|
|
||||||
typedef struct
|
typedef struct
|
||||||
{
|
{
|
||||||
|
|
|
@ -419,6 +419,806 @@ BlitRGBtoRGBPixelAlphaMMX(SDL_BlitInfo * info)
|
||||||
|
|
||||||
#endif /* __MMX__ */
|
#endif /* __MMX__ */
|
||||||
|
|
||||||
|
#if SDL_ALTIVEC_BLITTERS
|
||||||
|
#if __MWERKS__
|
||||||
|
#pragma altivec_model on
|
||||||
|
#endif
|
||||||
|
#if HAVE_ALTIVEC_H
|
||||||
|
#include <altivec.h>
|
||||||
|
#endif
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
#if (defined(__MACOSX__) && (__GNUC__ < 4))
|
||||||
|
#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
|
||||||
|
(vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
|
||||||
|
#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
|
||||||
|
(vector unsigned short) ( a,b,c,d,e,f,g,h )
|
||||||
|
#else
|
||||||
|
#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
|
||||||
|
(vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
|
||||||
|
#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
|
||||||
|
(vector unsigned short) { a,b,c,d,e,f,g,h }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Low four address bits of x: non-zero when x is not 16-byte aligned. */
#define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)
|
||||||
|
#define VECPRINT(msg, v) do { \
|
||||||
|
vector unsigned int tmpvec = (vector unsigned int)(v); \
|
||||||
|
unsigned int *vp = (unsigned int *)&tmpvec; \
|
||||||
|
printf("%s = %08X %08X %08X %08X\n", msg, vp[0], vp[1], vp[2], vp[3]); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
/* the permutation vector that takes the high bytes out of all the appropriate shorts
|
||||||
|
(vector unsigned char)(
|
||||||
|
0x00, 0x10, 0x02, 0x12,
|
||||||
|
0x04, 0x14, 0x06, 0x16,
|
||||||
|
0x08, 0x18, 0x0A, 0x1A,
|
||||||
|
0x0C, 0x1C, 0x0E, 0x1E );
|
||||||
|
*/
|
||||||
|
#define VEC_MERGE_PERMUTE() (vec_add(vec_lvsl(0, (int*)NULL), (vector unsigned char)vec_splat_u16(0x0F)))
|
||||||
|
#define VEC_U32_24() (vec_add(vec_splat_u32(12), vec_splat_u32(12)))
|
||||||
|
#define VEC_ALPHA_MASK() ((vector unsigned char)vec_sl((vector unsigned int)vec_splat_s8(-1), VEC_U32_24()))
|
||||||
|
#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
|
||||||
|
? vec_lvsl(0, src) \
|
||||||
|
: vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
|
||||||
|
|
||||||
|
|
||||||
|
#define VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1_16, v8_16) do { \
|
||||||
|
/* vtemp1 contains source AAGGAAGGAAGGAAGG */ \
|
||||||
|
vector unsigned short vtemp1 = vec_mule(vs, valpha); \
|
||||||
|
/* vtemp2 contains source RRBBRRBBRRBBRRBB */ \
|
||||||
|
vector unsigned short vtemp2 = vec_mulo(vs, valpha); \
|
||||||
|
/* valpha2 is 255-alpha */ \
|
||||||
|
vector unsigned char valpha2 = vec_nor(valpha, valpha); \
|
||||||
|
/* vtemp3 contains dest AAGGAAGGAAGGAAGG */ \
|
||||||
|
vector unsigned short vtemp3 = vec_mule(vd, valpha2); \
|
||||||
|
/* vtemp4 contains dest RRBBRRBBRRBBRRBB */ \
|
||||||
|
vector unsigned short vtemp4 = vec_mulo(vd, valpha2); \
|
||||||
|
/* add source and dest */ \
|
||||||
|
vtemp1 = vec_add(vtemp1, vtemp3); \
|
||||||
|
vtemp2 = vec_add(vtemp2, vtemp4); \
|
||||||
|
/* vtemp1 = (vtemp1 + 1) + ((vtemp1 + 1) >> 8) */ \
|
||||||
|
vtemp1 = vec_add(vtemp1, v1_16); \
|
||||||
|
vtemp3 = vec_sr(vtemp1, v8_16); \
|
||||||
|
vtemp1 = vec_add(vtemp1, vtemp3); \
|
||||||
|
/* vtemp2 = (vtemp2 + 1) + ((vtemp2 + 1) >> 8) */ \
|
||||||
|
vtemp2 = vec_add(vtemp2, v1_16); \
|
||||||
|
vtemp4 = vec_sr(vtemp2, v8_16); \
|
||||||
|
vtemp2 = vec_add(vtemp2, vtemp4); \
|
||||||
|
/* (>>8) and get ARGBARGBARGBARGB */ \
|
||||||
|
vd = (vector unsigned char)vec_perm(vtemp1, vtemp2, mergePermute); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
/*
 * Calculate the permute vector used for 32->32 swizzling.
 *
 * The result is a vec_perm control vector that moves the four bytes of
 * each 32-bit pixel from srcfmt's channel positions to dstfmt's.
 *
 * srcfmt / dstfmt: pixel formats to convert between; either may be NULL,
 *                  in which case the built-in ARGB layout below is used.
 *
 * Returns the 16-byte control vector covering four pixels.
 */
static vector unsigned char
calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
{
    /*
     * We have to assume that the bits that aren't used by other
     * colors are alpha, and that alpha is one complete byte, since some
     * formats leave alpha with a zero mask, but we should still swizzle
     * the bits.
     */
    /* ARGB */
    /* NOTE(review): positional initializer; presumably the fields are
       palette, bits/bytes per pixel, four loss values, four shifts and
       four masks - confirm against the SDL_PixelFormat declaration. */
    const static struct SDL_PixelFormat default_pixel_format = {
        NULL, 0, 0,
        0, 0, 0, 0,
        16, 8, 0, 24,
        0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000
    };
    if (!srcfmt) {
        srcfmt = &default_pixel_format;
    }
    if (!dstfmt) {
        dstfmt = &default_pixel_format;
    }
    /* per-pixel byte offsets (0, 4, 8, 12) added to the one-pixel pattern */
    const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
                                                       0x04, 0x04, 0x04, 0x04,
                                                       0x08, 0x08, 0x08, 0x08,
                                                       0x0C, 0x0C, 0x0C,
                                                       0x0C);
    vector unsigned char vswiz;
    vector unsigned int srcvec;
    /* turn a channel's bit shift into its byte index counted from the
       most significant byte: shift 24 -> 0, 16 -> 1, 8 -> 2, 0 -> 3 */
#define RESHIFT(X) (3 - ((X) >> 3))
    /* place each source byte index at the destination channel's position */
    Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
    Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
    Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
    Uint32 amask;
    /* Use zero for alpha if either surface doesn't have alpha */
    /* Index 0x10 is past the first 16 bytes, i.e. it selects from the
       second operand handed to vec_perm by the caller. */
    if (dstfmt->Amask) {
        amask =
            ((srcfmt->Amask) ? RESHIFT(srcfmt->
                                       Ashift) : 0x10) << (dstfmt->Ashift);
    } else {
        /* no destination alpha mask: put 0x10 in whichever byte is not
           covered by the destination's R, G and B masks */
        amask =
            0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
                          0xFFFFFFFF);
    }
#undef RESHIFT
    /* only element 0 of srcvec is written; vec_splat below copies it to
       all four elements before the per-pixel offsets are added */
    ((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
    vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
    return (vswiz);
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
Blit32to565PixelAlphaAltivec(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
int height = info->dst_h;
|
||||||
|
Uint8 *src = (Uint8 *) info->src;
|
||||||
|
int srcskip = info->src_skip;
|
||||||
|
Uint8 *dst = (Uint8 *) info->dst;
|
||||||
|
int dstskip = info->dst_skip;
|
||||||
|
SDL_PixelFormat *srcfmt = info->src_fmt;
|
||||||
|
|
||||||
|
vector unsigned char v0 = vec_splat_u8(0);
|
||||||
|
vector unsigned short v8_16 = vec_splat_u16(8);
|
||||||
|
vector unsigned short v1_16 = vec_splat_u16(1);
|
||||||
|
vector unsigned short v2_16 = vec_splat_u16(2);
|
||||||
|
vector unsigned short v3_16 = vec_splat_u16(3);
|
||||||
|
vector unsigned int v8_32 = vec_splat_u32(8);
|
||||||
|
vector unsigned int v16_32 = vec_add(v8_32, v8_32);
|
||||||
|
vector unsigned short v3f =
|
||||||
|
VECUINT16_LITERAL(0x003f, 0x003f, 0x003f, 0x003f,
|
||||||
|
0x003f, 0x003f, 0x003f, 0x003f);
|
||||||
|
vector unsigned short vfc =
|
||||||
|
VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
|
||||||
|
0x00fc, 0x00fc, 0x00fc, 0x00fc);
|
||||||
|
|
||||||
|
/*
|
||||||
|
0x10 - 0x1f is the alpha
|
||||||
|
0x00 - 0x0e evens are the red
|
||||||
|
0x01 - 0x0f odds are zero
|
||||||
|
*/
|
||||||
|
vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
|
||||||
|
0x10, 0x02, 0x01, 0x01,
|
||||||
|
0x10, 0x04, 0x01, 0x01,
|
||||||
|
0x10, 0x06, 0x01,
|
||||||
|
0x01);
|
||||||
|
vector unsigned char vredalpha2 =
|
||||||
|
(vector unsigned char) (vec_add((vector unsigned int) vredalpha1,
|
||||||
|
vec_sl(v8_32, v16_32))
|
||||||
|
);
|
||||||
|
/*
|
||||||
|
0x00 - 0x0f is ARxx ARxx ARxx ARxx
|
||||||
|
0x11 - 0x0f odds are blue
|
||||||
|
*/
|
||||||
|
vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
|
||||||
|
0x04, 0x05, 0x06, 0x13,
|
||||||
|
0x08, 0x09, 0x0a, 0x15,
|
||||||
|
0x0c, 0x0d, 0x0e, 0x17);
|
||||||
|
vector unsigned char vblue2 =
|
||||||
|
(vector unsigned char) (vec_add((vector unsigned int) vblue1, v8_32)
|
||||||
|
);
|
||||||
|
/*
|
||||||
|
0x00 - 0x0f is ARxB ARxB ARxB ARxB
|
||||||
|
0x10 - 0x0e evens are green
|
||||||
|
*/
|
||||||
|
vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
|
||||||
|
0x04, 0x05, 0x12, 0x07,
|
||||||
|
0x08, 0x09, 0x14, 0x0b,
|
||||||
|
0x0c, 0x0d, 0x16, 0x0f);
|
||||||
|
vector unsigned char vgreen2 =
|
||||||
|
(vector unsigned
|
||||||
|
char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8_32, v8_32))
|
||||||
|
);
|
||||||
|
vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
|
||||||
|
0x00, 0x0a, 0x00, 0x0e,
|
||||||
|
0x00, 0x12, 0x00, 0x16,
|
||||||
|
0x00, 0x1a, 0x00, 0x1e);
|
||||||
|
vector unsigned char mergePermute = VEC_MERGE_PERMUTE();
|
||||||
|
vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
|
||||||
|
vector unsigned char valphaPermute =
|
||||||
|
vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
|
||||||
|
|
||||||
|
vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
|
||||||
|
vf800 = vec_sl(vf800, vec_splat_u16(8));
|
||||||
|
|
||||||
|
while (height--) {
|
||||||
|
int extrawidth;
|
||||||
|
vector unsigned char valigner;
|
||||||
|
vector unsigned char vsrc;
|
||||||
|
vector unsigned char voverflow;
|
||||||
|
int width = info->dst_w;
|
||||||
|
|
||||||
|
#define ONE_PIXEL_BLEND(condition, widthvar) \
|
||||||
|
while (condition) { \
|
||||||
|
Uint32 Pixel; \
|
||||||
|
unsigned sR, sG, sB, dR, dG, dB, sA; \
|
||||||
|
DISEMBLE_RGBA(src, 4, srcfmt, Pixel, sR, sG, sB, sA); \
|
||||||
|
if(sA) { \
|
||||||
|
unsigned short dstpixel = *((unsigned short *)dst); \
|
||||||
|
dR = (dstpixel >> 8) & 0xf8; \
|
||||||
|
dG = (dstpixel >> 3) & 0xfc; \
|
||||||
|
dB = (dstpixel << 3) & 0xf8; \
|
||||||
|
ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
|
||||||
|
*((unsigned short *)dst) = ( \
|
||||||
|
((dR & 0xf8) << 8) | ((dG & 0xfc) << 3) | (dB >> 3) \
|
||||||
|
); \
|
||||||
|
} \
|
||||||
|
src += 4; \
|
||||||
|
dst += 2; \
|
||||||
|
widthvar--; \
|
||||||
|
}
|
||||||
|
ONE_PIXEL_BLEND((UNALIGNED_PTR(dst)) && (width), width);
|
||||||
|
extrawidth = (width % 8);
|
||||||
|
valigner = VEC_ALIGNER(src);
|
||||||
|
vsrc = (vector unsigned char) vec_ld(0, src);
|
||||||
|
width -= extrawidth;
|
||||||
|
while (width) {
|
||||||
|
vector unsigned char valpha;
|
||||||
|
vector unsigned char vsrc1, vsrc2;
|
||||||
|
vector unsigned char vdst1, vdst2;
|
||||||
|
vector unsigned short vR, vG, vB;
|
||||||
|
vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
|
||||||
|
|
||||||
|
/* Load 8 pixels from src as ARGB */
|
||||||
|
voverflow = (vector unsigned char) vec_ld(15, src);
|
||||||
|
vsrc = vec_perm(vsrc, voverflow, valigner);
|
||||||
|
vsrc1 = vec_perm(vsrc, vsrc, vpermute);
|
||||||
|
src += 16;
|
||||||
|
vsrc = (vector unsigned char) vec_ld(15, src);
|
||||||
|
voverflow = vec_perm(voverflow, vsrc, valigner);
|
||||||
|
vsrc2 = vec_perm(voverflow, voverflow, vpermute);
|
||||||
|
src += 16;
|
||||||
|
|
||||||
|
/* Load 8 pixels from dst as XRGB */
|
||||||
|
voverflow = vec_ld(0, dst);
|
||||||
|
vR = vec_and((vector unsigned short) voverflow, vf800);
|
||||||
|
vB = vec_sl((vector unsigned short) voverflow, v3_16);
|
||||||
|
vG = vec_sl(vB, v2_16);
|
||||||
|
vdst1 =
|
||||||
|
(vector unsigned char) vec_perm((vector unsigned char) vR,
|
||||||
|
(vector unsigned char) vR,
|
||||||
|
vredalpha1);
|
||||||
|
vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
|
||||||
|
vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
|
||||||
|
vdst2 =
|
||||||
|
(vector unsigned char) vec_perm((vector unsigned char) vR,
|
||||||
|
(vector unsigned char) vR,
|
||||||
|
vredalpha2);
|
||||||
|
vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
|
||||||
|
vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
|
||||||
|
|
||||||
|
/* Alpha blend 8 pixels as ARGB */
|
||||||
|
valpha = vec_perm(vsrc1, v0, valphaPermute);
|
||||||
|
VEC_MULTIPLY_ALPHA(vsrc1, vdst1, valpha, mergePermute, v1_16,
|
||||||
|
v8_16);
|
||||||
|
valpha = vec_perm(vsrc2, v0, valphaPermute);
|
||||||
|
VEC_MULTIPLY_ALPHA(vsrc2, vdst2, valpha, mergePermute, v1_16,
|
||||||
|
v8_16);
|
||||||
|
|
||||||
|
/* Convert 8 pixels to 565 */
|
||||||
|
vpixel = (vector unsigned short) vec_packpx((vector unsigned int)
|
||||||
|
vdst1,
|
||||||
|
(vector unsigned int)
|
||||||
|
vdst2);
|
||||||
|
vgpixel = (vector unsigned short) vec_perm(vdst1, vdst2, vgmerge);
|
||||||
|
vgpixel = vec_and(vgpixel, vfc);
|
||||||
|
vgpixel = vec_sl(vgpixel, v3_16);
|
||||||
|
vrpixel = vec_sl(vpixel, v1_16);
|
||||||
|
vrpixel = vec_and(vrpixel, vf800);
|
||||||
|
vbpixel = vec_and(vpixel, v3f);
|
||||||
|
vdst1 =
|
||||||
|
vec_or((vector unsigned char) vrpixel,
|
||||||
|
(vector unsigned char) vgpixel);
|
||||||
|
vdst1 = vec_or(vdst1, (vector unsigned char) vbpixel);
|
||||||
|
|
||||||
|
/* Store 8 pixels */
|
||||||
|
vec_st(vdst1, 0, dst);
|
||||||
|
|
||||||
|
width -= 8;
|
||||||
|
dst += 16;
|
||||||
|
}
|
||||||
|
ONE_PIXEL_BLEND((extrawidth), extrawidth);
|
||||||
|
#undef ONE_PIXEL_BLEND
|
||||||
|
src += srcskip;
|
||||||
|
dst += dstskip;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Blend a 32-bit source onto a 32-bit destination using a single
 * per-surface alpha (info->a) while skipping source pixels that match
 * the colour key (info->colorkey).
 *
 * info: blit description; this function reads src, dst, src_skip,
 *       dst_skip, src_fmt, dst_fmt, a, colorkey, dst_w and dst_h.
 *
 * Each row is processed as: scalar pixels until dstp is 16-byte aligned,
 * then four pixels at a time with AltiVec, then scalar leftovers.
 */
static void
Blit32to32SurfaceAlphaKeyAltivec(SDL_BlitInfo * info)
{
    int height = info->dst_h;
    Uint32 *srcp = (Uint32 *) info->src;
    /* skips are divided by 4 because the pointers step in Uint32 units */
    int srcskip = info->src_skip >> 2;
    Uint32 *dstp = (Uint32 *) info->dst;
    int dstskip = info->dst_skip >> 2;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    unsigned sA = info->a;
    unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
    Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
    Uint32 ckey = info->colorkey;
    vector unsigned char mergePermute;
    vector unsigned char vsrcPermute;
    vector unsigned char vdstPermute;
    vector unsigned char vsdstPermute;
    vector unsigned char valpha;
    vector unsigned char valphamask;
    vector unsigned char vbits;
    vector unsigned char v0;
    vector unsigned short v1;
    vector unsigned short v8;
    vector unsigned int vckey;
    vector unsigned int vrgbmask;

    mergePermute = VEC_MERGE_PERMUTE();
    v0 = vec_splat_u8(0);
    v1 = vec_splat_u16(1);
    v8 = vec_splat_u16(8);

    /* set the alpha to 255 on the destination surf */
    valphamask = VEC_ALPHA_MASK();

    /* source -> ARGB, ARGB -> destination, destination -> ARGB */
    vsrcPermute = calc_swizzle32(srcfmt, NULL);
    vdstPermute = calc_swizzle32(NULL, dstfmt);
    vsdstPermute = calc_swizzle32(dstfmt, NULL);

    /* set a vector full of alpha and 255-alpha */
    /* (only byte 0 is written; vec_splat then copies it to all 16 bytes) */
    ((unsigned char *) &valpha)[0] = sA;
    valpha = vec_splat(valpha, 0);
    vbits = (vector unsigned char) vec_splat_s8(-1);

    /* the key is compared on the RGB bits only */
    ckey &= rgbmask;
    ((unsigned int *) (char *) &vckey)[0] = ckey;
    vckey = vec_splat(vckey, 0);
    ((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
    vrgbmask = vec_splat(vrgbmask, 0);

    while (height--) {
        int width = info->dst_w;
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            Uint32 Pixel; \
            unsigned sR, sG, sB, dR, dG, dB; \
            RETRIEVE_RGB_PIXEL(((Uint8 *)srcp), 4, Pixel); \
            if(sA && Pixel != ckey) { \
                RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
                DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
                ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
                ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
            } \
            dstp++; \
            srcp++; \
            widthvar--; \
        }
        /* scalar pixels until dstp is 16-byte aligned */
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
        if (width > 0) {
            int extrawidth = (width % 4);
            vector unsigned char valigner = VEC_ALIGNER(srcp);
            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
            width -= extrawidth;
            while (width) {
                vector unsigned char vsel;
                vector unsigned char voverflow;
                vector unsigned char vd;
                vector unsigned char vd_orig;

                /* s = *srcp */
                /* srcp may be unaligned: combine the two aligned loads */
                voverflow = (vector unsigned char) vec_ld(15, srcp);
                vs = vec_perm(vs, voverflow, valigner);

                /* vsel is set for items that match the key */
                vsel =
                    (vector unsigned char) vec_and((vector unsigned int) vs,
                                                   vrgbmask);
                vsel = (vector unsigned char) vec_cmpeq((vector unsigned int)
                                                        vsel, vckey);

                /* permute to source format */
                vs = vec_perm(vs, valpha, vsrcPermute);

                /* d = *dstp */
                vd = (vector unsigned char) vec_ld(0, dstp);
                vd_orig = vd = vec_perm(vd, v0, vsdstPermute);

                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);

                /* set the alpha channel to full on */
                vd = vec_or(vd, valphamask);

                /* mask out color key */
                /* (keyed pixels keep the untouched destination value) */
                vd = vec_sel(vd, vd_orig, vsel);

                /* permute to dest format */
                vd = vec_perm(vd, vbits, vdstPermute);

                /* *dstp = res */
                vec_st((vector unsigned int) vd, 0, dstp);

                srcp += 4;
                dstp += 4;
                width -= 4;
                /* the second load becomes the first half of the next one */
                vs = voverflow;
            }
            /* scalar leftovers (width % 4) */
            ONE_PIXEL_BLEND((extrawidth), extrawidth);
        }
#undef ONE_PIXEL_BLEND

        srcp += srcskip;
        dstp += dstskip;
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Blend a 32-bit source with per-pixel alpha onto a 32-bit destination,
 * keeping the destination's own alpha value.
 *
 * info: blit description; this function reads src, dst, src_skip,
 *       dst_skip, src_fmt, dst_fmt, dst_w and dst_h.
 *
 * Each row is processed as: scalar pixels until dstp is 16-byte aligned,
 * then four pixels at a time with AltiVec, then scalar leftovers.
 */
static void
Blit32to32PixelAlphaAltivec(SDL_BlitInfo * info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint32 *srcp = (Uint32 *) info->src;
    /* skips are divided by 4 because the pointers step in Uint32 units */
    int srcskip = info->src_skip >> 2;
    Uint32 *dstp = (Uint32 *) info->dst;
    int dstskip = info->dst_skip >> 2;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    SDL_PixelFormat *dstfmt = info->dst_fmt;
    vector unsigned char mergePermute;
    vector unsigned char valphaPermute;
    vector unsigned char vsrcPermute;
    vector unsigned char vdstPermute;
    vector unsigned char vsdstPermute;
    vector unsigned char valphamask;
    vector unsigned char vpixelmask;
    vector unsigned char v0;
    vector unsigned short v1;
    vector unsigned short v8;

    v0 = vec_splat_u8(0);
    v1 = vec_splat_u16(1);
    v8 = vec_splat_u16(8);
    mergePermute = VEC_MERGE_PERMUTE();
    valphamask = VEC_ALPHA_MASK();
    /* byte indices 0,0,0,0,4,4,4,4,...: replicates byte 0 of each pixel */
    valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
    /* complement of the alpha mask: the colour bytes only */
    vpixelmask = vec_nor(valphamask, v0);
    /* source -> ARGB, ARGB -> destination, destination -> ARGB */
    vsrcPermute = calc_swizzle32(srcfmt, NULL);
    vdstPermute = calc_swizzle32(NULL, dstfmt);
    vsdstPermute = calc_swizzle32(dstfmt, NULL);

    while (height--) {
        width = info->dst_w;
#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
            Uint32 Pixel; \
            unsigned sR, sG, sB, dR, dG, dB, sA, dA; \
            DISEMBLE_RGBA((Uint8 *)srcp, 4, srcfmt, Pixel, sR, sG, sB, sA); \
            if(sA) { \
              DISEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, Pixel, dR, dG, dB, dA); \
              ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
              ASSEMBLE_RGBA((Uint8 *)dstp, 4, dstfmt, dR, dG, dB, dA); \
            } \
            ++srcp; \
            ++dstp; \
            widthvar--; \
        }
        /* scalar pixels until dstp is 16-byte aligned */
        ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
        if (width > 0) {
            /* vsrcPermute */
            /* vdstPermute */
            int extrawidth = (width % 4);
            vector unsigned char valigner = VEC_ALIGNER(srcp);
            vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
            width -= extrawidth;
            while (width) {
                vector unsigned char voverflow;
                vector unsigned char vd;
                vector unsigned char valpha;
                vector unsigned char vdstalpha;
                /* s = *srcp */
                /* srcp may be unaligned: combine the two aligned loads */
                voverflow = (vector unsigned char) vec_ld(15, srcp);
                vs = vec_perm(vs, voverflow, valigner);
                vs = vec_perm(vs, v0, vsrcPermute);

                /* spread each pixel's alpha byte over its four bytes */
                valpha = vec_perm(vs, v0, valphaPermute);

                /* d = *dstp */
                vd = (vector unsigned char) vec_ld(0, dstp);
                vd = vec_perm(vd, v0, vsdstPermute);
                /* remember the destination alpha before blending */
                vdstalpha = vec_and(vd, valphamask);

                VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);

                /* set the alpha to the dest alpha */
                vd = vec_and(vd, vpixelmask);
                vd = vec_or(vd, vdstalpha);
                vd = vec_perm(vd, v0, vdstPermute);

                /* *dstp = res */
                vec_st((vector unsigned int) vd, 0, dstp);

                srcp += 4;
                dstp += 4;
                width -= 4;
                /* the second load becomes the first half of the next one */
                vs = voverflow;

            }
            /* scalar leftovers (width % 4) */
            ONE_PIXEL_BLEND((extrawidth), extrawidth);
        }
        srcp += srcskip;
        dstp += dstskip;
#undef ONE_PIXEL_BLEND
    }
}
|
||||||
|
|
||||||
|
/* fast ARGB888->(A)RGB888 blending with pixel alpha */
|
||||||
|
static void
|
||||||
|
BlitRGBtoRGBPixelAlphaAltivec(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
int width = info->dst_w;
|
||||||
|
int height = info->dst_h;
|
||||||
|
Uint32 *srcp = (Uint32 *) info->src;
|
||||||
|
int srcskip = info->src_skip >> 2;
|
||||||
|
Uint32 *dstp = (Uint32 *) info->dst;
|
||||||
|
int dstskip = info->dst_skip >> 2;
|
||||||
|
vector unsigned char mergePermute;
|
||||||
|
vector unsigned char valphaPermute;
|
||||||
|
vector unsigned char valphamask;
|
||||||
|
vector unsigned char vpixelmask;
|
||||||
|
vector unsigned char v0;
|
||||||
|
vector unsigned short v1;
|
||||||
|
vector unsigned short v8;
|
||||||
|
v0 = vec_splat_u8(0);
|
||||||
|
v1 = vec_splat_u16(1);
|
||||||
|
v8 = vec_splat_u16(8);
|
||||||
|
mergePermute = VEC_MERGE_PERMUTE();
|
||||||
|
valphamask = VEC_ALPHA_MASK();
|
||||||
|
valphaPermute = vec_and(vec_lvsl(0, (int *) NULL), vec_splat_u8(0xC));
|
||||||
|
|
||||||
|
|
||||||
|
vpixelmask = vec_nor(valphamask, v0);
|
||||||
|
while (height--) {
|
||||||
|
width = info->dst_w;
|
||||||
|
#define ONE_PIXEL_BLEND(condition, widthvar) \
|
||||||
|
while ((condition)) { \
|
||||||
|
Uint32 dalpha; \
|
||||||
|
Uint32 d; \
|
||||||
|
Uint32 s1; \
|
||||||
|
Uint32 d1; \
|
||||||
|
Uint32 s = *srcp; \
|
||||||
|
Uint32 alpha = s >> 24; \
|
||||||
|
if(alpha) { \
|
||||||
|
if(alpha == SDL_ALPHA_OPAQUE) { \
|
||||||
|
*dstp = (s & 0x00ffffff) | (*dstp & 0xff000000); \
|
||||||
|
} else { \
|
||||||
|
d = *dstp; \
|
||||||
|
dalpha = d & 0xff000000; \
|
||||||
|
s1 = s & 0xff00ff; \
|
||||||
|
d1 = d & 0xff00ff; \
|
||||||
|
d1 = (d1 + ((s1 - d1) * alpha >> 8)) & 0xff00ff; \
|
||||||
|
s &= 0xff00; \
|
||||||
|
d &= 0xff00; \
|
||||||
|
d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
|
||||||
|
*dstp = d1 | d | dalpha; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
++srcp; \
|
||||||
|
++dstp; \
|
||||||
|
widthvar--; \
|
||||||
|
}
|
||||||
|
ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
|
||||||
|
if (width > 0) {
|
||||||
|
int extrawidth = (width % 4);
|
||||||
|
vector unsigned char valigner = VEC_ALIGNER(srcp);
|
||||||
|
vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
|
||||||
|
width -= extrawidth;
|
||||||
|
while (width) {
|
||||||
|
vector unsigned char voverflow;
|
||||||
|
vector unsigned char vd;
|
||||||
|
vector unsigned char valpha;
|
||||||
|
vector unsigned char vdstalpha;
|
||||||
|
/* s = *srcp */
|
||||||
|
voverflow = (vector unsigned char) vec_ld(15, srcp);
|
||||||
|
vs = vec_perm(vs, voverflow, valigner);
|
||||||
|
|
||||||
|
valpha = vec_perm(vs, v0, valphaPermute);
|
||||||
|
|
||||||
|
/* d = *dstp */
|
||||||
|
vd = (vector unsigned char) vec_ld(0, dstp);
|
||||||
|
vdstalpha = vec_and(vd, valphamask);
|
||||||
|
|
||||||
|
VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
|
||||||
|
|
||||||
|
/* set the alpha to the dest alpha */
|
||||||
|
vd = vec_and(vd, vpixelmask);
|
||||||
|
vd = vec_or(vd, vdstalpha);
|
||||||
|
|
||||||
|
/* *dstp = res */
|
||||||
|
vec_st((vector unsigned int) vd, 0, dstp);
|
||||||
|
|
||||||
|
srcp += 4;
|
||||||
|
dstp += 4;
|
||||||
|
width -= 4;
|
||||||
|
vs = voverflow;
|
||||||
|
}
|
||||||
|
ONE_PIXEL_BLEND((extrawidth), extrawidth);
|
||||||
|
}
|
||||||
|
srcp += srcskip;
|
||||||
|
dstp += dstskip;
|
||||||
|
}
|
||||||
|
#undef ONE_PIXEL_BLEND
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
Blit32to32SurfaceAlphaAltivec(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
/* XXX : 6 */
|
||||||
|
int height = info->dst_h;
|
||||||
|
Uint32 *srcp = (Uint32 *) info->src;
|
||||||
|
int srcskip = info->src_skip >> 2;
|
||||||
|
Uint32 *dstp = (Uint32 *) info->dst;
|
||||||
|
int dstskip = info->dst_skip >> 2;
|
||||||
|
SDL_PixelFormat *srcfmt = info->src_fmt;
|
||||||
|
SDL_PixelFormat *dstfmt = info->dst_fmt;
|
||||||
|
unsigned sA = info->a;
|
||||||
|
unsigned dA = dstfmt->Amask ? SDL_ALPHA_OPAQUE : 0;
|
||||||
|
vector unsigned char mergePermute;
|
||||||
|
vector unsigned char vsrcPermute;
|
||||||
|
vector unsigned char vdstPermute;
|
||||||
|
vector unsigned char vsdstPermute;
|
||||||
|
vector unsigned char valpha;
|
||||||
|
vector unsigned char valphamask;
|
||||||
|
vector unsigned char vbits;
|
||||||
|
vector unsigned short v1;
|
||||||
|
vector unsigned short v8;
|
||||||
|
|
||||||
|
mergePermute = VEC_MERGE_PERMUTE();
|
||||||
|
v1 = vec_splat_u16(1);
|
||||||
|
v8 = vec_splat_u16(8);
|
||||||
|
|
||||||
|
/* set the alpha to 255 on the destination surf */
|
||||||
|
valphamask = VEC_ALPHA_MASK();
|
||||||
|
|
||||||
|
vsrcPermute = calc_swizzle32(srcfmt, NULL);
|
||||||
|
vdstPermute = calc_swizzle32(NULL, dstfmt);
|
||||||
|
vsdstPermute = calc_swizzle32(dstfmt, NULL);
|
||||||
|
|
||||||
|
/* set a vector full of alpha and 255-alpha */
|
||||||
|
((unsigned char *) &valpha)[0] = sA;
|
||||||
|
valpha = vec_splat(valpha, 0);
|
||||||
|
vbits = (vector unsigned char) vec_splat_s8(-1);
|
||||||
|
|
||||||
|
while (height--) {
|
||||||
|
int width = info->dst_w;
|
||||||
|
#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
|
||||||
|
Uint32 Pixel; \
|
||||||
|
unsigned sR, sG, sB, dR, dG, dB; \
|
||||||
|
DISEMBLE_RGB(((Uint8 *)srcp), 4, srcfmt, Pixel, sR, sG, sB); \
|
||||||
|
DISEMBLE_RGB(((Uint8 *)dstp), 4, dstfmt, Pixel, dR, dG, dB); \
|
||||||
|
ALPHA_BLEND(sR, sG, sB, sA, dR, dG, dB); \
|
||||||
|
ASSEMBLE_RGBA(((Uint8 *)dstp), 4, dstfmt, dR, dG, dB, dA); \
|
||||||
|
++srcp; \
|
||||||
|
++dstp; \
|
||||||
|
widthvar--; \
|
||||||
|
}
|
||||||
|
ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
|
||||||
|
if (width > 0) {
|
||||||
|
int extrawidth = (width % 4);
|
||||||
|
vector unsigned char valigner = VEC_ALIGNER(srcp);
|
||||||
|
vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
|
||||||
|
width -= extrawidth;
|
||||||
|
while (width) {
|
||||||
|
vector unsigned char voverflow;
|
||||||
|
vector unsigned char vd;
|
||||||
|
|
||||||
|
/* s = *srcp */
|
||||||
|
voverflow = (vector unsigned char) vec_ld(15, srcp);
|
||||||
|
vs = vec_perm(vs, voverflow, valigner);
|
||||||
|
vs = vec_perm(vs, valpha, vsrcPermute);
|
||||||
|
|
||||||
|
/* d = *dstp */
|
||||||
|
vd = (vector unsigned char) vec_ld(0, dstp);
|
||||||
|
vd = vec_perm(vd, vd, vsdstPermute);
|
||||||
|
|
||||||
|
VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
|
||||||
|
|
||||||
|
/* set the alpha channel to full on */
|
||||||
|
vd = vec_or(vd, valphamask);
|
||||||
|
vd = vec_perm(vd, vbits, vdstPermute);
|
||||||
|
|
||||||
|
/* *dstp = res */
|
||||||
|
vec_st((vector unsigned int) vd, 0, dstp);
|
||||||
|
|
||||||
|
srcp += 4;
|
||||||
|
dstp += 4;
|
||||||
|
width -= 4;
|
||||||
|
vs = voverflow;
|
||||||
|
}
|
||||||
|
ONE_PIXEL_BLEND((extrawidth), extrawidth);
|
||||||
|
}
|
||||||
|
#undef ONE_PIXEL_BLEND
|
||||||
|
|
||||||
|
srcp += srcskip;
|
||||||
|
dstp += dstskip;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* fast RGB888->(A)RGB888 blending */
|
||||||
|
static void
|
||||||
|
BlitRGBtoRGBSurfaceAlphaAltivec(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
unsigned alpha = info->a;
|
||||||
|
int height = info->dst_h;
|
||||||
|
Uint32 *srcp = (Uint32 *) info->src;
|
||||||
|
int srcskip = info->src_skip >> 2;
|
||||||
|
Uint32 *dstp = (Uint32 *) info->dst;
|
||||||
|
int dstskip = info->dst_skip >> 2;
|
||||||
|
vector unsigned char mergePermute;
|
||||||
|
vector unsigned char valpha;
|
||||||
|
vector unsigned char valphamask;
|
||||||
|
vector unsigned short v1;
|
||||||
|
vector unsigned short v8;
|
||||||
|
|
||||||
|
mergePermute = VEC_MERGE_PERMUTE();
|
||||||
|
v1 = vec_splat_u16(1);
|
||||||
|
v8 = vec_splat_u16(8);
|
||||||
|
|
||||||
|
/* set the alpha to 255 on the destination surf */
|
||||||
|
valphamask = VEC_ALPHA_MASK();
|
||||||
|
|
||||||
|
/* set a vector full of alpha and 255-alpha */
|
||||||
|
((unsigned char *) &valpha)[0] = alpha;
|
||||||
|
valpha = vec_splat(valpha, 0);
|
||||||
|
|
||||||
|
while (height--) {
|
||||||
|
int width = info->dst_w;
|
||||||
|
#define ONE_PIXEL_BLEND(condition, widthvar) while ((condition)) { \
|
||||||
|
Uint32 s = *srcp; \
|
||||||
|
Uint32 d = *dstp; \
|
||||||
|
Uint32 s1 = s & 0xff00ff; \
|
||||||
|
Uint32 d1 = d & 0xff00ff; \
|
||||||
|
d1 = (d1 + ((s1 - d1) * alpha >> 8)) \
|
||||||
|
& 0xff00ff; \
|
||||||
|
s &= 0xff00; \
|
||||||
|
d &= 0xff00; \
|
||||||
|
d = (d + ((s - d) * alpha >> 8)) & 0xff00; \
|
||||||
|
*dstp = d1 | d | 0xff000000; \
|
||||||
|
++srcp; \
|
||||||
|
++dstp; \
|
||||||
|
widthvar--; \
|
||||||
|
}
|
||||||
|
ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
|
||||||
|
if (width > 0) {
|
||||||
|
int extrawidth = (width % 4);
|
||||||
|
vector unsigned char valigner = VEC_ALIGNER(srcp);
|
||||||
|
vector unsigned char vs = (vector unsigned char) vec_ld(0, srcp);
|
||||||
|
width -= extrawidth;
|
||||||
|
while (width) {
|
||||||
|
vector unsigned char voverflow;
|
||||||
|
vector unsigned char vd;
|
||||||
|
|
||||||
|
/* s = *srcp */
|
||||||
|
voverflow = (vector unsigned char) vec_ld(15, srcp);
|
||||||
|
vs = vec_perm(vs, voverflow, valigner);
|
||||||
|
|
||||||
|
/* d = *dstp */
|
||||||
|
vd = (vector unsigned char) vec_ld(0, dstp);
|
||||||
|
|
||||||
|
VEC_MULTIPLY_ALPHA(vs, vd, valpha, mergePermute, v1, v8);
|
||||||
|
|
||||||
|
/* set the alpha channel to full on */
|
||||||
|
vd = vec_or(vd, valphamask);
|
||||||
|
|
||||||
|
/* *dstp = res */
|
||||||
|
vec_st((vector unsigned int) vd, 0, dstp);
|
||||||
|
|
||||||
|
srcp += 4;
|
||||||
|
dstp += 4;
|
||||||
|
width -= 4;
|
||||||
|
vs = voverflow;
|
||||||
|
}
|
||||||
|
ONE_PIXEL_BLEND((extrawidth), extrawidth);
|
||||||
|
}
|
||||||
|
#undef ONE_PIXEL_BLEND
|
||||||
|
|
||||||
|
srcp += srcskip;
|
||||||
|
dstp += dstskip;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#if __MWERKS__
|
||||||
|
#pragma altivec_model off
|
||||||
|
#endif
|
||||||
|
#endif /* SDL_ALTIVEC_BLITTERS */
|
||||||
|
|
||||||
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
|
/* fast RGB888->(A)RGB888 blending with surface alpha=128 special case */
|
||||||
static void
|
static void
|
||||||
BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info)
|
BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo * info)
|
||||||
|
@ -538,6 +1338,79 @@ BlitRGBtoRGBPixelAlpha(SDL_BlitInfo * info)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef __3dNOW__
/*
 * Fast (as in MMX with prefetch) ARGB888->(A)RGB888 blending with pixel
 * alpha.
 *
 * Per pixel: a source alpha of zero leaves the destination untouched, a
 * fully set alpha field copies the colour channels and keeps the
 * destination's non-colour bits, and anything else is blended with MMX
 * arithmetic.  Source and destination are prefetched 16 pixels ahead.
 *
 * info supplies the pixel pointers, per-row skips (in bytes), the
 * destination size and the source pixel format (masks and alpha shift).
 */
static void
BlitRGBtoRGBPixelAlphaMMX3DNOW(SDL_BlitInfo * info)
{
    int width = info->dst_w;
    int height = info->dst_h;
    Uint32 *srcp = (Uint32 *) info->src;
    int srcskip = info->src_skip >> 2;  /* bytes -> 32-bit pixels */
    Uint32 *dstp = (Uint32 *) info->dst;
    int dstskip = info->dst_skip >> 2;  /* bytes -> 32-bit pixels */
    SDL_PixelFormat *sf = info->src_fmt;
    Uint32 chanmask = sf->Rmask | sf->Gmask | sf->Bmask;
    Uint32 amask = sf->Amask;
    Uint32 ashift = sf->Ashift;
    Uint64 multmask;

    __m64 src1, dst1, mm_alpha, mm_zero, dmask;

    mm_zero = _mm_setzero_si64();       /* 0 -> mm_zero */
    /*
     * After the bytes of a pixel are unpacked to 16-bit lanes, the alpha
     * byte (bit offset ashift) occupies the lane at bit offset ashift * 2.
     * multmask is all ones except for that lane.
     */
    multmask = 0xFFFF;
    multmask <<= (ashift * 2);
    multmask = ~multmask;
    dmask = *(__m64 *) & multmask;      /* dst alpha mask -> dmask */

    while (height--) {
        /* *INDENT-OFF* */
        DUFFS_LOOP4({
        Uint32 alpha;

        _m_prefetch(srcp + 16);
        _m_prefetch(dstp + 16);

        alpha = *srcp & amask;
        if (alpha == 0) {
            /* do nothing */
        } else if (alpha == amask) {
            /* copy RGB, keep dst alpha */
            *dstp = (*srcp & chanmask) | (*dstp & ~chanmask);
        } else {
            src1 = _mm_cvtsi32_si64(*srcp); /* src(ARGB) -> src1 (0000ARGB)*/
            src1 = _mm_unpacklo_pi8(src1, mm_zero); /* 0A0R0G0B -> src1 */

            dst1 = _mm_cvtsi32_si64(*dstp); /* dst(ARGB) -> dst1 (0000ARGB)*/
            dst1 = _mm_unpacklo_pi8(dst1, mm_zero); /* 0A0R0G0B -> dst1 */

            mm_alpha = _mm_cvtsi32_si64(alpha); /* alpha -> mm_alpha (0000000A) */
            mm_alpha = _mm_srli_si64(mm_alpha, ashift); /* mm_alpha >> ashift -> mm_alpha(0000000A) */
            mm_alpha = _mm_unpacklo_pi16(mm_alpha, mm_alpha); /* 00000A0A -> mm_alpha */
            mm_alpha = _mm_unpacklo_pi32(mm_alpha, mm_alpha); /* 0A0A0A0A -> mm_alpha */
            /* zero the multiplier in the alpha lane so the add below leaves dst alpha alone */
            mm_alpha = _mm_and_si64(mm_alpha, dmask); /* 000A0A0A -> mm_alpha, preserve dst alpha on add */

            /* blend */
            src1 = _mm_sub_pi16(src1, dst1);/* src - dst -> src1 */
            src1 = _mm_mullo_pi16(src1, mm_alpha); /* (src - dst) * alpha -> src1 */
            src1 = _mm_srli_pi16(src1, 8); /* src1 >> 8 -> src1(000R0G0B) */
            dst1 = _mm_add_pi8(src1, dst1); /* src1 + dst1(dst) -> dst1(0A0R0G0B) */
            dst1 = _mm_packs_pu16(dst1, mm_zero);  /* 0000ARGB -> dst1 */

            *dstp = _mm_cvtsi64_si32(dst1); /* dst1 -> pixel */
        }
        ++srcp;
        ++dstp;
        }, width);
        /* *INDENT-ON* */
        srcp += srcskip;
        dstp += dstskip;
    }
    /* Leave MMX state so later floating-point code works. */
    _mm_empty();
}

#endif /* __3dNOW__ */
|
||||||
|
|
||||||
/* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
|
/* 16bpp special case for per-surface alpha=50%: blend 2 pixels in parallel */
|
||||||
|
|
||||||
/* blend a single 16 bit pixel at 50% */
|
/* blend a single 16 bit pixel at 50% */
|
||||||
|
@ -1257,10 +2130,17 @@ SDL_CalculateBlitA(SDL_Surface * surface)
|
||||||
return BlitNto1PixelAlpha;
|
return BlitNto1PixelAlpha;
|
||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
|
#if SDL_ALTIVEC_BLITTERS
|
||||||
&& sf->Gmask == 0xff00
|
if (sf->BytesPerPixel == 4
|
||||||
&& ((sf->Rmask == 0xff && df->Rmask == 0x1f)
|
&& df->Gmask == 0x7e0 && df->Bmask == 0x1f
|
||||||
|| (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
|
&& SDL_HasAltiVec())
|
||||||
|
return Blit32to565PixelAlphaAltivec;
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
if (sf->BytesPerPixel == 4 && sf->Amask == 0xff000000
|
||||||
|
&& sf->Gmask == 0xff00
|
||||||
|
&& ((sf->Rmask == 0xff && df->Rmask == 0x1f)
|
||||||
|
|| (sf->Bmask == 0xff && df->Bmask == 0x1f))) {
|
||||||
if (df->Gmask == 0x7e0)
|
if (df->Gmask == 0x7e0)
|
||||||
return BlitARGBto565PixelAlpha;
|
return BlitARGBto565PixelAlpha;
|
||||||
else if (df->Gmask == 0x3e0)
|
else if (df->Gmask == 0x3e0)
|
||||||
|
@ -1272,20 +2152,35 @@ SDL_CalculateBlitA(SDL_Surface * surface)
|
||||||
if (sf->Rmask == df->Rmask
|
if (sf->Rmask == df->Rmask
|
||||||
&& sf->Gmask == df->Gmask
|
&& sf->Gmask == df->Gmask
|
||||||
&& sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
|
&& sf->Bmask == df->Bmask && sf->BytesPerPixel == 4) {
|
||||||
#if defined(__MMX__)
|
#if defined(__MMX__) || defined(__3dNOW__)
|
||||||
if (sf->Rshift % 8 == 0
|
if (sf->Rshift % 8 == 0
|
||||||
&& sf->Gshift % 8 == 0
|
&& sf->Gshift % 8 == 0
|
||||||
&& sf->Bshift % 8 == 0
|
&& sf->Bshift % 8 == 0
|
||||||
&& sf->Ashift % 8 == 0 && sf->Aloss == 0) {
|
&& sf->Ashift % 8 == 0 && sf->Aloss == 0) {
|
||||||
|
#ifdef __3dNOW__
|
||||||
|
if (SDL_Has3DNow())
|
||||||
|
return BlitRGBtoRGBPixelAlphaMMX3DNOW;
|
||||||
|
#endif
|
||||||
|
#ifdef __MMX__
|
||||||
if (SDL_HasMMX())
|
if (SDL_HasMMX())
|
||||||
return BlitRGBtoRGBPixelAlphaMMX;
|
return BlitRGBtoRGBPixelAlphaMMX;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
#endif /* __MMX__ */
|
#endif /* __MMX__ || __3dNOW__ */
|
||||||
if (sf->Amask == 0xff000000) {
|
if (sf->Amask == 0xff000000) {
|
||||||
|
#if SDL_ALTIVEC_BLITTERS
|
||||||
|
if (SDL_HasAltiVec())
|
||||||
|
return BlitRGBtoRGBPixelAlphaAltivec;
|
||||||
|
#endif
|
||||||
return BlitRGBtoRGBPixelAlpha;
|
return BlitRGBtoRGBPixelAlpha;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return BlitNtoNPixelAlpha;
|
#if SDL_ALTIVEC_BLITTERS
|
||||||
|
if (sf->Amask && sf->BytesPerPixel == 4 && SDL_HasAltiVec())
|
||||||
|
return Blit32to32PixelAlphaAltivec;
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
return BlitNtoNPixelAlpha;
|
||||||
|
|
||||||
case 3:
|
case 3:
|
||||||
default:
|
default:
|
||||||
|
@ -1331,10 +2226,19 @@ SDL_CalculateBlitA(SDL_Surface * surface)
|
||||||
return BlitRGBtoRGBSurfaceAlphaMMX;
|
return BlitRGBtoRGBSurfaceAlphaMMX;
|
||||||
#endif
|
#endif
|
||||||
if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
|
if ((sf->Rmask | sf->Gmask | sf->Bmask) == 0xffffff) {
|
||||||
|
#if SDL_ALTIVEC_BLITTERS
|
||||||
|
if (SDL_HasAltiVec())
|
||||||
|
return BlitRGBtoRGBSurfaceAlphaAltivec;
|
||||||
|
#endif
|
||||||
return BlitRGBtoRGBSurfaceAlpha;
|
return BlitRGBtoRGBSurfaceAlpha;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return BlitNtoNSurfaceAlpha;
|
#if SDL_ALTIVEC_BLITTERS
|
||||||
|
if ((sf->BytesPerPixel == 4) && SDL_HasAltiVec())
|
||||||
|
return Blit32to32SurfaceAlphaAltivec;
|
||||||
|
else
|
||||||
|
#endif
|
||||||
|
return BlitNtoNSurfaceAlpha;
|
||||||
|
|
||||||
case 3:
|
case 3:
|
||||||
default:
|
default:
|
||||||
|
@ -1348,6 +2252,12 @@ SDL_CalculateBlitA(SDL_Surface * surface)
|
||||||
if (df->BytesPerPixel == 1)
|
if (df->BytesPerPixel == 1)
|
||||||
return BlitNto1SurfaceAlphaKey;
|
return BlitNto1SurfaceAlphaKey;
|
||||||
else
|
else
|
||||||
|
#if SDL_ALTIVEC_BLITTERS
|
||||||
|
if (sf->BytesPerPixel == 4 && df->BytesPerPixel == 4 &&
|
||||||
|
SDL_HasAltiVec())
|
||||||
|
return Blit32to32SurfaceAlphaKeyAltivec;
|
||||||
|
else
|
||||||
|
#endif
|
||||||
return BlitNtoNSurfaceAlphaKey;
|
return BlitNtoNSurfaceAlphaKey;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
|
@ -28,8 +28,840 @@
|
||||||
|
|
||||||
/* Functions to blit from N-bit surfaces to other surfaces */
|
/* Functions to blit from N-bit surfaces to other surfaces */
|
||||||
|
|
||||||
|
#if SDL_ALTIVEC_BLITTERS
|
||||||
|
#define assert(X)
|
||||||
|
#ifdef __MACOSX__
#include <sys/sysctl.h>
/*
 * Query the "hw.l3cachesize" sysctl.
 *
 * Returns the value reported by the kernel, or 0 if the query fails.
 * The kernel tells us how many bytes it actually wrote; a 4-byte answer
 * is read back as a 32-bit value instead of being misinterpreted as part
 * of a 64-bit one.
 */
static size_t
GetL3CacheSize(void)
{
    const char key[] = "hw.l3cachesize";
    union
    {
        u_int64_t quad;
        u_int32_t word;
    } result;
    size_t typeSize = sizeof(result.quad);

    result.quad = 0;
    if (sysctlbyname(key, &result, &typeSize, NULL, 0) != 0) {
        return 0;
    }

    /* typeSize now holds the number of bytes the kernel stored. */
    if (typeSize == sizeof(result.word)) {
        return (size_t) result.word;
    }
    return (size_t) result.quad;
}
#else
/*
 * No way to query the cache size here: return a fixed 2 MiB guess.
 */
static size_t
GetL3CacheSize(void)
{
    /* XXX: Just guess G4 */
    return 2097152;
}
#endif /* __MACOSX__ */
|
||||||
|
|
||||||
|
/*
 * Vector literal syntax differs between compilers: GCC before version 4
 * on Mac OS X takes the parenthesized form, everything else the
 * curly-brace form.
 */
#if (defined(__MACOSX__) && (__GNUC__ < 4))
#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) ( a,b,c,d,e,f,g,h )
#else
#define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
        (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
#define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
        (vector unsigned short) { a,b,c,d,e,f,g,h }
#endif

/* Non-zero when x is not a multiple of 16 (its low four address bits). */
#define UNALIGNED_PTR(x) (((size_t) (x)) & 0x0000000F)

/*
 * Byte-permute pattern that applies the same four offsets (a, b, c, d) to
 * each of the four 32-bit pixels in a vector.  Built through
 * VECUINT8_LITERAL so it follows the compiler's vector literal syntax.
 */
#define VSWIZZLE32(a,b,c,d) \
        VECUINT8_LITERAL(0x00+(a), 0x00+(b), 0x00+(c), 0x00+(d), \
                         0x04+(a), 0x04+(b), 0x04+(c), 0x04+(d), \
                         0x08+(a), 0x08+(b), 0x08+(c), 0x08+(d), \
                         0x0C+(a), 0x0C+(b), 0x0C+(c), 0x0C+(d))

/* Pack r, g, b, a into one pixel using the shifts and masks of dstfmt. */
#define MAKE8888(dstfmt, r, g, b, a)  \
    ( (((r)<<(dstfmt)->Rshift)&(dstfmt)->Rmask) | \
      (((g)<<(dstfmt)->Gshift)&(dstfmt)->Gmask) | \
      (((b)<<(dstfmt)->Bshift)&(dstfmt)->Bmask) | \
      (((a)<<(dstfmt)->Ashift)&(dstfmt)->Amask) )

/*
 * Data Stream Touch...Altivec cache prefetching.
 *
 *  Don't use this on a G5...however, the speed boost is very significant
 *   on a G4.
 */
#define DST_CHAN_SRC 1
#define DST_CHAN_DEST 2

/* macro to set DST control word value... */
#define DST_CTRL(size, count, stride) \
    (((size) << 24) | ((count) << 16) | (stride))

/*
 * Permute control for realigning a source stream: lvsl of the address when
 * src is unaligned, otherwise lvsl(8) + 8 on every byte.
 */
#define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    ? vec_lvsl(0, (src)) \
    : vec_add(vec_lvsl(8, (src)), vec_splat_u8(8)))
|
||||||
|
|
||||||
|
/* Calculate the permute vector used for 32->32 swizzling */
|
||||||
|
static vector unsigned char
|
||||||
|
calc_swizzle32(const SDL_PixelFormat * srcfmt, const SDL_PixelFormat * dstfmt)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* We have to assume that the bits that aren't used by other
|
||||||
|
* colors is alpha, and it's one complete byte, since some formats
|
||||||
|
* leave alpha with a zero mask, but we should still swizzle the bits.
|
||||||
|
*/
|
||||||
|
/* ARGB */
|
||||||
|
const static const struct SDL_PixelFormat default_pixel_format = {
|
||||||
|
NULL, 32, 4,
|
||||||
|
0, 0, 0, 0,
|
||||||
|
16, 8, 0, 24,
|
||||||
|
0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000
|
||||||
|
};
|
||||||
|
if (!srcfmt) {
|
||||||
|
srcfmt = &default_pixel_format;
|
||||||
|
}
|
||||||
|
if (!dstfmt) {
|
||||||
|
dstfmt = &default_pixel_format;
|
||||||
|
}
|
||||||
|
const vector unsigned char plus = VECUINT8_LITERAL(0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x04, 0x04, 0x04, 0x04,
|
||||||
|
0x08, 0x08, 0x08, 0x08,
|
||||||
|
0x0C, 0x0C, 0x0C,
|
||||||
|
0x0C);
|
||||||
|
vector unsigned char vswiz;
|
||||||
|
vector unsigned int srcvec;
|
||||||
|
#define RESHIFT(X) (3 - ((X) >> 3))
|
||||||
|
Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
|
||||||
|
Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
|
||||||
|
Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
|
||||||
|
Uint32 amask;
|
||||||
|
/* Use zero for alpha if either surface doesn't have alpha */
|
||||||
|
if (dstfmt->Amask) {
|
||||||
|
amask =
|
||||||
|
((srcfmt->Amask) ? RESHIFT(srcfmt->
|
||||||
|
Ashift) : 0x10) << (dstfmt->Ashift);
|
||||||
|
} else {
|
||||||
|
amask =
|
||||||
|
0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^
|
||||||
|
0xFFFFFFFF);
|
||||||
|
}
|
||||||
|
#undef RESHIFT
|
||||||
|
((unsigned int *) (char *) &srcvec)[0] = (rmask | gmask | bmask | amask);
|
||||||
|
vswiz = vec_add(plus, (vector unsigned char) vec_splat(srcvec, 0));
|
||||||
|
return (vswiz);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void Blit_RGB888_RGB565(SDL_BlitInfo * info);

/*
 * 32-bit RGB -> 16-bit RGB565 conversion (AltiVec).
 *
 * Pixels are converted one at a time until dst is 16-byte aligned, then
 * eight at a time with vector code, then one at a time for what is left
 * of the row.
 *
 * The vector path packs to 1-5-5-5 with vec_packpx and rebuilds the 6-bit
 * green field from the original green bytes.  Blue is masked with 0x001f
 * so that only the five blue bits are kept: a wider mask would also keep
 * bit 5 of the 1-5-5-5 pixel, which belongs to its green field, and OR it
 * into the green LSB, making the result differ from the scalar path.
 */
static void
Blit_RGB888_RGB565Altivec(SDL_BlitInfo * info)
{
    int height = info->dst_h;
    Uint8 *src = (Uint8 *) info->src;
    int srcskip = info->src_skip;
    Uint8 *dst = (Uint8 *) info->dst;
    int dstskip = info->dst_skip;
    SDL_PixelFormat *srcfmt = info->src_fmt;
    vector unsigned char valpha = vec_splat_u8(0);
    vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
    /* Picks byte 2 of each pixel (green after the swizzle) into the low
       byte of a 16-bit lane; the high byte is masked off by vfc below. */
    vector unsigned char vgmerge = VECUINT8_LITERAL(0x00, 0x02, 0x00, 0x06,
                                                    0x00, 0x0a, 0x00, 0x0e,
                                                    0x00, 0x12, 0x00, 0x16,
                                                    0x00, 0x1a, 0x00, 0x1e);
    vector unsigned short v1 = vec_splat_u16(1);
    vector unsigned short v3 = vec_splat_u16(3);
    /* The five blue bits of a packed pixel. */
    vector unsigned short v1f =
        VECUINT16_LITERAL(0x001f, 0x001f, 0x001f, 0x001f,
                          0x001f, 0x001f, 0x001f, 0x001f);
    /* The six most significant bits of an 8-bit green value. */
    vector unsigned short vfc =
        VECUINT16_LITERAL(0x00fc, 0x00fc, 0x00fc, 0x00fc,
                          0x00fc, 0x00fc, 0x00fc, 0x00fc);
    /* 0xF9F9 << 8 == 0xF900: the red field plus one bit that the green
       term sets to the same value anyway. */
    vector unsigned short vf800 = (vector unsigned short) vec_splat_u8(-7);
    vf800 = vec_sl(vf800, vec_splat_u16(8));

    while (height--) {
        vector unsigned char valigner;
        vector unsigned char voverflow;
        vector unsigned char vsrc;

        int width = info->dst_w;
        int extrawidth;

        /* do scalar until we can align... */
#define ONE_PIXEL_BLEND(condition, widthvar) \
        while (condition) { \
            Uint32 Pixel; \
            unsigned sR, sG, sB, sA; \
            DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
                          sR, sG, sB, sA); \
            *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
                                ((sG << 3) & 0x000007E0) | \
                                ((sB >> 3) & 0x0000001F)); \
            dst += 2; \
            src += 4; \
            widthvar--; \
        }

        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);

        /* After all that work, here's the vector part! */
        extrawidth = (width % 8);       /* trailing unaligned stores */
        width -= extrawidth;
        vsrc = vec_ld(0, src);
        valigner = VEC_ALIGNER(src);

        while (width) {
            vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
            vector unsigned int vsrc1, vsrc2;
            vector unsigned char vdst;

            /* first four source pixels */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc1 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
            src += 16;
            vsrc = voverflow;
            /* next four source pixels */
            voverflow = vec_ld(15, src);
            vsrc = vec_perm(vsrc, voverflow, valigner);
            vsrc2 = (vector unsigned int) vec_perm(vsrc, valpha, vpermute);
            /* 1555 */
            vpixel = (vector unsigned short) vec_packpx(vsrc1, vsrc2);
            /* green: (G8 & 0xfc) << 3 lands in bits 10..5 */
            vgpixel = (vector unsigned short) vec_perm(vsrc1, vsrc2, vgmerge);
            vgpixel = vec_and(vgpixel, vfc);
            vgpixel = vec_sl(vgpixel, v3);
            /* red: move the 5-bit field from bits 14..10 up to 15..11 */
            vrpixel = vec_sl(vpixel, v1);
            vrpixel = vec_and(vrpixel, vf800);
            /* blue: bits 4..0 only */
            vbpixel = vec_and(vpixel, v1f);
            vdst =
                vec_or((vector unsigned char) vrpixel,
                       (vector unsigned char) vgpixel);
            /* 565 */
            vdst = vec_or(vdst, (vector unsigned char) vbpixel);
            vec_st(vdst, 0, dst);

            width -= 8;
            src += 16;
            dst += 16;
            vsrc = voverflow;
        }

        assert(width == 0);

        /* finish the row with scalar code */
        ONE_PIXEL_BLEND((extrawidth), extrawidth);
#undef ONE_PIXEL_BLEND

        src += srcskip;         /* move to next row, accounting for pitch. */
        dst += dstskip;
    }


}
|
||||||
|
|
||||||
|
static void
|
||||||
|
Blit_RGB565_32Altivec(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
int height = info->dst_h;
|
||||||
|
Uint8 *src = (Uint8 *) info->src;
|
||||||
|
int srcskip = info->src_skip;
|
||||||
|
Uint8 *dst = (Uint8 *) info->dst;
|
||||||
|
int dstskip = info->dst_skip;
|
||||||
|
SDL_PixelFormat *srcfmt = info->src_fmt;
|
||||||
|
SDL_PixelFormat *dstfmt = info->dst_fmt;
|
||||||
|
unsigned alpha;
|
||||||
|
vector unsigned char valpha;
|
||||||
|
vector unsigned char vpermute;
|
||||||
|
vector unsigned short vf800;
|
||||||
|
vector unsigned int v8 = vec_splat_u32(8);
|
||||||
|
vector unsigned int v16 = vec_add(v8, v8);
|
||||||
|
vector unsigned short v2 = vec_splat_u16(2);
|
||||||
|
vector unsigned short v3 = vec_splat_u16(3);
|
||||||
|
/*
|
||||||
|
0x10 - 0x1f is the alpha
|
||||||
|
0x00 - 0x0e evens are the red
|
||||||
|
0x01 - 0x0f odds are zero
|
||||||
|
*/
|
||||||
|
vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
|
||||||
|
0x10, 0x02, 0x01, 0x01,
|
||||||
|
0x10, 0x04, 0x01, 0x01,
|
||||||
|
0x10, 0x06, 0x01,
|
||||||
|
0x01);
|
||||||
|
vector unsigned char vredalpha2 =
|
||||||
|
(vector unsigned
|
||||||
|
char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
|
||||||
|
);
|
||||||
|
/*
|
||||||
|
0x00 - 0x0f is ARxx ARxx ARxx ARxx
|
||||||
|
0x11 - 0x0f odds are blue
|
||||||
|
*/
|
||||||
|
vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
|
||||||
|
0x04, 0x05, 0x06, 0x13,
|
||||||
|
0x08, 0x09, 0x0a, 0x15,
|
||||||
|
0x0c, 0x0d, 0x0e, 0x17);
|
||||||
|
vector unsigned char vblue2 =
|
||||||
|
(vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
|
||||||
|
);
|
||||||
|
/*
|
||||||
|
0x00 - 0x0f is ARxB ARxB ARxB ARxB
|
||||||
|
0x10 - 0x0e evens are green
|
||||||
|
*/
|
||||||
|
vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
|
||||||
|
0x04, 0x05, 0x12, 0x07,
|
||||||
|
0x08, 0x09, 0x14, 0x0b,
|
||||||
|
0x0c, 0x0d, 0x16, 0x0f);
|
||||||
|
vector unsigned char vgreen2 =
|
||||||
|
(vector unsigned
|
||||||
|
char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
assert(srcfmt->BytesPerPixel == 2);
|
||||||
|
assert(dstfmt->BytesPerPixel == 4);
|
||||||
|
|
||||||
|
vf800 = (vector unsigned short) vec_splat_u8(-7);
|
||||||
|
vf800 = vec_sl(vf800, vec_splat_u16(8));
|
||||||
|
|
||||||
|
if (dstfmt->Amask && info->a) {
|
||||||
|
((unsigned char *) &valpha)[0] = alpha = info->a;
|
||||||
|
valpha = vec_splat(valpha, 0);
|
||||||
|
} else {
|
||||||
|
alpha = 0;
|
||||||
|
valpha = vec_splat_u8(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
vpermute = calc_swizzle32(NULL, dstfmt);
|
||||||
|
while (height--) {
|
||||||
|
vector unsigned char valigner;
|
||||||
|
vector unsigned char voverflow;
|
||||||
|
vector unsigned char vsrc;
|
||||||
|
|
||||||
|
int width = info->dst_w;
|
||||||
|
int extrawidth;
|
||||||
|
|
||||||
|
/* do scalar until we can align... */
|
||||||
|
#define ONE_PIXEL_BLEND(condition, widthvar) \
|
||||||
|
while (condition) { \
|
||||||
|
unsigned sR, sG, sB; \
|
||||||
|
unsigned short Pixel = *((unsigned short *)src); \
|
||||||
|
sR = (Pixel >> 8) & 0xf8; \
|
||||||
|
sG = (Pixel >> 3) & 0xfc; \
|
||||||
|
sB = (Pixel << 3) & 0xf8; \
|
||||||
|
ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
|
||||||
|
src += 2; \
|
||||||
|
dst += 4; \
|
||||||
|
widthvar--; \
|
||||||
|
}
|
||||||
|
ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
|
||||||
|
|
||||||
|
/* After all that work, here's the vector part! */
|
||||||
|
extrawidth = (width % 8); /* trailing unaligned stores */
|
||||||
|
width -= extrawidth;
|
||||||
|
vsrc = vec_ld(0, src);
|
||||||
|
valigner = VEC_ALIGNER(src);
|
||||||
|
|
||||||
|
while (width) {
|
||||||
|
vector unsigned short vR, vG, vB;
|
||||||
|
vector unsigned char vdst1, vdst2;
|
||||||
|
|
||||||
|
voverflow = vec_ld(15, src);
|
||||||
|
vsrc = vec_perm(vsrc, voverflow, valigner);
|
||||||
|
|
||||||
|
vR = vec_and((vector unsigned short) vsrc, vf800);
|
||||||
|
vB = vec_sl((vector unsigned short) vsrc, v3);
|
||||||
|
vG = vec_sl(vB, v2);
|
||||||
|
|
||||||
|
vdst1 =
|
||||||
|
(vector unsigned char) vec_perm((vector unsigned char) vR,
|
||||||
|
valpha, vredalpha1);
|
||||||
|
vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
|
||||||
|
vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
|
||||||
|
vdst1 = vec_perm(vdst1, valpha, vpermute);
|
||||||
|
vec_st(vdst1, 0, dst);
|
||||||
|
|
||||||
|
vdst2 =
|
||||||
|
(vector unsigned char) vec_perm((vector unsigned char) vR,
|
||||||
|
valpha, vredalpha2);
|
||||||
|
vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
|
||||||
|
vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
|
||||||
|
vdst2 = vec_perm(vdst2, valpha, vpermute);
|
||||||
|
vec_st(vdst2, 16, dst);
|
||||||
|
|
||||||
|
width -= 8;
|
||||||
|
dst += 32;
|
||||||
|
src += 16;
|
||||||
|
vsrc = voverflow;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(width == 0);
|
||||||
|
|
||||||
|
|
||||||
|
/* do scalar until we can align... */
|
||||||
|
ONE_PIXEL_BLEND((extrawidth), extrawidth);
|
||||||
|
#undef ONE_PIXEL_BLEND
|
||||||
|
|
||||||
|
src += srcskip; /* move to next row, accounting for pitch. */
|
||||||
|
dst += dstskip;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static void
|
||||||
|
Blit_RGB555_32Altivec(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
int height = info->dst_h;
|
||||||
|
Uint8 *src = (Uint8 *) info->src;
|
||||||
|
int srcskip = info->src_skip;
|
||||||
|
Uint8 *dst = (Uint8 *) info->dst;
|
||||||
|
int dstskip = info->dst_skip;
|
||||||
|
SDL_PixelFormat *srcfmt = info->src_fmt;
|
||||||
|
SDL_PixelFormat *dstfmt = info->dst_fmt;
|
||||||
|
unsigned alpha;
|
||||||
|
vector unsigned char valpha;
|
||||||
|
vector unsigned char vpermute;
|
||||||
|
vector unsigned short vf800;
|
||||||
|
vector unsigned int v8 = vec_splat_u32(8);
|
||||||
|
vector unsigned int v16 = vec_add(v8, v8);
|
||||||
|
vector unsigned short v1 = vec_splat_u16(1);
|
||||||
|
vector unsigned short v3 = vec_splat_u16(3);
|
||||||
|
/*
|
||||||
|
0x10 - 0x1f is the alpha
|
||||||
|
0x00 - 0x0e evens are the red
|
||||||
|
0x01 - 0x0f odds are zero
|
||||||
|
*/
|
||||||
|
vector unsigned char vredalpha1 = VECUINT8_LITERAL(0x10, 0x00, 0x01, 0x01,
|
||||||
|
0x10, 0x02, 0x01, 0x01,
|
||||||
|
0x10, 0x04, 0x01, 0x01,
|
||||||
|
0x10, 0x06, 0x01,
|
||||||
|
0x01);
|
||||||
|
vector unsigned char vredalpha2 =
|
||||||
|
(vector unsigned
|
||||||
|
char) (vec_add((vector unsigned int) vredalpha1, vec_sl(v8, v16))
|
||||||
|
);
|
||||||
|
/*
|
||||||
|
0x00 - 0x0f is ARxx ARxx ARxx ARxx
|
||||||
|
0x11 - 0x0f odds are blue
|
||||||
|
*/
|
||||||
|
vector unsigned char vblue1 = VECUINT8_LITERAL(0x00, 0x01, 0x02, 0x11,
|
||||||
|
0x04, 0x05, 0x06, 0x13,
|
||||||
|
0x08, 0x09, 0x0a, 0x15,
|
||||||
|
0x0c, 0x0d, 0x0e, 0x17);
|
||||||
|
vector unsigned char vblue2 =
|
||||||
|
(vector unsigned char) (vec_add((vector unsigned int) vblue1, v8)
|
||||||
|
);
|
||||||
|
/*
|
||||||
|
0x00 - 0x0f is ARxB ARxB ARxB ARxB
|
||||||
|
0x10 - 0x0e evens are green
|
||||||
|
*/
|
||||||
|
vector unsigned char vgreen1 = VECUINT8_LITERAL(0x00, 0x01, 0x10, 0x03,
|
||||||
|
0x04, 0x05, 0x12, 0x07,
|
||||||
|
0x08, 0x09, 0x14, 0x0b,
|
||||||
|
0x0c, 0x0d, 0x16, 0x0f);
|
||||||
|
vector unsigned char vgreen2 =
|
||||||
|
(vector unsigned
|
||||||
|
char) (vec_add((vector unsigned int) vgreen1, vec_sl(v8, v8))
|
||||||
|
);
|
||||||
|
|
||||||
|
|
||||||
|
assert(srcfmt->BytesPerPixel == 2);
|
||||||
|
assert(dstfmt->BytesPerPixel == 4);
|
||||||
|
|
||||||
|
vf800 = (vector unsigned short) vec_splat_u8(-7);
|
||||||
|
vf800 = vec_sl(vf800, vec_splat_u16(8));
|
||||||
|
|
||||||
|
if (dstfmt->Amask && info->a) {
|
||||||
|
((unsigned char *) &valpha)[0] = alpha = info->a;
|
||||||
|
valpha = vec_splat(valpha, 0);
|
||||||
|
} else {
|
||||||
|
alpha = 0;
|
||||||
|
valpha = vec_splat_u8(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
vpermute = calc_swizzle32(NULL, dstfmt);
|
||||||
|
while (height--) {
|
||||||
|
vector unsigned char valigner;
|
||||||
|
vector unsigned char voverflow;
|
||||||
|
vector unsigned char vsrc;
|
||||||
|
|
||||||
|
int width = info->dst_w;
|
||||||
|
int extrawidth;
|
||||||
|
|
||||||
|
/* do scalar until we can align... */
|
||||||
|
#define ONE_PIXEL_BLEND(condition, widthvar) \
|
||||||
|
while (condition) { \
|
||||||
|
unsigned sR, sG, sB; \
|
||||||
|
unsigned short Pixel = *((unsigned short *)src); \
|
||||||
|
sR = (Pixel >> 7) & 0xf8; \
|
||||||
|
sG = (Pixel >> 2) & 0xf8; \
|
||||||
|
sB = (Pixel << 3) & 0xf8; \
|
||||||
|
ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
|
||||||
|
src += 2; \
|
||||||
|
dst += 4; \
|
||||||
|
widthvar--; \
|
||||||
|
}
|
||||||
|
ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
|
||||||
|
|
||||||
|
/* After all that work, here's the vector part! */
|
||||||
|
extrawidth = (width % 8); /* trailing unaligned stores */
|
||||||
|
width -= extrawidth;
|
||||||
|
vsrc = vec_ld(0, src);
|
||||||
|
valigner = VEC_ALIGNER(src);
|
||||||
|
|
||||||
|
while (width) {
|
||||||
|
vector unsigned short vR, vG, vB;
|
||||||
|
vector unsigned char vdst1, vdst2;
|
||||||
|
|
||||||
|
voverflow = vec_ld(15, src);
|
||||||
|
vsrc = vec_perm(vsrc, voverflow, valigner);
|
||||||
|
|
||||||
|
vR = vec_and(vec_sl((vector unsigned short) vsrc, v1), vf800);
|
||||||
|
vB = vec_sl((vector unsigned short) vsrc, v3);
|
||||||
|
vG = vec_sl(vB, v3);
|
||||||
|
|
||||||
|
vdst1 =
|
||||||
|
(vector unsigned char) vec_perm((vector unsigned char) vR,
|
||||||
|
valpha, vredalpha1);
|
||||||
|
vdst1 = vec_perm(vdst1, (vector unsigned char) vB, vblue1);
|
||||||
|
vdst1 = vec_perm(vdst1, (vector unsigned char) vG, vgreen1);
|
||||||
|
vdst1 = vec_perm(vdst1, valpha, vpermute);
|
||||||
|
vec_st(vdst1, 0, dst);
|
||||||
|
|
||||||
|
vdst2 =
|
||||||
|
(vector unsigned char) vec_perm((vector unsigned char) vR,
|
||||||
|
valpha, vredalpha2);
|
||||||
|
vdst2 = vec_perm(vdst2, (vector unsigned char) vB, vblue2);
|
||||||
|
vdst2 = vec_perm(vdst2, (vector unsigned char) vG, vgreen2);
|
||||||
|
vdst2 = vec_perm(vdst2, valpha, vpermute);
|
||||||
|
vec_st(vdst2, 16, dst);
|
||||||
|
|
||||||
|
width -= 8;
|
||||||
|
dst += 32;
|
||||||
|
src += 16;
|
||||||
|
vsrc = voverflow;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(width == 0);
|
||||||
|
|
||||||
|
|
||||||
|
/* do scalar until we can align... */
|
||||||
|
ONE_PIXEL_BLEND((extrawidth), extrawidth);
|
||||||
|
#undef ONE_PIXEL_BLEND
|
||||||
|
|
||||||
|
src += srcskip; /* move to next row, accounting for pitch. */
|
||||||
|
dst += dstskip;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static void BlitNtoNKey(SDL_BlitInfo * info);
|
||||||
|
static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo * info);
|
||||||
|
static void
|
||||||
|
Blit32to32KeyAltivec(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
int height = info->dst_h;
|
||||||
|
Uint32 *srcp = (Uint32 *) info->src;
|
||||||
|
int srcskip = info->src_skip / 4;
|
||||||
|
Uint32 *dstp = (Uint32 *) info->dst;
|
||||||
|
int dstskip = info->dst_skip / 4;
|
||||||
|
SDL_PixelFormat *srcfmt = info->src_fmt;
|
||||||
|
int srcbpp = srcfmt->BytesPerPixel;
|
||||||
|
SDL_PixelFormat *dstfmt = info->dst_fmt;
|
||||||
|
int dstbpp = dstfmt->BytesPerPixel;
|
||||||
|
int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
|
||||||
|
unsigned alpha = dstfmt->Amask ? info->a : 0;
|
||||||
|
Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
|
||||||
|
Uint32 ckey = info->colorkey;
|
||||||
|
vector unsigned int valpha;
|
||||||
|
vector unsigned char vpermute;
|
||||||
|
vector unsigned char vzero;
|
||||||
|
vector unsigned int vckey;
|
||||||
|
vector unsigned int vrgbmask;
|
||||||
|
vpermute = calc_swizzle32(srcfmt, dstfmt);
|
||||||
|
if (info->dst_w < 16) {
|
||||||
|
if (copy_alpha) {
|
||||||
|
BlitNtoNKeyCopyAlpha(info);
|
||||||
|
} else {
|
||||||
|
BlitNtoNKey(info);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
vzero = vec_splat_u8(0);
|
||||||
|
if (alpha) {
|
||||||
|
((unsigned char *) &valpha)[0] = (unsigned char) alpha;
|
||||||
|
valpha =
|
||||||
|
(vector unsigned int) vec_splat((vector unsigned char) valpha, 0);
|
||||||
|
} else {
|
||||||
|
valpha = (vector unsigned int) vzero;
|
||||||
|
}
|
||||||
|
ckey &= rgbmask;
|
||||||
|
((unsigned int *) (char *) &vckey)[0] = ckey;
|
||||||
|
vckey = vec_splat(vckey, 0);
|
||||||
|
((unsigned int *) (char *) &vrgbmask)[0] = rgbmask;
|
||||||
|
vrgbmask = vec_splat(vrgbmask, 0);
|
||||||
|
|
||||||
|
while (height--) {
|
||||||
|
#define ONE_PIXEL_BLEND(condition, widthvar) \
|
||||||
|
if (copy_alpha) { \
|
||||||
|
while (condition) { \
|
||||||
|
Uint32 Pixel; \
|
||||||
|
unsigned sR, sG, sB, sA; \
|
||||||
|
DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
|
||||||
|
sR, sG, sB, sA); \
|
||||||
|
if ( (Pixel & rgbmask) != ckey ) { \
|
||||||
|
ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
|
||||||
|
sR, sG, sB, sA); \
|
||||||
|
} \
|
||||||
|
dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
|
||||||
|
srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
|
||||||
|
widthvar--; \
|
||||||
|
} \
|
||||||
|
} else { \
|
||||||
|
while (condition) { \
|
||||||
|
Uint32 Pixel; \
|
||||||
|
unsigned sR, sG, sB; \
|
||||||
|
RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
|
||||||
|
if ( Pixel != ckey ) { \
|
||||||
|
RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
|
||||||
|
ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
|
||||||
|
sR, sG, sB, alpha); \
|
||||||
|
} \
|
||||||
|
dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
|
||||||
|
srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
|
||||||
|
widthvar--; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
int width = info->dst_w;
|
||||||
|
ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
|
||||||
|
assert(width > 0);
|
||||||
|
if (width > 0) {
|
||||||
|
int extrawidth = (width % 4);
|
||||||
|
vector unsigned char valigner = VEC_ALIGNER(srcp);
|
||||||
|
vector unsigned int vs = vec_ld(0, srcp);
|
||||||
|
width -= extrawidth;
|
||||||
|
assert(width >= 4);
|
||||||
|
while (width) {
|
||||||
|
vector unsigned char vsel;
|
||||||
|
vector unsigned int vd;
|
||||||
|
vector unsigned int voverflow = vec_ld(15, srcp);
|
||||||
|
/* load the source vec */
|
||||||
|
vs = vec_perm(vs, voverflow, valigner);
|
||||||
|
/* vsel is set for items that match the key */
|
||||||
|
vsel = (vector unsigned char) vec_and(vs, vrgbmask);
|
||||||
|
vsel = (vector unsigned char) vec_cmpeq(vs, vckey);
|
||||||
|
/* permute the src vec to the dest format */
|
||||||
|
vs = vec_perm(vs, valpha, vpermute);
|
||||||
|
/* load the destination vec */
|
||||||
|
vd = vec_ld(0, dstp);
|
||||||
|
/* select the source and dest into vs */
|
||||||
|
vd = (vector unsigned int) vec_sel((vector unsigned char) vs,
|
||||||
|
(vector unsigned char) vd,
|
||||||
|
vsel);
|
||||||
|
|
||||||
|
vec_st(vd, 0, dstp);
|
||||||
|
srcp += 4;
|
||||||
|
width -= 4;
|
||||||
|
dstp += 4;
|
||||||
|
vs = voverflow;
|
||||||
|
}
|
||||||
|
ONE_PIXEL_BLEND((extrawidth), extrawidth);
|
||||||
|
#undef ONE_PIXEL_BLEND
|
||||||
|
srcp += srcskip;
|
||||||
|
dstp += dstskip;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
|
||||||
|
/* Use this on a G5 */
|
||||||
|
static void
|
||||||
|
ConvertAltivec32to32_noprefetch(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
int height = info->dst_h;
|
||||||
|
Uint32 *src = (Uint32 *) info->src;
|
||||||
|
int srcskip = info->src_skip / 4;
|
||||||
|
Uint32 *dst = (Uint32 *) info->dst;
|
||||||
|
int dstskip = info->dst_skip / 4;
|
||||||
|
SDL_PixelFormat *srcfmt = info->src_fmt;
|
||||||
|
SDL_PixelFormat *dstfmt = info->dst_fmt;
|
||||||
|
vector unsigned int vzero = vec_splat_u32(0);
|
||||||
|
vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
|
||||||
|
if (dstfmt->Amask && !srcfmt->Amask) {
|
||||||
|
if (info->a) {
|
||||||
|
vector unsigned char valpha;
|
||||||
|
((unsigned char *) &valpha)[0] = info->a;
|
||||||
|
vzero = (vector unsigned int) vec_splat(valpha, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(srcfmt->BytesPerPixel == 4);
|
||||||
|
assert(dstfmt->BytesPerPixel == 4);
|
||||||
|
|
||||||
|
while (height--) {
|
||||||
|
vector unsigned char valigner;
|
||||||
|
vector unsigned int vbits;
|
||||||
|
vector unsigned int voverflow;
|
||||||
|
Uint32 bits;
|
||||||
|
Uint8 r, g, b, a;
|
||||||
|
|
||||||
|
int width = info->dst_w;
|
||||||
|
int extrawidth;
|
||||||
|
|
||||||
|
/* do scalar until we can align... */
|
||||||
|
while ((UNALIGNED_PTR(dst)) && (width)) {
|
||||||
|
bits = *(src++);
|
||||||
|
RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
|
||||||
|
*(dst++) = MAKE8888(dstfmt, r, g, b, a);
|
||||||
|
width--;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* After all that work, here's the vector part! */
|
||||||
|
extrawidth = (width % 4);
|
||||||
|
width -= extrawidth;
|
||||||
|
valigner = VEC_ALIGNER(src);
|
||||||
|
vbits = vec_ld(0, src);
|
||||||
|
|
||||||
|
while (width) {
|
||||||
|
voverflow = vec_ld(15, src);
|
||||||
|
src += 4;
|
||||||
|
width -= 4;
|
||||||
|
vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */
|
||||||
|
vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */
|
||||||
|
vec_st(vbits, 0, dst); /* store it back out. */
|
||||||
|
dst += 4;
|
||||||
|
vbits = voverflow;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(width == 0);
|
||||||
|
|
||||||
|
/* cover pixels at the end of the row that didn't fit in 16 bytes. */
|
||||||
|
while (extrawidth) {
|
||||||
|
bits = *(src++); /* max 7 pixels, don't bother with prefetch. */
|
||||||
|
RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
|
||||||
|
*(dst++) = MAKE8888(dstfmt, r, g, b, a);
|
||||||
|
extrawidth--;
|
||||||
|
}
|
||||||
|
|
||||||
|
src += srcskip;
|
||||||
|
dst += dstskip;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
|
||||||
|
/* Use this on a G4 */
|
||||||
|
static void
|
||||||
|
ConvertAltivec32to32_prefetch(SDL_BlitInfo * info)
|
||||||
|
{
|
||||||
|
const int scalar_dst_lead = sizeof(Uint32) * 4;
|
||||||
|
const int vector_dst_lead = sizeof(Uint32) * 16;
|
||||||
|
|
||||||
|
int height = info->dst_h;
|
||||||
|
Uint32 *src = (Uint32 *) info->src;
|
||||||
|
int srcskip = info->src_skip / 4;
|
||||||
|
Uint32 *dst = (Uint32 *) info->dst;
|
||||||
|
int dstskip = info->dst_skip / 4;
|
||||||
|
SDL_PixelFormat *srcfmt = info->src_fmt;
|
||||||
|
SDL_PixelFormat *dstfmt = info->dst_fmt;
|
||||||
|
vector unsigned int vzero = vec_splat_u32(0);
|
||||||
|
vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
|
||||||
|
if (dstfmt->Amask && !srcfmt->Amask) {
|
||||||
|
if (info->a) {
|
||||||
|
vector unsigned char valpha;
|
||||||
|
((unsigned char *) &valpha)[0] = info->a;
|
||||||
|
vzero = (vector unsigned int) vec_splat(valpha, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(srcfmt->BytesPerPixel == 4);
|
||||||
|
assert(dstfmt->BytesPerPixel == 4);
|
||||||
|
|
||||||
|
while (height--) {
|
||||||
|
vector unsigned char valigner;
|
||||||
|
vector unsigned int vbits;
|
||||||
|
vector unsigned int voverflow;
|
||||||
|
Uint32 bits;
|
||||||
|
Uint8 r, g, b, a;
|
||||||
|
|
||||||
|
int width = info->dst_w;
|
||||||
|
int extrawidth;
|
||||||
|
|
||||||
|
/* do scalar until we can align... */
|
||||||
|
while ((UNALIGNED_PTR(dst)) && (width)) {
|
||||||
|
vec_dstt(src + scalar_dst_lead, DST_CTRL(2, 32, 1024),
|
||||||
|
DST_CHAN_SRC);
|
||||||
|
vec_dstst(dst + scalar_dst_lead, DST_CTRL(2, 32, 1024),
|
||||||
|
DST_CHAN_DEST);
|
||||||
|
bits = *(src++);
|
||||||
|
RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
|
||||||
|
*(dst++) = MAKE8888(dstfmt, r, g, b, a);
|
||||||
|
width--;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* After all that work, here's the vector part! */
|
||||||
|
extrawidth = (width % 4);
|
||||||
|
width -= extrawidth;
|
||||||
|
valigner = VEC_ALIGNER(src);
|
||||||
|
vbits = vec_ld(0, src);
|
||||||
|
|
||||||
|
while (width) {
|
||||||
|
vec_dstt(src + vector_dst_lead, DST_CTRL(2, 32, 1024),
|
||||||
|
DST_CHAN_SRC);
|
||||||
|
vec_dstst(dst + vector_dst_lead, DST_CTRL(2, 32, 1024),
|
||||||
|
DST_CHAN_DEST);
|
||||||
|
voverflow = vec_ld(15, src);
|
||||||
|
src += 4;
|
||||||
|
width -= 4;
|
||||||
|
vbits = vec_perm(vbits, voverflow, valigner); /* src is ready. */
|
||||||
|
vbits = vec_perm(vbits, vzero, vpermute); /* swizzle it. */
|
||||||
|
vec_st(vbits, 0, dst); /* store it back out. */
|
||||||
|
dst += 4;
|
||||||
|
vbits = voverflow;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert(width == 0);
|
||||||
|
|
||||||
|
/* cover pixels at the end of the row that didn't fit in 16 bytes. */
|
||||||
|
while (extrawidth) {
|
||||||
|
bits = *(src++); /* max 7 pixels, don't bother with prefetch. */
|
||||||
|
RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
|
||||||
|
*(dst++) = MAKE8888(dstfmt, r, g, b, a);
|
||||||
|
extrawidth--;
|
||||||
|
}
|
||||||
|
|
||||||
|
src += srcskip;
|
||||||
|
dst += dstskip;
|
||||||
|
}
|
||||||
|
|
||||||
|
vec_dss(DST_CHAN_SRC);
|
||||||
|
vec_dss(DST_CHAN_DEST);
|
||||||
|
}
|
||||||
|
|
||||||
|
static Uint32
|
||||||
|
GetBlitFeatures(void)
|
||||||
|
{
|
||||||
|
static Uint32 features = 0xffffffff;
|
||||||
|
if (features == 0xffffffff) {
|
||||||
|
/* Provide an override for testing .. */
|
||||||
|
char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
|
||||||
|
if (override) {
|
||||||
|
features = 0;
|
||||||
|
SDL_sscanf(override, "%u", &features);
|
||||||
|
} else {
|
||||||
|
features = (0
|
||||||
|
/* Feature 1 is has-MMX */
|
||||||
|
| ((SDL_HasMMX())? 1 : 0)
|
||||||
|
/* Feature 2 is has-AltiVec */
|
||||||
|
| ((SDL_HasAltiVec())? 2 : 0)
|
||||||
|
/* Feature 4 is dont-use-prefetch */
|
||||||
|
/* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
|
||||||
|
| ((GetL3CacheSize() == 0) ? 4 : 0)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return features;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if __MWERKS__
|
||||||
|
#pragma altivec_model off
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
/* Feature 1 is has-MMX */
|
/* Feature 1 is has-MMX */
|
||||||
#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
|
#define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
|
||||||
|
#endif
|
||||||
|
|
||||||
/* This is now endian dependent */
|
/* This is now endian dependent */
|
||||||
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
|
#if SDL_BYTEORDER == SDL_LIL_ENDIAN
|
||||||
|
@ -1508,6 +2340,15 @@ static const struct blit_table normal_blit_1[] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct blit_table normal_blit_2[] = {
|
static const struct blit_table normal_blit_2[] = {
|
||||||
|
#if SDL_ALTIVEC_BLITTERS
|
||||||
|
/* has-altivec */
|
||||||
|
{0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00000000, 0x00000000,
|
||||||
|
0x00000000,
|
||||||
|
2, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
|
||||||
|
{0x00007C00, 0x000003E0, 0x0000001F, 4, 0x00000000, 0x00000000,
|
||||||
|
0x00000000,
|
||||||
|
2, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA},
|
||||||
|
#endif
|
||||||
{0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00,
|
{0x0000F800, 0x000007E0, 0x0000001F, 4, 0x00FF0000, 0x0000FF00,
|
||||||
0x000000FF,
|
0x000000FF,
|
||||||
0, Blit_RGB565_ARGB8888, SET_ALPHA},
|
0, Blit_RGB565_ARGB8888, SET_ALPHA},
|
||||||
|
@ -1531,6 +2372,22 @@ static const struct blit_table normal_blit_3[] = {
|
||||||
};
|
};
|
||||||
|
|
||||||
static const struct blit_table normal_blit_4[] = {
|
static const struct blit_table normal_blit_4[] = {
|
||||||
|
#if SDL_ALTIVEC_BLITTERS
|
||||||
|
/* has-altivec | dont-use-prefetch */
|
||||||
|
{0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
|
||||||
|
0x00000000,
|
||||||
|
6, ConvertAltivec32to32_noprefetch,
|
||||||
|
NO_ALPHA | COPY_ALPHA | SET_ALPHA},
|
||||||
|
/* has-altivec */
|
||||||
|
{0x00000000, 0x00000000, 0x00000000, 4, 0x00000000, 0x00000000,
|
||||||
|
0x00000000,
|
||||||
|
2, ConvertAltivec32to32_prefetch,
|
||||||
|
NO_ALPHA | COPY_ALPHA | SET_ALPHA},
|
||||||
|
/* has-altivec */
|
||||||
|
{0x00000000, 0x00000000, 0x00000000, 2, 0x0000F800, 0x000007E0,
|
||||||
|
0x0000001F,
|
||||||
|
2, Blit_RGB888_RGB565Altivec, NO_ALPHA},
|
||||||
|
#endif
|
||||||
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
|
{0x00FF0000, 0x0000FF00, 0x000000FF, 2, 0x0000F800, 0x000007E0,
|
||||||
0x0000001F,
|
0x0000001F,
|
||||||
0, Blit_RGB888_RGB565, NO_ALPHA},
|
0, Blit_RGB888_RGB565, NO_ALPHA},
|
||||||
|
@ -1628,6 +2485,12 @@ SDL_CalculateBlitN(SDL_Surface * surface)
|
||||||
else if (dstfmt->BytesPerPixel == 1)
|
else if (dstfmt->BytesPerPixel == 1)
|
||||||
return BlitNto1Key;
|
return BlitNto1Key;
|
||||||
else {
|
else {
|
||||||
|
#if SDL_ALTIVEC_BLITTERS
|
||||||
|
if ((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4)
|
||||||
|
&& SDL_HasAltiVec()) {
|
||||||
|
return Blit32to32KeyAltivec;
|
||||||
|
} else
|
||||||
|
#endif
|
||||||
if (srcfmt->Amask && dstfmt->Amask) {
|
if (srcfmt->Amask && dstfmt->Amask) {
|
||||||
return BlitNtoNKeyCopyAlpha;
|
return BlitNtoNKeyCopyAlpha;
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -158,7 +158,9 @@ int test_platform (void)
|
||||||
SDL_ATprintVerbose( 1, "CPU count: %d\n", SDL_GetCPUCount());
|
SDL_ATprintVerbose( 1, "CPU count: %d\n", SDL_GetCPUCount());
|
||||||
SDL_ATprintVerbose( 1, "Available extensions:\n" );
|
SDL_ATprintVerbose( 1, "Available extensions:\n" );
|
||||||
SDL_ATprintVerbose( 1, " RDTSC %s\n", SDL_HasRDTSC()? "detected" : "not detected" );
|
SDL_ATprintVerbose( 1, " RDTSC %s\n", SDL_HasRDTSC()? "detected" : "not detected" );
|
||||||
|
SDL_ATprintVerbose( 1, " AltiVec %s\n", SDL_HasAltiVec()? "detected" : "not detected" );
|
||||||
SDL_ATprintVerbose( 1, " MMX %s\n", SDL_HasMMX()? "detected" : "not detected" );
|
SDL_ATprintVerbose( 1, " MMX %s\n", SDL_HasMMX()? "detected" : "not detected" );
|
||||||
|
SDL_ATprintVerbose( 1, " 3DNow! %s\n", SDL_Has3DNow()? "detected" : "not detected" );
|
||||||
SDL_ATprintVerbose( 1, " SSE %s\n", SDL_HasSSE()? "detected" : "not detected" );
|
SDL_ATprintVerbose( 1, " SSE %s\n", SDL_HasSSE()? "detected" : "not detected" );
|
||||||
SDL_ATprintVerbose( 1, " SSE2 %s\n", SDL_HasSSE2()? "detected" : "not detected" );
|
SDL_ATprintVerbose( 1, " SSE2 %s\n", SDL_HasSSE2()? "detected" : "not detected" );
|
||||||
SDL_ATprintVerbose( 1, " SSE3 %s\n", SDL_HasSSE3()? "detected" : "not detected" );
|
SDL_ATprintVerbose( 1, " SSE3 %s\n", SDL_HasSSE3()? "detected" : "not detected" );
|
||||||
|
|
4
test/testplatform.c
Normal file → Executable file
4
test/testplatform.c
Normal file → Executable file
|
@ -140,9 +140,11 @@ TestCPUInfo(SDL_bool verbose)
|
||||||
{
|
{
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
printf("CPU count: %d\n", SDL_GetCPUCount());
|
printf("CPU count: %d\n", SDL_GetCPUCount());
|
||||||
printf("CPU cache line size: %d\n", SDL_GetCPUCacheLineSize());
|
printf("CPU cache line size: %d\n", SDL_GetCPUCacheLineSize());
|
||||||
printf("RDTSC %s\n", SDL_HasRDTSC()? "detected" : "not detected");
|
printf("RDTSC %s\n", SDL_HasRDTSC()? "detected" : "not detected");
|
||||||
|
printf("AltiVec %s\n", SDL_HasAltiVec()? "detected" : "not detected");
|
||||||
printf("MMX %s\n", SDL_HasMMX()? "detected" : "not detected");
|
printf("MMX %s\n", SDL_HasMMX()? "detected" : "not detected");
|
||||||
|
printf("3DNow! %s\n", SDL_Has3DNow()? "detected" : "not detected");
|
||||||
printf("SSE %s\n", SDL_HasSSE()? "detected" : "not detected");
|
printf("SSE %s\n", SDL_HasSSE()? "detected" : "not detected");
|
||||||
printf("SSE2 %s\n", SDL_HasSSE2()? "detected" : "not detected");
|
printf("SSE2 %s\n", SDL_HasSSE2()? "detected" : "not detected");
|
||||||
printf("SSE3 %s\n", SDL_HasSSE3()? "detected" : "not detected");
|
printf("SSE3 %s\n", SDL_HasSSE3()? "detected" : "not detected");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue